210 files changed, 15709 insertions, 9604 deletions
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 29c18966b05..0ad6dcb5d45 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -35,12 +35,37 @@ In short, 8-char indents make things easier to read, and have the added benefit of warning you when you're nesting your functions too deep. Heed that warning. +The preferred way to ease multiple indentation levels in a switch statement is +to align the "switch" and its subordinate "case" labels in the same column +instead of "double-indenting" the "case" labels. E.g.: + + switch (suffix) { + case 'G': + case 'g': + mem <<= 30; + break; + case 'M': + case 'm': + mem <<= 20; + break; + case 'K': + case 'k': + mem <<= 10; + /* fall through */ + default: + break; + } + + Don't put multiple statements on a single line unless you have something to hide: if (condition) do_this; do_something_everytime; +Don't put multiple assignments on a single line either. Kernel coding style +is super simple. Avoid tricky expressions. + Outside of comments, documentation and except in Kconfig, spaces are never used for indentation, and the above example is deliberately broken. @@ -69,7 +94,7 @@ void fun(int a, int b, int c) next_statement; } - Chapter 3: Placing Braces + Chapter 3: Placing Braces and Spaces The other issue that always comes up in C styling is the placement of braces. Unlike the indent size, there are few technical reasons to @@ -81,6 +106,20 @@ brace last on the line, and put the closing brace first, thusly: we do y } +This applies to all non-function statement blocks (if, switch, for, +while, do). E.g.: + + switch (action) { + case KOBJ_ADD: + return "add"; + case KOBJ_REMOVE: + return "remove"; + case KOBJ_CHANGE: + return "change"; + default: + return NULL; + } + However, there is one special case, namely functions: they have the opening brace at the beginning of the next line, thus: @@ -121,6 +160,49 @@ supply of new-lines on your screen is not a renewable resource (think 25-line terminal screens here), you have more empty lines to put comments on. + 3.1: Spaces + +Linux kernel style for use of spaces depends (mostly) on +function-versus-keyword usage. Use a space after (most) keywords. The +notable exceptions are sizeof, typeof, alignof, and __attribute__, which look +somewhat like functions (and are usually used with parentheses in Linux, +although they are not required in the language, as in: "sizeof info" after +"struct fileinfo info;" is declared). + +So use a space after these keywords: + if, switch, case, for, do, while +but not with sizeof, typeof, alignof, or __attribute__. E.g., + s = sizeof(struct file); + +Do not add spaces around (inside) parenthesized expressions. This example is +*bad*: + + s = sizeof( struct file ); + +When declaring pointer data or a function that returns a pointer type, the +preferred use of '*' is adjacent to the data name or function name and not +adjacent to the type name. Examples: + + char *linux_banner; + unsigned long long memparse(char *ptr, char **retptr); + char *match_strdup(substring_t *s); + +Use one space around (on each side of) most binary and ternary operators, +such as any of these: + + = + - < > * / % | & ^ <= >= == != ? : + +but no space after unary operators: + & * + - ~ ! sizeof typeof alignof __attribute__ defined + +no space before the postfix increment & decrement unary operators: + ++ -- + +no space after the prefix increment & decrement unary operators: + ++ -- + +and no space around the '.' and "->" structure member operators. 
+ Chapter 4: Naming @@ -152,7 +234,7 @@ variable that is used to hold a temporary value. If you are afraid to mix up your local variable names, you have another problem, which is called the function-growth-hormone-imbalance syndrome. -See next chapter. +See chapter 6 (Functions). Chapter 5: Typedefs @@ -258,6 +340,20 @@ generally easily keep track of about 7 different things, anything more and it gets confused. You know you're brilliant, but maybe you'd like to understand what you did 2 weeks from now. +In source files, separate functions with one blank line. If the function is +exported, the EXPORT* macro for it should follow immediately after the closing +function brace line. E.g.: + +int system_is_up(void) +{ + return system_state == SYSTEM_RUNNING; +} +EXPORT_SYMBOL(system_is_up); + +In function prototypes, include parameter names with their data types. +Although this is not required by the C language, it is preferred in Linux +because it is a simple way to add valuable information for the reader. + Chapter 7: Centralized exiting of functions @@ -306,16 +402,36 @@ time to explain badly written code. Generally, you want your comments to tell WHAT your code does, not HOW. Also, try to avoid putting comments inside a function body: if the function is so complex that you need to separately comment parts of it, -you should probably go back to chapter 5 for a while. You can make +you should probably go back to chapter 6 for a while. You can make small comments to note or warn about something particularly clever (or ugly), but try to avoid excess. Instead, put the comments at the head of the function, telling people what it does, and possibly WHY it does it. -When commenting the kernel API functions, please use the kerneldoc format. +When commenting the kernel API functions, please use the kernel-doc format. See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc for details. +Linux style for comments is the C89 "/* ... */" style. +Don't use C99-style "// ..." comments. + +The preferred style for long (multi-line) comments is: + + /* + * This is the preferred style for multi-line + * comments in the Linux kernel source code. + * Please use it consistently. + * + * Description: A column of asterisks on the left side, + * with beginning and ending almost-blank lines. + */ + +It's also important to comment data, whether they are basic types or derived +types. To this end, use just one data declaration per line (no commas for +multiple data declarations). This leaves you room for a small comment on each +item, explaining its use. + + Chapter 9: You've made a mess of it That's OK, we all do. You've probably been told by your long-time Unix @@ -591,4 +707,4 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002: http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ -- -Last updated on 30 April 2006. +Last updated on 2006-December-06. diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 7ac61f60037..2270efa1015 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -66,3 +66,9 @@ kernel patches. See Documentation/ABI/README for more information. 20: Check that it all passes `make headers_check'. + +21: Has been checked with injection of at least slab and page-allocation + fauilures. See Documentation/fault-injection/. + + If the new code is substantial, addition of subsystem-specific fault + injection might be appropriate. 
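Two of the CodingStyle additions above come without an example: the rule that function prototypes should carry parameter names, and the rule that data declarations should go one per line so each item has room for a comment. A minimal sketch of both (not part of the patch; every name here is made up for illustration):

/*
 * Sketch only: parameter names in the prototype, one declaration per
 * line with a short comment on each item, and '*' next to the name.
 */
struct fileinfo {
	unsigned long size;		/* length in bytes */
	unsigned int mode;		/* access permissions */
	char *name;			/* '*' goes with the name, not the type */
};

int fileinfo_resize(struct fileinfo *info, unsigned long new_size);

int fileinfo_resize(struct fileinfo *info, unsigned long new_size)
{
	if (new_size > info->size)
		return -1;		/* this sketch only shrinks */
	info->size = new_size;
	return 0;
}

The prototype alone already tells a reader which argument is which, which is the point the new text is making.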
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index bf2b0e2f87e..e9126e794ed 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -7,6 +7,8 @@ * Copyright (C) Balbir Singh, IBM Corp. 2006 * Copyright (c) Jay Lan, SGI. 2006 * + * Compile with + * gcc -I/usr/src/linux/include getdelays.c -o getdelays */ #include <stdio.h> @@ -35,13 +37,20 @@ #define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN)) #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) -#define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0) -int done = 0; -int rcvbufsz=0; - - char name[100]; -int dbg=0, print_delays=0; +#define err(code, fmt, arg...) \ + do { \ + fprintf(stderr, fmt, ##arg); \ + exit(code); \ + } while (0) + +int done; +int rcvbufsz; +char name[100]; +int dbg; +int print_delays; +int print_io_accounting; __u64 stime, utime; + #define PRINTF(fmt, arg...) { \ if (dbg) { \ printf(fmt, ##arg); \ @@ -78,8 +87,9 @@ static int create_nl_socket(int protocol) if (rcvbufsz) if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, sizeof(rcvbufsz)) < 0) { - printf("Unable to set socket rcv buf size to %d\n", - rcvbufsz); + fprintf(stderr, "Unable to set socket rcv buf size " + "to %d\n", + rcvbufsz); return -1; } @@ -186,6 +196,15 @@ void print_delayacct(struct taskstats *t) "count", "delay total", t->swapin_count, t->swapin_delay_total); } +void print_ioacct(struct taskstats *t) +{ + printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", + t->ac_comm, + (unsigned long long)t->read_bytes, + (unsigned long long)t->write_bytes, + (unsigned long long)t->cancelled_write_bytes); +} + int main(int argc, char *argv[]) { int c, rc, rep_len, aggr_len, len2, cmd_type; @@ -208,7 +227,7 @@ int main(int argc, char *argv[]) struct msgtemplate msg; while (1) { - c = getopt(argc, argv, "dw:r:m:t:p:v:l"); + c = getopt(argc, argv, "diw:r:m:t:p:v:l"); if (c < 0) break; @@ -217,6 +236,10 @@ int main(int argc, char *argv[]) printf("print delayacct stats ON\n"); print_delays = 1; break; + case 'i': + printf("printing IO accounting\n"); + print_io_accounting = 1; + break; case 'w': strncpy(logfile, optarg, MAX_FILENAME); printf("write to file %s\n", logfile); @@ -238,14 +261,12 @@ int main(int argc, char *argv[]) if (!tid) err(1, "Invalid tgid\n"); cmd_type = TASKSTATS_CMD_ATTR_TGID; - print_delays = 1; break; case 'p': tid = atoi(optarg); if (!tid) err(1, "Invalid pid\n"); cmd_type = TASKSTATS_CMD_ATTR_PID; - print_delays = 1; break; case 'v': printf("debug on\n"); @@ -277,7 +298,7 @@ int main(int argc, char *argv[]) mypid = getpid(); id = get_family_id(nl_sd); if (!id) { - printf("Error getting family id, errno %d", errno); + fprintf(stderr, "Error getting family id, errno %d\n", errno); goto err; } PRINTF("family id %d\n", id); @@ -288,7 +309,7 @@ int main(int argc, char *argv[]) &cpumask, strlen(cpumask) + 1); PRINTF("Sent register cpumask, retval %d\n", rc); if (rc < 0) { - printf("error sending register cpumask\n"); + fprintf(stderr, "error sending register cpumask\n"); goto err; } } @@ -298,7 +319,7 @@ int main(int argc, char *argv[]) cmd_type, &tid, sizeof(__u32)); PRINTF("Sent pid/tgid, retval %d\n", rc); if (rc < 0) { - printf("error sending tid/tgid cmd\n"); + fprintf(stderr, "error sending tid/tgid cmd\n"); goto done; } } @@ -310,13 +331,15 @@ int main(int argc, char *argv[]) PRINTF("received %d bytes\n", rep_len); if (rep_len < 0) { - printf("nonfatal reply error: errno %d\n", errno); + fprintf(stderr, "nonfatal reply error: 
errno %d\n", + errno); continue; } if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), rep_len)) { struct nlmsgerr *err = NLMSG_DATA(&msg); - printf("fatal reply error, errno %d\n", err->error); + fprintf(stderr, "fatal reply error, errno %d\n", + err->error); goto done; } @@ -356,6 +379,8 @@ int main(int argc, char *argv[]) count++; if (print_delays) print_delayacct((struct taskstats *) NLA_DATA(na)); + if (print_io_accounting) + print_ioacct((struct taskstats *) NLA_DATA(na)); if (fd) { if (write(fd, NLA_DATA(na), na->nla_len) < 0) { err(1,"write error\n"); @@ -365,7 +390,9 @@ int main(int argc, char *argv[]) goto done; break; default: - printf("Unknown nested nla_type %d\n", na->nla_type); + fprintf(stderr, "Unknown nested" + " nla_type %d\n", + na->nla_type); break; } len2 += NLA_ALIGN(na->nla_len); @@ -374,7 +401,8 @@ int main(int argc, char *argv[]) break; default: - printf("Unknown nla_type %d\n", na->nla_type); + fprintf(stderr, "Unknown nla_type %d\n", + na->nla_type); break; } na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); diff --git a/Documentation/ioctl/ioctl-decoding.txt b/Documentation/ioctl/ioctl-decoding.txt new file mode 100644 index 00000000000..bfdf7f3ee4f --- /dev/null +++ b/Documentation/ioctl/ioctl-decoding.txt @@ -0,0 +1,24 @@ +To decode a hex IOCTL code: + +Most architecures use this generic format, but check +include/ARCH/ioctl.h for specifics, e.g. powerpc +uses 3 bits to encode read/write and 13 bits for size. + + bits meaning + 31-30 00 - no parameters: uses _IO macro + 10 - read: _IOR + 01 - write: _IOW + 11 - read/write: _IOWR + + 29-16 size of arguments + + 15-8 ascii character supposedly + unique to each driver + + 7-0 function # + + + So for example 0x82187201 is a read with arg length of 0x218, +character 'r' function 1. Grepping the source reveals this is: + +#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2]) diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index a1e0ee20f59..f9717fe9bd8 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -102,7 +102,7 @@ struct pxa2xx_spi_chip { u8 tx_threshold; u8 rx_threshold; u8 dma_burst_size; - u32 timeout_microsecs; + u32 timeout; u8 enable_loopback; void (*cs_control)(u32 command); }; @@ -121,7 +121,7 @@ the PXA2xx "Developer Manual" sections on the DMA controller and SSP Controllers to determine the correct value. An SSP configured for byte-wide transfers would use a value of 8. -The "pxa2xx_spi_chip.timeout_microsecs" fields is used to efficiently handle +The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle trailing bytes in the SSP receiver fifo. The correct value for this field is dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific slave device. 
Please note that the PXA2xx SSP 1 does not support trailing byte @@ -162,18 +162,18 @@ static void cs8405a_cs_control(u32 command) } static struct pxa2xx_spi_chip cs8415a_chip_info = { - .tx_threshold = 12, /* SSP hardward FIFO threshold */ - .rx_threshold = 4, /* SSP hardward FIFO threshold */ + .tx_threshold = 8, /* SSP hardward FIFO threshold */ + .rx_threshold = 8, /* SSP hardward FIFO threshold */ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */ - .timeout_microsecs = 64, /* Wait at least 64usec to handle trailing */ + .timeout = 235, /* See Intel documentation */ .cs_control = cs8415a_cs_control, /* Use external chip select */ }; static struct pxa2xx_spi_chip cs8405a_chip_info = { - .tx_threshold = 12, /* SSP hardward FIFO threshold */ - .rx_threshold = 4, /* SSP hardward FIFO threshold */ + .tx_threshold = 8, /* SSP hardward FIFO threshold */ + .rx_threshold = 8, /* SSP hardward FIFO threshold */ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */ - .timeout_microsecs = 64, /* Wait at least 64usec to handle trailing */ + .timeout = 235, /* See Intel documentation */ .cs_control = cs8405a_cs_control, /* Use external chip select */ }; diff --git a/MAINTAINERS b/MAINTAINERS index dbf449ba240..b2024938adc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2554,7 +2554,7 @@ S: Maintained REAL TIME CLOCK (RTC) SUBSYSTEM P: Alessandro Zummo M: a.zummo@towertech.it -L: linux-kernel@vger.kernel.org +L: rtc-linux@googlegroups.com S: Maintained REISERFS FILE SYSTEM @@ -10,8 +10,11 @@ NAME=Avast! A bilge rat! # Comments in this file are targeted only to the developer, do not # expect to learn how to build the kernel reading this file. -# Do not print "Entering directory ..." -MAKEFLAGS += --no-print-directory +# Do not: +# o use make's built-in rules and variables +# (this increases performance and avoid hard-to-debug behavour); +# o print "Entering directory ..."; +MAKEFLAGS += -rR --no-print-directory # We are using a recursive build, so we need to do a little thinking # to get the ordering right. @@ -271,12 +274,8 @@ export quiet Q KBUILD_VERBOSE # Look for make include files relative to root of kernel src MAKEFLAGS += --include-dir=$(srctree) -# We need some generic definitions -include $(srctree)/scripts/Kbuild.include - -# Do not use make's built-in rules and variables -# This increases performance and avoid hard-to-debug behavour -MAKEFLAGS += -rR +# We need some generic definitions. +include $(srctree)/scripts/Kbuild.include # Make variables (CC, etc...) @@ -1484,6 +1483,8 @@ endif # skip-makefile PHONY += FORCE FORCE: +# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes. +Makefile: ; # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable se we can use it in if_changed and friends. 
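The new Documentation/ioctl/ioctl-decoding.txt above walks through decoding 0x82187201 by hand. A small user-space sketch of the same bit arithmetic, assuming the generic _IOC layout that file describes (this program is not part of the patch):

#include <stdio.h>

/*
 * Generic _IOC layout (check include/<arch>/ioctl.h for exceptions):
 *   bits 31-30  direction, 29-16  argument size, 15-8  type char, 7-0  number
 */
int main(void)
{
	unsigned int code = 0x82187201;		/* VFAT_IOCTL_READDIR_BOTH */

	unsigned int dir  = (code >> 30) & 0x3;
	unsigned int size = (code >> 16) & 0x3fff;
	unsigned int type = (code >> 8) & 0xff;
	unsigned int nr   = code & 0xff;

	printf("dir=%u size=0x%x type='%c' nr=%u\n", dir, size, type, nr);
	/* prints: dir=2 size=0x218 type='r' nr=1 */
	return 0;
}

As the note in that file warns, architectures such as powerpc split the fields differently (3 direction bits, 13 size bits), so treat this as the common case only.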
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index fb804043b32..be133f1f75a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -979,7 +979,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, long timeout; int ret = -EINVAL; struct fdtable *fdt; - int max_fdset; + int max_fds; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -1003,9 +1003,9 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n < 0 || n > max_fdset) + if (n < 0 || n > max_fds) goto out_nofds; /* diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index c1d9fc8f1a8..ee677ced7b6 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -223,7 +223,7 @@ static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, static int cmode_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval && oldlenp) { size_t oldlen; @@ -326,7 +326,7 @@ static int p0_procctl(ctl_table *ctl, int write, struct file *filp, static int p0_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval && oldlenp) { size_t oldlen; @@ -370,7 +370,7 @@ static int cm_procctl(ctl_table *ctl, int write, struct file *filp, static int cm_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval && oldlenp) { size_t oldlen; diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 2c82412b9ef..5929f883e46 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -301,7 +301,7 @@ static void sp_cleanup(void) for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j++]; while (set) { diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index da35d455549..12878359f2c 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -40,12 +40,12 @@ static DEFINE_MUTEX(lasat_info_mutex); /* Strategy function to write EEPROM after changing string entry */ int sysctl_lasatstring(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) + void *newval, size_t newlen) { int r; mutex_lock(&lasat_info_mutex); r = sysctl_string(table, name, - nlen, oldval, oldlenp, newval, newlen, context); + nlen, oldval, oldlenp, newval, newlen); if (r < 0) { mutex_unlock(&lasat_info_mutex); return r; @@ -119,11 +119,11 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, /* Sysctl for setting the IP addresses */ int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) + void *newval, size_t newlen) { int r; mutex_lock(&lasat_info_mutex); - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context); + r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); if (r < 0) { mutex_unlock(&lasat_info_mutex); return r; @@ -139,14 +139,14 @@ int sysctl_lasat_intvec(ctl_table 
*table, int *name, int nlen, /* Same for RTC */ int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) + void *newval, size_t newlen) { int r; mutex_lock(&lasat_info_mutex); rtctmp = ds1603_read(); if (rtctmp < 0) rtctmp = 0; - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context); + r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); if (r < 0) { mutex_unlock(&lasat_info_mutex); return r; @@ -251,13 +251,12 @@ int proc_lasat_ip(ctl_table *table, int write, struct file *filp, static int sysctl_lasat_eeprom_value(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - void **context) + void *newval, size_t newlen) { int r; mutex_lock(&lasat_info_mutex); - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context); + r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); if (r < 0) { mutex_unlock(&lasat_info_mutex); return r; diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 4a673f5397a..2433d6fc68b 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -225,8 +225,7 @@ out: static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 9eccfbd1b53..2e74cb0b780 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -48,25 +48,10 @@ menu "Processor type and features" choice prompt "Xtensa Processor Configuration" - default XTENSA_CPU_LINUX_BE + default XTENSA_VARIANT_FSF -config XTENSA_CPU_LINUX_BE - bool "linux_be" - ---help--- - The linux_be processor configuration is the baseline Xtensa - configurations included in this kernel and also used by - binutils, gcc, and gdb. It contains no TIE, no coprocessors, - and the following configuration options: - - Code Density Option 2 Misc Special Registers - NSA/NSAU Instructions 128-bit Data Bus Width - Processor ID 8K, 2-way I and D Caches - Zero-Overhead Loops 2 Inst Address Break Registers - Big Endian 2 Data Address Break Registers - 64 General-Purpose Registers JTAG Interface and Trace Port - 17 Interrupts MMU w/ TLBs and Autorefill - 3 Interrupt Levels 8 Autorefill Ways (I/D TLBs) - 3 Timers Unaligned Exceptions +config XTENSA_VARIANT_FSF + bool "fsf" endchoice config MMU diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 3a3a4c66ef8..95f836db38f 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -11,13 +11,13 @@ # this architecture # Core configuration. -# (Use CPU=<xtensa_config> to use another default compiler.) +# (Use VAR=<xtensa_config> to use another default compiler.) 
-cpu-$(CONFIG_XTENSA_CPU_LINUX_BE) := linux_be -cpu-$(CONFIG_XTENSA_CPU_LINUX_CUSTOM) := linux_custom +variant-$(CONFIG_XTENSA_VARIANT_FSF) := fsf +variant-$(CONFIG_XTENSA_VARIANT_LINUX_CUSTOM) := custom -CPU = $(cpu-y) -export CPU +VARIANT = $(variant-y) +export VARIANT # Platform configuration @@ -27,8 +27,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss PLATFORM = $(platform-y) export PLATFORM -CPPFLAGS += $(if $(KBUILD_SRC),-I$(srctree)/include/asm-xtensa/) -CPPFLAGS += -Iinclude/asm CFLAGS += -pipe -mlongcalls KBUILD_DEFCONFIG := iss_defconfig @@ -41,12 +39,12 @@ core-$(CONFIG_EMBEDDED_RAMDISK) += arch/xtensa/boot/ramdisk/ # Test for cross compiling -ifneq ($(CPU),) +ifneq ($(VARIANT),) COMPILE_ARCH = $(shell uname -m) ifneq ($(COMPILE_ARCH), xtensa) ifndef CROSS_COMPILE - CROSS_COMPILE = xtensa_$(CPU)- + CROSS_COMPILE = xtensa_$(VARIANT)- endif endif endif @@ -68,14 +66,13 @@ archinc := include/asm-xtensa archprepare: $(archinc)/.platform -# Update machine cpu and platform symlinks if something which affects +# Update processor variant and platform symlinks if something which affects # them changed. $(archinc)/.platform: $(wildcard include/config/arch/*.h) include/config/auto.conf - @echo ' SYMLINK $(archinc)/xtensa/config -> $(archinc)/xtensa/config-$(CPU)' + @echo ' SYMLINK $(archinc)/variant -> $(archinc)/variant-$(VARIANT)' $(Q)mkdir -p $(archinc) - $(Q)mkdir -p $(archinc)/xtensa - $(Q)ln -fsn $(srctree)/$(archinc)/xtensa/config-$(CPU) $(archinc)/xtensa/config + $(Q)ln -fsn $(srctree)/$(archinc)/variant-$(VARIANT) $(archinc)/variant @echo ' SYMLINK $(archinc)/platform -> $(archinc)/platform-$(PLATFORM)' $(Q)ln -fsn $(srctree)/$(archinc)/platform-$(PLATFORM) $(archinc)/platform @touch $@ @@ -89,7 +86,7 @@ zImage zImage.initrd: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ CLEAN_FILES += arch/xtensa/vmlinux.lds \ - $(archinc)/platform $(archinc)/xtensa/config \ + $(archinc)/platform $(archinc)/variant \ $(archinc)/.platform define archhelp diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S index f857fc760aa..464298bc348 100644 --- a/arch/xtensa/boot/boot-elf/bootstrap.S +++ b/arch/xtensa/boot/boot-elf/bootstrap.S @@ -1,7 +1,4 @@ -#include <xtensa/config/specreg.h> -#include <xtensa/config/core.h> - #include <asm/bootparam.h> diff --git a/arch/xtensa/boot/boot-redboot/bootstrap.S b/arch/xtensa/boot/boot-redboot/bootstrap.S index ee636b0da81..84848123e2a 100644 --- a/arch/xtensa/boot/boot-redboot/bootstrap.S +++ b/arch/xtensa/boot/boot-redboot/bootstrap.S @@ -1,9 +1,7 @@ - -#define _ASMLANGUAGE -#include <xtensa/config/specreg.h> -#include <xtensa/config/core.h> -#include <xtensa/cacheasm.h> - +#include <asm/variant/core.h> +#include <asm/regs.h> +#include <asm/asmmacro.h> +#include <asm/cacheasm.h> /* * RB-Data: RedBoot data/bss * P: Boot-Parameters @@ -77,8 +75,14 @@ _start: /* Note: The assembler cannot relax "addi a0, a0, ..." to an l32r, so we load to a4 first. */ - addi a4, a0, __start - __start_a0 - mov a0, a4 + # addi a4, a0, __start - __start_a0 + # mov a0, a4 + + movi a4, __start + movi a5, __start_a0 + add a4, a0, a4 + sub a0, a4, a5 + movi a4, __start movi a5, __reloc_end @@ -106,9 +110,13 @@ _start: /* We have to flush and invalidate the caches here before we jump. 
*/ #if XCHAL_DCACHE_IS_WRITEBACK - dcache_writeback_all a5, a6 + + ___flush_dcache_all a5 a6 + #endif - icache_invalidate_all a5, a6 + + ___invalidate_icache_all a5 a6 + isync movi a11, _reloc jx a11 @@ -209,9 +217,14 @@ _reloc: /* jump to the kernel */ 2: #if XCHAL_DCACHE_IS_WRITEBACK - dcache_writeback_all a5, a6 + + ___flush_dcache_all a5 a6 + #endif - icache_invalidate_all a5, a6 + + ___invalidate_icache_all a5 a6 + + isync movi a5, __start movi a3, boot_initrd_start diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 802621dd486..f19854035e6 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -53,11 +53,7 @@ CONFIG_CC_ALIGN_JUMPS=0 # # Processor type and features # -CONFIG_XTENSA_ARCH_LINUX_BE=y -# CONFIG_XTENSA_ARCH_LINUX_LE is not set -# CONFIG_XTENSA_ARCH_LINUX_TEST is not set -# CONFIG_XTENSA_ARCH_S5 is not set -# CONFIG_XTENSA_CUSTOM is not set +CONFIG_XTENSA_VARIANT_FSF=y CONFIG_MMU=y # CONFIG_XTENSA_UNALIGNED_USER is not set # CONFIG_PREEMPT is not set diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index a4956578a24..33d6e9d2e83 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -16,14 +16,9 @@ */ #include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm/ptrace.h> #include <asm/current.h> #include <asm/asm-offsets.h> -#include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/page.h> -#include <asm/thread_info.h> #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION @@ -216,7 +211,7 @@ ENTRY(fast_unaligned) extui a5, a4, INSN_OP0, 4 # get insn.op0 nibble -#if XCHAL_HAVE_NARROW +#if XCHAL_HAVE_DENSITY _beqi a5, OP0_L32I_N, .Lload # L32I.N, jump addi a6, a5, -OP0_S32I_N _beqz a6, .Lstore # S32I.N, do a store @@ -251,7 +246,7 @@ ENTRY(fast_unaligned) #endif __src_b a3, a5, a6 # a3 has the data word -#if XCHAL_HAVE_NARROW +#if XCHAL_HAVE_DENSITY addi a7, a7, 2 # increment PC (assume 16-bit insn) extui a5, a4, INSN_OP0, 4 @@ -279,14 +274,14 @@ ENTRY(fast_unaligned) 1: -#if XCHAL_HAVE_LOOP - rsr a3, LEND # check if we reached LEND - bne a7, a3, 1f - rsr a3, LCOUNT # and LCOUNT != 0 - beqz a3, 1f - addi a3, a3, -1 # decrement LCOUNT and set +#if XCHAL_HAVE_LOOPS + rsr a5, LEND # check if we reached LEND + bne a7, a5, 1f + rsr a5, LCOUNT # and LCOUNT != 0 + beqz a5, 1f + addi a5, a5, -1 # decrement LCOUNT and set rsr a7, LBEG # set PC to LBEGIN - wsr a3, LCOUNT + wsr a5, LCOUNT #endif 1: wsr a7, EPC_1 # skip load instruction @@ -336,7 +331,7 @@ ENTRY(fast_unaligned) movi a6, 0 # mask: ffffffff:00000000 -#if XCHAL_HAVE_NARROW +#if XCHAL_HAVE_DENSITY addi a7, a7, 2 # incr. 
PC,assume 16-bit instruction extui a5, a4, INSN_OP0, 4 # extract OP0 @@ -359,14 +354,14 @@ ENTRY(fast_unaligned) /* Get memory address */ 1: -#if XCHAL_HAVE_LOOP - rsr a3, LEND # check if we reached LEND - bne a7, a3, 1f - rsr a3, LCOUNT # and LCOUNT != 0 - beqz a3, 1f - addi a3, a3, -1 # decrement LCOUNT and set +#if XCHAL_HAVE_LOOPS + rsr a4, LEND # check if we reached LEND + bne a7, a4, 1f + rsr a4, LCOUNT # and LCOUNT != 0 + beqz a4, 1f + addi a4, a4, -1 # decrement LCOUNT and set rsr a7, LBEG # set PC to LBEGIN - wsr a3, LCOUNT + wsr a4, LCOUNT #endif 1: wsr a7, EPC_1 # skip store instruction @@ -416,6 +411,7 @@ ENTRY(fast_unaligned) /* Restore working register */ + l32i a8, a2, PT_AREG8 l32i a7, a2, PT_AREG7 l32i a6, a2, PT_AREG6 l32i a5, a2, PT_AREG5 @@ -446,7 +442,7 @@ ENTRY(fast_unaligned) mov a1, a2 rsr a0, PS - bbsi.l a2, PS_UM_SHIFT, 1f # jump if user mode + bbsi.l a2, PS_UM_BIT, 1f # jump if user mode movi a0, _kernel_exception jx a0 diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 7cd1d7f8f60..b256cfbef34 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -87,6 +87,11 @@ int main(void) DEFINE(MM_CONTEXT, offsetof (struct mm_struct, context)); BLANK(); DEFINE(PT_SINGLESTEP_BIT, PT_SINGLESTEP_BIT); + + /* constants */ + DEFINE(_CLONE_VM, CLONE_VM); + DEFINE(_CLONE_UNTRACED, CLONE_UNTRACED); + return 0; } diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index cf5a93fb6a2..01bcb9fcfcb 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -90,7 +90,6 @@ ENTRY(enable_coprocessor) rsync retw -#endif ENTRY(save_coprocessor_extra) entry sp, 16 @@ -197,4 +196,5 @@ _xtensa_reginfo_tables: XCHAL_CP7_SA_CONTENTS_LIBDB .word 0xFC000000 /* invalid register number,marks end of table*/ _xtensa_reginfo_table_end: +#endif diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 89e409e9e0d..9e271ba009b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -24,7 +24,7 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/signal.h> -#include <xtensa/coreasm.h> +#include <asm/tlbflush.h> /* Unimplemented features. */ @@ -364,7 +364,7 @@ common_exception: movi a2, 1 extui a3, a3, 0, 1 # a3 = PS.INTLEVEL[0] moveqz a3, a2, a0 # a3 = 1 iff interrupt exception - movi a2, PS_WOE_MASK + movi a2, 1 << PS_WOE_BIT or a3, a3, a2 rsr a0, EXCCAUSE xsr a3, PS @@ -399,7 +399,7 @@ common_exception_return: /* Jump if we are returning from kernel exceptions. 
*/ 1: l32i a3, a1, PT_PS - _bbsi.l a3, PS_UM_SHIFT, 2f + _bbsi.l a3, PS_UM_BIT, 2f j kernel_exception_exit /* Specific to a user exception exit: @@ -422,7 +422,7 @@ common_exception_return: * (Hint: There is only one user exception frame on stack) */ - movi a3, PS_WOE_MASK + movi a3, 1 << PS_WOE_BIT _bbsi.l a4, TIF_NEED_RESCHED, 3f _bbci.l a4, TIF_SIGPENDING, 4f @@ -694,7 +694,7 @@ common_exception_exit: ENTRY(debug_exception) rsr a0, EPS + XCHAL_DEBUGLEVEL - bbsi.l a0, PS_EXCM_SHIFT, 1f # exception mode + bbsi.l a0, PS_EXCM_BIT, 1f # exception mode /* Set EPC_1 and EXCCAUSE */ @@ -707,7 +707,7 @@ ENTRY(debug_exception) /* Restore PS to the value before the debug exc but with PS.EXCM set.*/ - movi a2, 1 << PS_EXCM_SHIFT + movi a2, 1 << PS_EXCM_BIT or a2, a0, a2 movi a0, debug_exception # restore a3, debug jump vector wsr a2, PS @@ -715,7 +715,7 @@ ENTRY(debug_exception) /* Switch to kernel/user stack, restore jump vector, and save a0 */ - bbsi.l a2, PS_UM_SHIFT, 2f # jump if user mode + bbsi.l a2, PS_UM_BIT, 2f # jump if user mode addi a2, a1, -16-PT_SIZE # assume kernel stack s32i a0, a2, PT_AREG0 @@ -778,7 +778,7 @@ ENTRY(unrecoverable_exception) wsr a1, WINDOWBASE rsync - movi a1, PS_WOE_MASK | 1 + movi a1, (1 << PS_WOE_BIT) | 1 wsr a1, PS rsync @@ -1004,13 +1004,10 @@ ENTRY(fast_syscall_kernel) rsr a0, DEPC # get syscall-nr _beqz a0, fast_syscall_spill_registers - - addi a0, a0, -__NR_sysxtensa - _beqz a0, fast_syscall_sysxtensa + _beqi a0, __NR_xtensa, fast_syscall_xtensa j kernel_exception - ENTRY(fast_syscall_user) /* Skip syscall. */ @@ -1024,9 +1021,7 @@ ENTRY(fast_syscall_user) rsr a0, DEPC # get syscall-nr _beqz a0, fast_syscall_spill_registers - - addi a0, a0, -__NR_sysxtensa - _beqz a0, fast_syscall_sysxtensa + _beqi a0, __NR_xtensa, fast_syscall_xtensa j user_exception @@ -1047,18 +1042,19 @@ ENTRY(fast_syscall_unrecoverable) /* * sysxtensa syscall handler * - * int sysxtensa (XTENSA_ATOMIC_SET, ptr, val, unused); - * int sysxtensa (XTENSA_ATOMIC_ADD, ptr, val, unused); - * int sysxtensa (XTENSA_ATOMIC_EXG_ADD, ptr, val, unused); - * int sysxtensa (XTENSA_ATOMIC_CMP_SWP, ptr, oldval, newval); - * a2 a6 a3 a4 a5 + * int sysxtensa (SYS_XTENSA_ATOMIC_SET, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_ADD, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_EXG_ADD, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_CMP_SWP, ptr, oldval, newval); + * a2 a6 a3 a4 a5 * * Entry condition: * - * a0: trashed, original value saved on stack (PT_AREG0) + * a0: a2 (syscall-nr), original value saved on stack (PT_AREG0) * a1: a1 - * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a2: new stack pointer, original in a0 and DEPC + * a3: dispatch table, original in excsave_1 + * a4..a15: unchanged * depc: a2, original value saved on stack (PT_DEPC) * excsave_1: a3 * @@ -1091,59 +1087,62 @@ ENTRY(fast_syscall_unrecoverable) #define CATCH \ 67: -ENTRY(fast_syscall_sysxtensa) - - _beqz a6, 1f - _blti a6, SYSXTENSA_COUNT, 2f +ENTRY(fast_syscall_xtensa) -1: j user_exception - -2: xsr a3, EXCSAVE_1 # restore a3, excsave1 - s32i a7, a2, PT_AREG7 + xsr a3, EXCSAVE_1 # restore a3, excsave1 + s32i a7, a2, PT_AREG7 # we need an additional register movi a7, 4 # sizeof(unsigned int) - access_ok a0, a3, a7, a2, .Leac + access_ok a3, a7, a0, a2, .Leac # a0: scratch reg, a2: sp - _beqi a6, SYSXTENSA_ATOMIC_SET, .Lset - _beqi a6, SYSXTENSA_ATOMIC_EXG_ADD, .Lexg - _beqi a6, SYSXTENSA_ATOMIC_ADD, .Ladd + addi a6, a6, -1 # assuming SYS_XTENSA_ATOMIC_SET = 1 + _bgeui a6, 
SYS_XTENSA_COUNT - 1, .Lill + _bnei a6, SYS_XTENSA_ATOMIC_CMP_SWP - 1, .Lnswp - /* Fall through for SYSXTENSA_ATOMIC_CMP_SWP */ + /* Fall through for ATOMIC_CMP_SWP. */ .Lswp: /* Atomic compare and swap */ -TRY l32i a7, a3, 0 # read old value - bne a7, a4, 1f # same as old value? jump - s32i a5, a3, 0 # different, modify value - movi a7, 1 # and return 1 - j .Lret - -1: movi a7, 0 # same values: return 0 - j .Lret - -.Ladd: /* Atomic add */ -.Lexg: /* Atomic (exchange) add */ +TRY l32i a0, a3, 0 # read old value + bne a0, a4, 1f # same as old value? jump +TRY s32i a5, a3, 0 # different, modify value + l32i a7, a2, PT_AREG7 # restore a7 + l32i a0, a2, PT_AREG0 # restore a0 + movi a2, 1 # and return 1 + addi a6, a6, 1 # restore a6 (really necessary?) + rfe -TRY l32i a7, a3, 0 - add a4, a4, a7 - s32i a4, a3, 0 - j .Lret +1: l32i a7, a2, PT_AREG7 # restore a7 + l32i a0, a2, PT_AREG0 # restore a0 + movi a2, 0 # return 0 (note that we cannot set + addi a6, a6, 1 # restore a6 (really necessary?) + rfe -.Lset: /* Atomic set */ +.Lnswp: /* Atomic set, add, and exg_add. */ -TRY l32i a7, a3, 0 # read old value as return value - s32i a4, a3, 0 # write new value +TRY l32i a7, a3, 0 # orig + add a0, a4, a7 # + arg + moveqz a0, a4, a6 # set +TRY s32i a0, a3, 0 # write new value -.Lret: mov a0, a2 + mov a0, a2 mov a2, a7 - l32i a7, a0, PT_AREG7 - l32i a3, a0, PT_AREG3 - l32i a0, a0, PT_AREG0 + l32i a7, a0, PT_AREG7 # restore a7 + l32i a0, a0, PT_AREG0 # restore a0 + addi a6, a6, 1 # restore a6 (really necessary?) rfe CATCH -.Leac: movi a7, -EFAULT - j .Lret +.Leac: l32i a7, a2, PT_AREG7 # restore a7 + l32i a0, a2, PT_AREG0 # restore a0 + movi a2, -EFAULT + rfe + +.Lill: l32i a7, a2, PT_AREG0 # restore a7 + l32i a0, a2, PT_AREG0 # restore a0 + movi a2, -EINVAL + rfe + @@ -1491,7 +1490,7 @@ ENTRY(_spill_registers) */ rsr a0, PS - _bbci.l a0, PS_UM_SHIFT, 1f + _bbci.l a0, PS_UM_BIT, 1f /* User space: Setup a dummy frame and kill application. * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. @@ -1510,7 +1509,7 @@ ENTRY(_spill_registers) l32i a1, a3, EXC_TABLE_KSTK wsr a3, EXCSAVE_1 - movi a4, PS_WOE_MASK | 1 + movi a4, (1 << PS_WOE_BIT) | 1 wsr a4, PS rsync @@ -1612,7 +1611,7 @@ ENTRY(fast_second_level_miss) rsr a1, PTEVADDR srli a1, a1, PAGE_SHIFT slli a1, a1, PAGE_SHIFT # ptevaddr & PAGE_MASK - addi a1, a1, DTLB_WAY_PGTABLE # ... + way_number + addi a1, a1, DTLB_WAY_PGD # ... + way_number wdtlb a0, a1 dsync @@ -1654,7 +1653,7 @@ ENTRY(fast_second_level_miss) mov a1, a2 rsr a2, PS - bbsi.l a2, PS_UM_SHIFT, 1f + bbsi.l a2, PS_UM_BIT, 1f j _kernel_exception 1: j _user_exception @@ -1753,7 +1752,7 @@ ENTRY(fast_store_prohibited) mov a1, a2 rsr a2, PS - bbsi.l a2, PS_UM_SHIFT, 1f + bbsi.l a2, PS_UM_BIT, 1f j _kernel_exception 1: j _user_exception @@ -1907,6 +1906,103 @@ ENTRY(fast_coprocessor) #endif /* XCHAL_EXTRA_SA_SIZE */ /* + * System Calls. + * + * void system_call (struct pt_regs* regs, int exccause) + * a2 a3 + */ + +ENTRY(system_call) + entry a1, 32 + + /* regs->syscall = regs->areg[2] */ + + l32i a3, a2, PT_AREG2 + mov a6, a2 + movi a4, do_syscall_trace_enter + s32i a3, a2, PT_SYSCALL + callx4 a4 + + /* syscall = sys_call_table[syscall_nr] */ + + movi a4, sys_call_table; + movi a5, __NR_syscall_count + movi a6, -ENOSYS + bgeu a3, a5, 1f + + addx4 a4, a3, a4 + l32i a4, a4, 0 + movi a5, sys_ni_syscall; + beq a4, a5, 1f + + /* Load args: arg0 - arg5 are passed via regs. 
*/ + + l32i a6, a2, PT_AREG6 + l32i a7, a2, PT_AREG3 + l32i a8, a2, PT_AREG4 + l32i a9, a2, PT_AREG5 + l32i a10, a2, PT_AREG8 + l32i a11, a2, PT_AREG9 + + /* Pass one additional argument to the syscall: pt_regs (on stack) */ + s32i a2, a1, 0 + + callx4 a4 + +1: /* regs->areg[2] = return_value */ + + s32i a6, a2, PT_AREG2 + movi a4, do_syscall_trace_leave + mov a6, a2 + callx4 a4 + retw + + +/* + * Create a kernel thread + * + * int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) + * a2 a2 a3 a4 + */ + +ENTRY(kernel_thread) + entry a1, 16 + + mov a5, a2 # preserve fn over syscall + mov a7, a3 # preserve args over syscall + + movi a3, _CLONE_VM | _CLONE_UNTRACED + movi a2, __NR_clone + or a6, a4, a3 # arg0: flags + mov a3, a1 # arg1: sp + syscall + + beq a3, a1, 1f # branch if parent + mov a6, a7 # args + callx4 a5 # fn(args) + + movi a2, __NR_exit + syscall # return value of fn(args) still in a6 + +1: retw + +/* + * Do a system call from kernel instead of calling sys_execve, so we end up + * with proper pt_regs. + * + * int kernel_execve(const char *fname, char *const argv[], charg *const envp[]) + * a2 a2 a3 a4 + */ + +ENTRY(kernel_execve) + entry a1, 16 + mov a6, a2 # arg0 is in a6 + movi a2, __NR_execve + syscall + + retw + +/* * Task switch. * * struct task* _switch_to (struct task* prev, struct task* next) @@ -1924,7 +2020,7 @@ ENTRY(_switch_to) /* Disable ints while we manipulate the stack pointer; spill regs. */ - movi a5, PS_EXCM_MASK | LOCKLEVEL + movi a5, (1 << PS_EXCM_BIT) | LOCKLEVEL xsr a5, PS rsr a3, EXCSAVE_1 rsync @@ -1964,33 +2060,9 @@ ENTRY(ret_from_fork) movi a4, schedule_tail callx4 a4 - movi a4, do_syscall_trace + movi a4, do_syscall_trace_leave + mov a6, a1 callx4 a4 j common_exception_return - - -/* - * Table of syscalls - */ - -.data -.align 4 -.global sys_call_table -sys_call_table: - -#define SYSCALL(call, narg) .word call -#include "syscalls.h" - -/* - * Number of arguments of each syscall - */ - -.global sys_narg_table -sys_narg_table: - -#undef SYSCALL -#define SYSCALL(call, narg) .byte narg -#include "syscalls.h" - diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index c07cb252299..ea89910efa4 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -15,9 +15,9 @@ * Kevin Chea */ -#include <xtensa/cacheasm.h> #include <asm/processor.h> #include <asm/page.h> +#include <asm/cacheasm.h> /* * This module contains the entry code for kernel images. It performs the @@ -32,13 +32,6 @@ * */ - .macro iterate from, to , cmd - .ifeq ((\to - \from) & ~0xfff) - \cmd \from - iterate "(\from+1)", \to, \cmd - .endif - .endm - /* * _start * @@ -64,7 +57,7 @@ _startup: /* Disable interrupts and exceptions. */ - movi a0, XCHAL_PS_EXCM_MASK + movi a0, LOCKLEVEL wsr a0, PS /* Preserve the pointer to the boot parameter list in EXCSAVE_1 */ @@ -91,11 +84,11 @@ _startup: movi a1, 15 wsr a0, ICOUNTLEVEL - .macro reset_dbreak num - wsr a0, DBREAKC + \num - .endm - - iterate 0, XCHAL_NUM_IBREAK-1, reset_dbreak + .set _index, 0 + .rept XCHAL_NUM_DBREAK - 1 + wsr a0, DBREAKC + _index + .set _index, _index + 1 + .endr #endif /* Clear CCOUNT (not really necessary, but nice) */ @@ -110,10 +103,11 @@ _startup: /* Disable all timers. */ - .macro reset_timer num - wsr a0, CCOMPARE_0 + \num - .endm - iterate 0, XCHAL_NUM_TIMERS-1, reset_timer + .set _index, 0 + .rept XCHAL_NUM_TIMERS - 1 + wsr a0, CCOMPARE + _index + .set _index, _index + 1 + .endr /* Interrupt initialization. */ @@ -139,12 +133,21 @@ _startup: rsync /* Initialize the caches. 
- * Does not include flushing writeback d-cache. - * a6, a7 are just working registers (clobbered). + * a2, a3 are just working registers (clobbered). */ - icache_reset a2, a3 - dcache_reset a2, a3 +#if XCHAL_DCACHE_LINE_LOCKABLE + ___unlock_dcache_all a2 a3 +#endif + +#if XCHAL_ICACHE_LINE_LOCKABLE + ___unlock_icache_all a2 a3 +#endif + + ___invalidate_dcache_all a2 a3 + ___invalidate_icache_all a2 a3 + + isync /* Unpack data sections * @@ -181,9 +184,9 @@ _startup: movi a2, _bss_start # start of BSS movi a3, _bss_end # end of BSS -1: addi a2, a2, 4 + __loopt a2, a3, a4, 2 s32i a0, a2, 0 - blt a2, a3, 1b + __endla a2, a4, 4 #if XCHAL_DCACHE_IS_WRITEBACK @@ -191,7 +194,7 @@ _startup: * instructions/data are available. */ - dcache_writeback_all a2, a3 + ___flush_dcache_all a2 a3 #endif /* Setup stack and enable window exceptions (keep irqs disabled) */ diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 1cf744ee095..c9ea73b7031 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -4,7 +4,7 @@ * Xtensa built-in interrupt controller and some generic functions copied * from i386. * - * Copyright (C) 2002 - 2005 Tensilica, Inc. + * Copyright (C) 2002 - 2006 Tensilica, Inc. * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * @@ -22,11 +22,6 @@ #include <asm/uaccess.h> #include <asm/platform.h> -static void enable_xtensa_irq(unsigned int irq); -static void disable_xtensa_irq(unsigned int irq); -static void mask_and_ack_xtensa(unsigned int irq); -static void end_xtensa_irq(unsigned int irq); - static unsigned int cached_irq_mask; atomic_t irq_err_count; @@ -46,8 +41,16 @@ void ack_bad_irq(unsigned int irq) * handlers). */ -unsigned int do_IRQ(int irq, struct pt_regs *regs) +asmlinkage void do_IRQ(int irq, struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc = irq_desc + irq; + + if (irq >= NR_IRQS) { + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", + __FUNCTION__, irq); + } + irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -63,12 +66,10 @@ unsigned int do_IRQ(int irq, struct pt_regs *regs) sp - sizeof(struct thread_info)); } #endif - - __do_IRQ(irq, regs); + desc->handle_irq(irq, desc); irq_exit(); - - return 1; + set_irq_regs(old_regs); } /* @@ -118,72 +119,68 @@ skip: } return 0; } -/* shutdown is same as "disable" */ -#define shutdown_xtensa_irq disable_xtensa_irq -static unsigned int startup_xtensa_irq(unsigned int irq) -{ - enable_xtensa_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type xtensa_irq_type = { - "Xtensa-IRQ", - startup_xtensa_irq, - shutdown_xtensa_irq, - enable_xtensa_irq, - disable_xtensa_irq, - mask_and_ack_xtensa, - end_xtensa_irq -}; - -static inline void mask_irq(unsigned int irq) +static void xtensa_irq_mask(unsigned int irq) { cached_irq_mask &= ~(1 << irq); set_sr (cached_irq_mask, INTENABLE); } -static inline void unmask_irq(unsigned int irq) +static void xtensa_irq_unmask(unsigned int irq) { cached_irq_mask |= 1 << irq; set_sr (cached_irq_mask, INTENABLE); } -static void disable_xtensa_irq(unsigned int irq) +static void xtensa_irq_ack(unsigned int irq) { - unsigned long flags; - local_save_flags(flags); - mask_irq(irq); - local_irq_restore(flags); + set_sr(1 << irq, INTCLEAR); } -static void enable_xtensa_irq(unsigned int irq) +static int xtensa_irq_retrigger(unsigned int irq) { - unsigned long flags; - local_save_flags(flags); - unmask_irq(irq); - local_irq_restore(flags); -} - -static void mask_and_ack_xtensa(unsigned int irq) -{ - 
disable_xtensa_irq(irq); + set_sr (1 << irq, INTSET); + return 1; } -static void end_xtensa_irq(unsigned int irq) -{ - enable_xtensa_irq(irq); -} +static struct irq_chip xtensa_irq_chip = { + .name = "xtensa", + .mask = xtensa_irq_mask, + .unmask = xtensa_irq_unmask, + .ack = xtensa_irq_ack, + .retrigger = xtensa_irq_retrigger, +}; void __init init_IRQ(void) { - int i; + int index; - for (i=0; i < XTENSA_NR_IRQS; i++) - irq_desc[i].chip = &xtensa_irq_type; + for (index = 0; index < XTENSA_NR_IRQS; index++) { + int mask = 1 << index; - cached_irq_mask = 0; + if (mask & XCHAL_INTTYPE_MASK_SOFTWARE) + set_irq_chip_and_handler(index, &xtensa_irq_chip, + handle_simple_irq); - platform_init_irq(); + else if (mask & XCHAL_INTTYPE_MASK_EXTERN_EDGE) + set_irq_chip_and_handler(index, &xtensa_irq_chip, + handle_edge_irq); + + else if (mask & XCHAL_INTTYPE_MASK_EXTERN_LEVEL) + set_irq_chip_and_handler(index, &xtensa_irq_chip, + handle_level_irq); + + else if (mask & XCHAL_INTTYPE_MASK_TIMER) + set_irq_chip_and_handler(index, &xtensa_irq_chip, + handle_edge_irq); + + else /* XCHAL_INTTYPE_MASK_WRITE_ERROR */ + /* XCHAL_INTTYPE_MASK_NMI */ + + set_irq_chip_and_handler(index, &xtensa_irq_chip, + handle_level_irq); + } + + cached_irq_mask = 0; } diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 6648fa9d919..ca76f071666 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -1,5 +1,5 @@ /* - * arch/xtensa/kernel/pci-dma.c + * arch/xtensa/pci-dma.c * * DMA coherent memory allocation. * @@ -29,28 +29,48 @@ */ void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag) { - void *ret; + unsigned long ret; + unsigned long uncached = 0; /* ignore region speicifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (dev == NULL || (*dev->dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); + flag &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (ret != NULL) { - memset(ret, 0, size); - *handle = virt_to_bus(ret); + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + flag |= GFP_DMA; + ret = (unsigned long)__get_free_pages(flag, get_order(size)); + + if (ret == 0) + return NULL; + + /* We currently don't support coherent memory outside KSEG */ + + if (ret < XCHAL_KSEG_CACHED_VADDR + || ret >= XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE) + BUG(); + + + if (ret != 0) { + memset((void*) ret, 0, size); + uncached = ret+XCHAL_KSEG_BYPASS_VADDR-XCHAL_KSEG_CACHED_VADDR; + *handle = virt_to_bus((void*)ret); + __flush_invalidate_dcache_range(ret, size); } - return (void*) BYPASS_ADDR((unsigned long)ret); + + return (void*)uncached; } void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { - free_pages(CACHED_ADDR((unsigned long)vaddr), get_order(size)); + long addr=(long)vaddr+XCHAL_KSEG_CACHED_VADDR-XCHAL_KSEG_BYPASS_VADDR; + + if (addr < 0 || addr >= XCHAL_KSEG_SIZE) + BUG(); + + free_pages(addr, get_order(size)); } diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index a7c4178c2a8..795bd5ac6f4 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -1,4 +1,3 @@ -// TODO verify coprocessor handling /* * arch/xtensa/kernel/process.c * @@ -43,7 +42,7 @@ #include <asm/irq.h> #include <asm/atomic.h> #include <asm/asm-offsets.h> -#include <asm/coprocessor.h> +#include <asm/regs.h> extern void ret_from_fork(void); @@ -67,25 +66,6 @@ void 
(*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); -#if XCHAL_CP_NUM > 0 - -/* - * Coprocessor ownership. - */ - -coprocessor_info_t coprocessor_info[] = { - { 0, XTENSA_CPE_CP0_OFFSET }, - { 0, XTENSA_CPE_CP1_OFFSET }, - { 0, XTENSA_CPE_CP2_OFFSET }, - { 0, XTENSA_CPE_CP3_OFFSET }, - { 0, XTENSA_CPE_CP4_OFFSET }, - { 0, XTENSA_CPE_CP5_OFFSET }, - { 0, XTENSA_CPE_CP6_OFFSET }, - { 0, XTENSA_CPE_CP7_OFFSET }, -}; - -#endif - /* * Powermanagement idle function, if any is provided by the platform. */ @@ -110,12 +90,10 @@ void cpu_idle(void) void exit_thread(void) { - release_coprocessors(current); /* Empty macro if no CPs are defined */ } void flush_thread(void) { - release_coprocessors(current); /* Empty macro if no CPs are defined */ } /* @@ -183,36 +161,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, /* - * Create a kernel thread - */ - -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - long retval; - __asm__ __volatile__ - ("mov a5, %4\n\t" /* preserve fn in a5 */ - "mov a6, %3\n\t" /* preserve and setup arg in a6 */ - "movi a2, %1\n\t" /* load __NR_clone for syscall*/ - "mov a3, sp\n\t" /* sp check and sys_clone */ - "mov a4, %5\n\t" /* load flags for syscall */ - "syscall\n\t" - "beq a3, sp, 1f\n\t" /* branch if parent */ - "callx4 a5\n\t" /* call fn */ - "movi a2, %2\n\t" /* load __NR_exit for syscall */ - "mov a3, a6\n\t" /* load fn return value */ - "syscall\n" - "1:\n\t" - "mov %0, a2\n\t" /* parent returns zero */ - :"=r" (retval) - :"i" (__NR_clone), "i" (__NR_exit), - "r" (arg), "r" (fn), - "r" (flags | CLONE_VM) - : "a2", "a3", "a4", "a5", "a6" ); - return retval; -} - - -/* * These bracket the sleeping functions.. */ @@ -275,7 +223,7 @@ void do_copy_regs (xtensa_gregset_t *elfregs, struct pt_regs *regs, */ elfregs->pc = regs->pc; - elfregs->ps = (regs->ps & ~XCHAL_PS_EXCM_MASK); + elfregs->ps = (regs->ps & ~(1 << PS_EXCM_BIT)); elfregs->exccause = regs->exccause; elfregs->excvaddr = regs->excvaddr; elfregs->windowbase = regs->windowbase; @@ -325,7 +273,7 @@ void do_restore_regs (xtensa_gregset_t *elfregs, struct pt_regs *regs, */ regs->pc = elfregs->pc; - regs->ps = (elfregs->ps | XCHAL_PS_EXCM_MASK); + regs->ps = (elfregs->ps | (1 << PS_EXCM_BIT)); regs->exccause = elfregs->exccause; regs->excvaddr = elfregs->excvaddr; regs->windowbase = elfregs->windowbase; @@ -459,16 +407,7 @@ int do_restore_fpregs (elf_fpregset_t *fpregs, struct pt_regs *regs, int dump_task_fpu(struct pt_regs *regs, struct task_struct *task, elf_fpregset_t *r) { -/* see asm/coprocessor.h for this magic number 16 */ -#if XTENSA_CP_EXTRA_SIZE > 16 - do_save_fpregs (r, regs, task); - - /* For now, bit 16 means some extra state may be present: */ -// FIXME!! need to track to return more accurate mask - return 0x10000 | XCHAL_CP_MASK; -#else return 0; /* no coprocessors active on this processor */ -#endif } /* @@ -483,3 +422,44 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r) { return dump_task_fpu(regs, current, r); } + +asmlinkage +long xtensa_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void *child_tls, + void __user *child_tid, long a5, + struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->areg[1]; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); +} + +/* + * * xtensa_execve() executes a new program. 
+ * */ + +asmlinkage +long xtensa_execve(char __user *name, char __user * __user *argv, + char __user * __user *envp, + long a3, long a4, long a5, + struct pt_regs *regs) +{ + long error; + char * filename; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + // FIXME: release coprocessor?? + error = do_execve(filename, argv, envp, regs); + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); +out: + return error; +} + diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 9aea23cc0dc..8b6d3d0623b 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -96,7 +96,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* Note: PS.EXCM is not set while user task is running; * its being set in regs is for exception handling * convenience. */ - tmp = (regs->ps & ~XCHAL_PS_EXCM_MASK); + tmp = (regs->ps & ~(1 << PS_EXCM_BIT)); break; case REG_WB: tmp = regs->windowbase; @@ -332,12 +332,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) void do_syscall_trace(void) { - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - - if (!(current->ptrace & PT_PTRACED)) - return; - /* * The 0x80 provides a way for the tracing parent to distinguish * between a syscall stop and SIGTRAP delivery @@ -354,3 +348,23 @@ void do_syscall_trace(void) current->exit_code = 0; } } + +void do_syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); + +#if 0 + if (unlikely(current->audit_context)) + audit_syscall_entry(current, AUDIT_ARCH_XTENSA..); +#endif +} + +void do_syscall_trace_leave(struct pt_regs *regs) +{ + if ((test_thread_flag(TIF_SYSCALL_TRACE)) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); +} + diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index c99ab72b41b..b6374c09de2 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -42,8 +42,6 @@ #include <asm/page.h> #include <asm/setup.h> -#include <xtensa/config/system.h> - #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) struct screen_info screen_info = { 0, 24, 0, 0, 0, 80, 0, 0, 0, 24, 1, 16}; #endif @@ -336,7 +334,7 @@ c_show(struct seq_file *f, void *slot) /* high-level stuff */ seq_printf(f,"processor\t: 0\n" "vendor_id\t: Tensilica\n" - "model\t\t: Xtensa " XCHAL_HW_RELEASE_NAME "\n" + "model\t\t: Xtensa " XCHAL_HW_VERSION_NAME "\n" "core ID\t\t: " XCHAL_CORE_ID "\n" "build ID\t: 0x%x\n" "byte order\t: %s\n" @@ -420,25 +418,6 @@ c_show(struct seq_file *f, void *slot) XCHAL_NUM_TIMERS, XCHAL_DEBUGLEVEL); - /* Coprocessors */ -#if XCHAL_HAVE_CP - seq_printf(f, "coprocessors\t: %d\n", XCHAL_CP_NUM); -#else - seq_printf(f, "coprocessors\t: none\n"); -#endif - - /* {I,D}{RAM,ROM} and XLMI */ - seq_printf(f,"inst ROMs\t: %d\n" - "inst RAMs\t: %d\n" - "data ROMs\t: %d\n" - "data RAMs\t: %d\n" - "XLMI ports\t: %d\n", - XCHAL_NUM_IROM, - XCHAL_NUM_IRAM, - XCHAL_NUM_DROM, - XCHAL_NUM_DRAM, - XCHAL_NUM_XLMI); - /* Cache */ seq_printf(f,"icache line size: %d\n" "icache ways\t: %d\n" @@ -466,24 +445,6 @@ c_show(struct seq_file *f, void *slot) XCHAL_DCACHE_WAYS, XCHAL_DCACHE_SIZE); - /* MMU */ - seq_printf(f,"ASID bits\t: %d\n" - "ASID invalid\t: %d\n" - "ASID kernel\t: %d\n" - "rings\t\t: %d\n" - "itlb ways\t: %d\n" - "itlb AR ways\t: %d\n" - "dtlb ways\t: %d\n" - "dtlb AR ways\t: %d\n", - 
XCHAL_MMU_ASID_BITS, - XCHAL_MMU_ASID_INVALID, - XCHAL_MMU_ASID_KERNEL, - XCHAL_MMU_RINGS, - XCHAL_ITLB_WAYS, - XCHAL_ITLB_ARF_WAYS, - XCHAL_DTLB_WAYS, - XCHAL_DTLB_ARF_WAYS); - return 0; } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c494f0826fc..c6d9880a4cd 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -12,8 +12,8 @@ * */ -#include <xtensa/config/core.h> -#include <xtensa/hal.h> +#include <asm/variant/core.h> +#include <asm/coprocessor.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> @@ -46,7 +46,7 @@ extern struct task_struct *coproc_owners[]; * Atomically swap in the new signal mask, and wait for a signal. */ -int sys_sigsuspend(struct pt_regs *regs) +int xtensa_sigsuspend(struct pt_regs *regs) { old_sigset_t mask = (old_sigset_t) regs->areg[3]; sigset_t saveset; @@ -68,7 +68,7 @@ int sys_sigsuspend(struct pt_regs *regs) } asmlinkage int -sys_rt_sigsuspend(struct pt_regs *regs) +xtensa_rt_sigsuspend(struct pt_regs *regs) { sigset_t *unewset = (sigset_t *) regs->areg[4]; size_t sigsetsize = (size_t) regs->areg[3]; @@ -96,7 +96,7 @@ sys_rt_sigsuspend(struct pt_regs *regs) } asmlinkage int -sys_sigaction(int sig, const struct old_sigaction *act, +xtensa_sigaction(int sig, const struct old_sigaction *act, struct old_sigaction *oact) { struct k_sigaction new_ka, old_ka; @@ -128,7 +128,7 @@ sys_sigaction(int sig, const struct old_sigaction *act, } asmlinkage int -sys_sigaltstack(struct pt_regs *regs) +xtensa_sigaltstack(struct pt_regs *regs) { const stack_t *uss = (stack_t *) regs->areg[4]; stack_t *uoss = (stack_t *) regs->areg[3]; @@ -216,8 +216,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) * handler, or the user mode value doesn't matter (e.g. PS.OWB). */ err |= __get_user(ps, &sc->sc_ps); - regs->ps = (regs->ps & ~XCHAL_PS_CALLINC_MASK) - | (ps & XCHAL_PS_CALLINC_MASK); + regs->ps = (regs->ps & ~PS_CALLINC_MASK) + | (ps & PS_CALLINC_MASK); /* Additional corruption checks */ @@ -280,7 +280,7 @@ flush_my_cpstate(struct task_struct *tsk) static int save_cpextra (struct _cpstate *buf) { -#if (XCHAL_EXTRA_SA_SIZE == 0) && (XCHAL_CP_NUM == 0) +#if XCHAL_CP_NUM == 0 return 0; #else @@ -350,7 +350,7 @@ setup_sigcontext(struct sigcontext *sc, struct _cpstate *cpstate, return err; } -asmlinkage int sys_sigreturn(struct pt_regs *regs) +asmlinkage int xtensa_sigreturn(struct pt_regs *regs) { struct sigframe *frame = (struct sigframe *)regs->areg[1]; sigset_t set; @@ -382,7 +382,7 @@ badframe: return 0; } -asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) +asmlinkage int xtensa_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe *frame = (struct rt_sigframe *)regs->areg[1]; sigset_t set; @@ -497,8 +497,10 @@ gen_return_code(unsigned char *codemem, unsigned int use_rt_sigreturn) /* Flush generated code out of the data cache */ - if (err == 0) - __flush_invalidate_cache_range((unsigned long)codemem, 6UL); + if (err == 0) { + __invalidate_icache_range((unsigned long)codemem, 6UL); + __flush_invalidate_dcache_range((unsigned long)codemem, 6UL); + } return err; } diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c new file mode 100644 index 00000000000..418268f4976 --- /dev/null +++ b/arch/xtensa/kernel/syscall.c @@ -0,0 +1,95 @@ +/* + * arch/xtensa/kernel/syscall.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2001 - 2005 Tensilica Inc. + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 1995 - 2000 by Ralf Baechle + * + * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com> + * Marc Gauthier <marc@tensilica.com, marc@alumni.uwaterloo.ca> + * Chris Zankel <chris@zankel.net> + * Kevin Chea + * + */ +#include <asm/uaccess.h> +#include <asm/syscalls.h> +#include <asm/unistd.h> +#include <linux/linkage.h> +#include <linux/stringify.h> +#include <linux/errno.h> +#include <linux/syscalls.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/mman.h> +#include <linux/shm.h> + +typedef void (*syscall_t)(void); + +syscall_t sys_call_table[__NR_syscall_count] /* FIXME __cacheline_aligned */= { + [0 ... __NR_syscall_count - 1] = (syscall_t)&sys_ni_syscall, + +#undef __SYSCALL +#define __SYSCALL(nr,symbol,nargs) [ nr ] = (syscall_t)symbol, +#undef _XTENSA_UNISTD_H +#undef __KERNEL_SYSCALLS__ +#include <asm/unistd.h> +}; + +/* + * xtensa_pipe() is the normal C calling standard for creating a pipe. It's not + * the way unix traditional does this, though. + */ + +asmlinkage long xtensa_pipe(int __user *userfds) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(userfds, fd, 2 * sizeof(int))) + error = -EFAULT; + } + return error; +} + + +asmlinkage long xtensa_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + int error = -EBADF; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down_write(&current->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); +out: + return error; +} + +asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) +{ + unsigned long ret; + long err; + + err = do_shmat(shmid, shmaddr, shmflg, &ret); + if (err) + return err; + return (long)ret; +} + diff --git a/arch/xtensa/kernel/syscalls.c b/arch/xtensa/kernel/syscalls.c deleted file mode 100644 index f49cb239e60..00000000000 --- a/arch/xtensa/kernel/syscalls.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * arch/xtensa/kernel/syscalls.c - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - * Copyright (C) 2000 Silicon Graphics, Inc.
- * Copyright (C) 1995 - 2000 by Ralf Baechle - * - * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com> - * Marc Gauthier <marc@tensilica.com, marc@alumni.uwaterloo.ca> - * Chris Zankel <chris@zankel.net> - * Kevin Chea - * - */ - -#define DEBUG 0 - -#include <linux/linkage.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/mman.h> -#include <linux/sched.h> -#include <linux/file.h> -#include <linux/slab.h> -#include <linux/utsname.h> -#include <linux/unistd.h> -#include <linux/stringify.h> -#include <linux/syscalls.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/errno.h> -#include <asm/ptrace.h> -#include <asm/signal.h> -#include <asm/uaccess.h> -#include <asm/hardirq.h> -#include <asm/mman.h> -#include <asm/shmparam.h> -#include <asm/page.h> - -extern void do_syscall_trace(void); -typedef int (*syscall_t)(void *a0,...); -extern syscall_t sys_call_table[]; -extern unsigned char sys_narg_table[]; - -/* - * sys_pipe() is the normal C calling standard for creating a pipe. It's not - * the way unix traditional does this, though. - */ - -int sys_pipe(int __user *userfds) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(userfds, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - -/* - * Common code for old and new mmaps. - */ -long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(&current->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - -int sys_clone(struct pt_regs *regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - clone_flags = regs->areg[4]; - newsp = regs->areg[3]; - parent_tidptr = (int __user *)regs->areg[5]; - child_tidptr = (int __user *)regs->areg[6]; - if (!newsp) - newsp = regs->areg[1]; - return do_fork(clone_flags,newsp,regs,0,parent_tidptr,child_tidptr); -} - -/* - * sys_execve() executes a new program. - */ - -int sys_execve(struct pt_regs *regs) -{ - int error; - char * filename; - - filename = getname((char *) (long)regs->areg[5]); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, (char **) (long)regs->areg[3], - (char **) (long)regs->areg[4], regs); - putname(filename); - -out: - return error; -} - -int sys_uname(struct old_utsname * name) -{ - if (name && !copy_to_user(name, utsname(), sizeof (*name))) - return 0; - return -EFAULT; -} - -/* - * Build the string table for the builtin "poor man's strace". - */ - -#if DEBUG -#define SYSCALL(fun, narg) #fun, -static char *sfnames[] = { -#include "syscalls.h" -}; -#undef SYS -#endif - -void system_call (struct pt_regs *regs) -{ - syscall_t syscall; - unsigned long parm0, parm1, parm2, parm3, parm4, parm5; - int nargs, res; - unsigned int syscallnr; - int ps; - -#if DEBUG - int i; - unsigned long parms[6]; - char *sysname; -#endif - - regs->syscall = regs->areg[2]; - - do_syscall_trace(); - - /* Have to load after syscall_trace because strace - * sometimes changes regs->syscall.
- */ - syscallnr = regs->syscall; - - parm0 = parm1 = parm2 = parm3 = parm4 = parm5 = 0; - - /* Restore interrupt level to syscall invoker's. - * If this were in assembly, we wouldn't disable - * interrupts in the first place: - */ - local_save_flags (ps); - local_irq_restore((ps & ~XCHAL_PS_INTLEVEL_MASK) | - (regs->ps & XCHAL_PS_INTLEVEL_MASK) ); - - if (syscallnr > __NR_Linux_syscalls) { - regs->areg[2] = -ENOSYS; - return; - } - - syscall = sys_call_table[syscallnr]; - nargs = sys_narg_table[syscallnr]; - - if (syscall == NULL) { - regs->areg[2] = -ENOSYS; - return; - } - - /* There shouldn't be more than six arguments in the table! */ - - if (nargs > 6) - panic("Internal error - too many syscall arguments (%d)!\n", - nargs); - - /* Linux takes system-call arguments in registers. The ABI - * and Xtensa software conventions require the system-call - * number in a2. If an argument exists in a2, we move it to - * the next available register. Note that for improved - * efficiency, we do NOT shift all parameters down one - * register to maintain the original order. - * - * At best case (zero arguments), we just write the syscall - * number to a2. At worst case (1 to 6 arguments), we move - * the argument in a2 to the next available register, then - * write the syscall number to a2. - * - * For clarity, the following truth table enumerates all - * possibilities. - * - * arguments syscall number arg0, arg1, arg2, arg3, arg4, arg5 - * --------- -------------- ---------------------------------- - * 0 a2 - * 1 a2 a3 - * 2 a2 a4, a3 - * 3 a2 a5, a3, a4 - * 4 a2 a6, a3, a4, a5 - * 5 a2 a7, a3, a4, a5, a6 - * 6 a2 a8, a3, a4, a5, a6, a7 - */ - if (nargs) { - parm0 = regs->areg[nargs+2]; - parm1 = regs->areg[3]; - parm2 = regs->areg[4]; - parm3 = regs->areg[5]; - parm4 = regs->areg[6]; - parm5 = regs->areg[7]; - } else /* nargs == 0 */ - parm0 = (unsigned long) regs; - -#if DEBUG - parms[0] = parm0; - parms[1] = parm1; - parms[2] = parm2; - parms[3] = parm3; - parms[4] = parm4; - parms[5] = parm5; - - sysname = sfnames[syscallnr]; - if (strncmp(sysname, "sys_", 4) == 0) - sysname = sysname + 4; - - printk("\017SYSCALL:I:%x:%d:%s %s(", regs->pc, current->pid, - current->comm, sysname); - for (i = 0; i < nargs; i++) - printk((i>0) ? ", %#lx" : "%#lx", parms[i]); - printk(")\n"); -#endif - - res = syscall((void *)parm0, parm1, parm2, parm3, parm4, parm5); - -#if DEBUG - printk("\017SYSCALL:O:%d:%s %s(",current->pid, current->comm, sysname); - for (i = 0; i < nargs; i++) - printk((i>0) ? ", %#lx" : "%#lx", parms[i]); - if (res < 4096) - printk(") = %d\n", res); - else - printk(") = %#x\n", res); -#endif /* DEBUG */ - - regs->areg[2] = res; - do_syscall_trace(); -} - -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. - */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) -{ - long __res; - asm volatile ( - " mov a5, %2 \n" - " mov a4, %4 \n" - " mov a3, %3 \n" - " movi a2, %1 \n" - " syscall \n" - " mov %0, a2 \n" - : "=a" (__res) - : "i" (__NR_execve), "a" (filename), "a" (argv), "a" (envp) - : "a2", "a3", "a4", "a5"); - return __res; -} diff --git a/arch/xtensa/kernel/syscalls.h b/arch/xtensa/kernel/syscalls.h deleted file mode 100644 index 216c10a3150..00000000000 --- a/arch/xtensa/kernel/syscalls.h +++ /dev/null @@ -1,247 +0,0 @@ -/* - * arch/xtensa/kernel/syscalls.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1995, 1996, 1997, 1998 by Ralf Baechle - * Copyright (C) 2001 - 2005 Tensilica Inc. - * - * Changes by Joe Taylor <joe@tensilica.com> - */ - -/* - * This file is being included twice - once to build a list of all - * syscalls and once to build a table of how many arguments each syscall - * accepts. Syscalls that receive a pointer to the saved registers are - * marked as having zero arguments. - * - * The binary compatibility calls are in a separate list. - * - * Entry '0' used to be system_call. It's removed to disable indirect - * system calls for now so user tasks can't recurse. See mips' - * sys_syscall for a comparable example. - */ - -SYSCALL(0, 0) /* 00 */ -SYSCALL(sys_exit, 1) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_read, 3) -SYSCALL(sys_write, 3) -SYSCALL(sys_open, 3) /* 05 */ -SYSCALL(sys_close, 1) -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_creat, 2) -SYSCALL(sys_link, 2) -SYSCALL(sys_unlink, 1) /* 10 */ -SYSCALL(sys_execve, 0) -SYSCALL(sys_chdir, 1) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_mknod, 3) -SYSCALL(sys_chmod, 2) /* 15 */ -SYSCALL(sys_lchown, 3) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_newstat, 2) -SYSCALL(sys_lseek, 3) -SYSCALL(sys_getpid, 0) /* 20 */ -SYSCALL(sys_mount, 5) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_setuid, 1) -SYSCALL(sys_getuid, 0) -SYSCALL(sys_ni_syscall, 1) /* 25 */ -SYSCALL(sys_ptrace, 4) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_newfstat, 2) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_utime, 2) /* 30 */ -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_access, 2) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_ni_syscall, 0) /* 35 */ -SYSCALL(sys_sync, 0) -SYSCALL(sys_kill, 2) -SYSCALL(sys_rename, 2) -SYSCALL(sys_mkdir, 2) -SYSCALL(sys_rmdir, 1) /* 40 */ -SYSCALL(sys_dup, 1) -SYSCALL(sys_pipe, 1) -SYSCALL(sys_times, 1) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_brk, 1) /* 45 */ -SYSCALL(sys_setgid, 1) -SYSCALL(sys_getgid, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_geteuid, 0) -SYSCALL(sys_getegid, 0) /* 50 */ -SYSCALL(sys_acct, 1) -SYSCALL(sys_umount, 2) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ioctl, 3) -SYSCALL(sys_fcntl, 3) /* 55 */ -SYSCALL(sys_ni_syscall, 2) -SYSCALL(sys_setpgid, 2) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_umask, 1) /* 60 */ -SYSCALL(sys_chroot, 1) -SYSCALL(sys_ustat, 2) -SYSCALL(sys_dup2, 2) -SYSCALL(sys_getppid, 0) -SYSCALL(sys_ni_syscall, 0) /* 65 */ -SYSCALL(sys_setsid, 0) -SYSCALL(sys_sigaction, 3) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_setreuid, 2) /* 70 */ -SYSCALL(sys_setregid, 2) -SYSCALL(sys_sigsuspend, 0) -SYSCALL(sys_ni_syscall, 1) -SYSCALL(sys_sethostname, 2) -SYSCALL(sys_setrlimit, 2) /* 75 */ -SYSCALL(sys_getrlimit, 2) -SYSCALL(sys_getrusage, 2) -SYSCALL(sys_gettimeofday, 2) -SYSCALL(sys_settimeofday, 2) -SYSCALL(sys_getgroups, 2) /* 80 */ -SYSCALL(sys_setgroups, 2) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_symlink, 2) -SYSCALL(sys_newlstat, 2) -SYSCALL(sys_readlink, 3) /* 85 */ -SYSCALL(sys_uselib, 1) -SYSCALL(sys_swapon, 2) -SYSCALL(sys_reboot, 3) -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_ni_syscall, 6) /* 90 */ -SYSCALL(sys_munmap, 2) -SYSCALL(sys_truncate, 2) -SYSCALL(sys_ftruncate, 2) -SYSCALL(sys_fchmod, 2) -SYSCALL(sys_fchown, 3) /* 95 */ -SYSCALL(sys_getpriority, 2) -SYSCALL(sys_setpriority, 3) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_statfs, 2) -SYSCALL(sys_fstatfs, 2) /* 100 */ -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_ni_syscall, 2) 
-SYSCALL(sys_syslog, 3) -SYSCALL(sys_setitimer, 3) -SYSCALL(sys_getitimer, 2) /* 105 */ -SYSCALL(sys_newstat, 2) -SYSCALL(sys_newlstat, 2) -SYSCALL(sys_newfstat, 2) -SYSCALL(sys_uname, 1) -SYSCALL(sys_ni_syscall, 0) /* 110 */ -SYSCALL(sys_vhangup, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_wait4, 4) -SYSCALL(sys_swapoff, 1) /* 115 */ -SYSCALL(sys_sysinfo, 1) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_fsync, 1) -SYSCALL(sys_sigreturn, 0) -SYSCALL(sys_clone, 0) /* 120 */ -SYSCALL(sys_setdomainname, 2) -SYSCALL(sys_newuname, 1) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_adjtimex, 1) -SYSCALL(sys_mprotect, 3) /* 125 */ -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_ni_syscall, 2) -SYSCALL(sys_init_module, 2) -SYSCALL(sys_delete_module, 1) -SYSCALL(sys_ni_syscall, 1) /* 130 */ -SYSCALL(sys_quotactl, 0) -SYSCALL(sys_getpgid, 1) -SYSCALL(sys_fchdir, 1) -SYSCALL(sys_bdflush, 2) -SYSCALL(sys_sysfs, 3) /* 135 */ -SYSCALL(sys_personality, 1) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_setfsuid, 1) -SYSCALL(sys_setfsgid, 1) -SYSCALL(sys_llseek, 5) /* 140 */ -SYSCALL(sys_getdents, 3) -SYSCALL(sys_select, 5) -SYSCALL(sys_flock, 2) -SYSCALL(sys_msync, 3) -SYSCALL(sys_readv, 3) /* 145 */ -SYSCALL(sys_writev, 3) -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_ni_syscall, 4) /* handled in fast syscall handler. */ -SYSCALL(sys_ni_syscall, 0) /* 150 */ -SYSCALL(sys_getsid, 1) -SYSCALL(sys_fdatasync, 1) -SYSCALL(sys_sysctl, 1) -SYSCALL(sys_mlock, 2) -SYSCALL(sys_munlock, 2) /* 155 */ -SYSCALL(sys_mlockall, 1) -SYSCALL(sys_munlockall, 0) -SYSCALL(sys_sched_setparam,2) -SYSCALL(sys_sched_getparam,2) -SYSCALL(sys_sched_setscheduler,3) /* 160 */ -SYSCALL(sys_sched_getscheduler,1) -SYSCALL(sys_sched_yield,0) -SYSCALL(sys_sched_get_priority_max,1) -SYSCALL(sys_sched_get_priority_min,1) -SYSCALL(sys_sched_rr_get_interval,2) /* 165 */ -SYSCALL(sys_nanosleep,2) -SYSCALL(sys_mremap,4) -SYSCALL(sys_accept, 3) -SYSCALL(sys_bind, 3) -SYSCALL(sys_connect, 3) /* 170 */ -SYSCALL(sys_getpeername, 3) -SYSCALL(sys_getsockname, 3) -SYSCALL(sys_getsockopt, 5) -SYSCALL(sys_listen, 2) -SYSCALL(sys_recv, 4) /* 175 */ -SYSCALL(sys_recvfrom, 6) -SYSCALL(sys_recvmsg, 3) -SYSCALL(sys_send, 4) -SYSCALL(sys_sendmsg, 3) -SYSCALL(sys_sendto, 6) /* 180 */ -SYSCALL(sys_setsockopt, 5) -SYSCALL(sys_shutdown, 2) -SYSCALL(sys_socket, 3) -SYSCALL(sys_socketpair, 4) -SYSCALL(sys_setresuid, 3) /* 185 */ -SYSCALL(sys_getresuid, 3) -SYSCALL(sys_ni_syscall, 5) -SYSCALL(sys_poll, 3) -SYSCALL(sys_nfsservctl, 3) -SYSCALL(sys_setresgid, 3) /* 190 */ -SYSCALL(sys_getresgid, 3) -SYSCALL(sys_prctl, 5) -SYSCALL(sys_rt_sigreturn, 0) -SYSCALL(sys_rt_sigaction, 4) -SYSCALL(sys_rt_sigprocmask, 4) /* 195 */ -SYSCALL(sys_rt_sigpending, 2) -SYSCALL(sys_rt_sigtimedwait, 4) -SYSCALL(sys_rt_sigqueueinfo, 3) -SYSCALL(sys_rt_sigsuspend, 0) -SYSCALL(sys_pread64, 5) /* 200 */ -SYSCALL(sys_pwrite64, 5) -SYSCALL(sys_chown, 3) -SYSCALL(sys_getcwd, 2) -SYSCALL(sys_capget, 2) -SYSCALL(sys_capset, 2) /* 205 */ -SYSCALL(sys_sigaltstack, 0) -SYSCALL(sys_sendfile, 4) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_mmap, 6) /* 210 */ -SYSCALL(sys_truncate64, 2) -SYSCALL(sys_ftruncate64, 2) -SYSCALL(sys_stat64, 2) -SYSCALL(sys_lstat64, 2) -SYSCALL(sys_fstat64, 2) /* 215 */ -SYSCALL(sys_pivot_root, 2) -SYSCALL(sys_mincore, 3) -SYSCALL(sys_madvise, 3) -SYSCALL(sys_getdents64, 3) -SYSCALL(sys_ni_syscall, 0) /* 220 */ diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 37347e36998..a350431363a 100644 --- 
a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -47,7 +47,7 @@ unsigned long long sched_clock(void) return (unsigned long long)jiffies * (1000000000 / HZ); } -static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static irqreturn_t timer_interrupt(int irq, void *dev_id); static struct irqaction timer_irqaction = { .handler = timer_interrupt, .flags = IRQF_DISABLED, @@ -150,7 +150,7 @@ EXPORT_SYMBOL(do_gettimeofday); * The timer interrupt is called HZ times per second. */ -irqreturn_t timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t timer_interrupt (int irq, void *dev_id) { unsigned long next; @@ -160,9 +160,9 @@ irqreturn_t timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) again: while ((signed long)(get_ccount() - next) > 0) { - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif write_seqlock(&xtime_lock); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index ce077d6bf3a..693ab268485 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -75,7 +75,7 @@ extern void system_call (struct pt_regs*); #define USER 0x02 #define COPROCESSOR(x) \ -{ XCHAL_EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor } +{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor } typedef struct { int cause; @@ -85,38 +85,38 @@ typedef struct { dispatch_init_table_t __init dispatch_init_table[] = { -{ XCHAL_EXCCAUSE_ILLEGAL_INSTRUCTION, 0, do_illegal_instruction}, -{ XCHAL_EXCCAUSE_SYSTEM_CALL, KRNL, fast_syscall_kernel }, -{ XCHAL_EXCCAUSE_SYSTEM_CALL, USER, fast_syscall_user }, -{ XCHAL_EXCCAUSE_SYSTEM_CALL, 0, system_call }, -/* XCHAL_EXCCAUSE_INSTRUCTION_FETCH unhandled */ -/* XCHAL_EXCCAUSE_LOAD_STORE_ERROR unhandled*/ -{ XCHAL_EXCCAUSE_LEVEL1_INTERRUPT, 0, do_interrupt }, -{ XCHAL_EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca }, -/* XCHAL_EXCCAUSE_INTEGER_DIVIDE_BY_ZERO unhandled */ -/* XCHAL_EXCCAUSE_PRIVILEGED unhandled */ +{ EXCCAUSE_ILLEGAL_INSTRUCTION, 0, do_illegal_instruction}, +{ EXCCAUSE_SYSTEM_CALL, KRNL, fast_syscall_kernel }, +{ EXCCAUSE_SYSTEM_CALL, USER, fast_syscall_user }, +{ EXCCAUSE_SYSTEM_CALL, 0, system_call }, +/* EXCCAUSE_INSTRUCTION_FETCH unhandled */ +/* EXCCAUSE_LOAD_STORE_ERROR unhandled*/ +{ EXCCAUSE_LEVEL1_INTERRUPT, 0, do_interrupt }, +{ EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca }, +/* EXCCAUSE_INTEGER_DIVIDE_BY_ZERO unhandled */ +/* EXCCAUSE_PRIVILEGED unhandled */ #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION #ifdef CONFIG_UNALIGNED_USER -{ XCHAL_EXCCAUSE_UNALIGNED, USER, fast_unaligned }, +{ EXCCAUSE_UNALIGNED, USER, fast_unaligned }, #else -{ XCHAL_EXCCAUSE_UNALIGNED, 0, do_unaligned_user }, +{ EXCCAUSE_UNALIGNED, 0, do_unaligned_user }, #endif -{ XCHAL_EXCCAUSE_UNALIGNED, KRNL, fast_unaligned }, +{ EXCCAUSE_UNALIGNED, KRNL, fast_unaligned }, #endif -{ XCHAL_EXCCAUSE_ITLB_MISS, 0, do_page_fault }, -{ XCHAL_EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss}, -{ XCHAL_EXCCAUSE_ITLB_MULTIHIT, 0, do_multihit }, -{ XCHAL_EXCCAUSE_ITLB_PRIVILEGE, 0, do_page_fault }, -/* XCHAL_EXCCAUSE_SIZE_RESTRICTION unhandled */ -{ XCHAL_EXCCAUSE_FETCH_CACHE_ATTRIBUTE, 0, do_page_fault }, -{ XCHAL_EXCCAUSE_DTLB_MISS, USER|KRNL, fast_second_level_miss}, -{ XCHAL_EXCCAUSE_DTLB_MISS, 0, do_page_fault }, -{ XCHAL_EXCCAUSE_DTLB_MULTIHIT, 0, do_multihit }, -{ XCHAL_EXCCAUSE_DTLB_PRIVILEGE, 0, do_page_fault }, 
-/* XCHAL_EXCCAUSE_DTLB_SIZE_RESTRICTION unhandled */ -{ XCHAL_EXCCAUSE_STORE_CACHE_ATTRIBUTE, USER|KRNL, fast_store_prohibited }, -{ XCHAL_EXCCAUSE_STORE_CACHE_ATTRIBUTE, 0, do_page_fault }, -{ XCHAL_EXCCAUSE_LOAD_CACHE_ATTRIBUTE, 0, do_page_fault }, +{ EXCCAUSE_ITLB_MISS, 0, do_page_fault }, +{ EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss}, +{ EXCCAUSE_ITLB_MULTIHIT, 0, do_multihit }, +{ EXCCAUSE_ITLB_PRIVILEGE, 0, do_page_fault }, +/* EXCCAUSE_SIZE_RESTRICTION unhandled */ +{ EXCCAUSE_FETCH_CACHE_ATTRIBUTE, 0, do_page_fault }, +{ EXCCAUSE_DTLB_MISS, USER|KRNL, fast_second_level_miss}, +{ EXCCAUSE_DTLB_MISS, 0, do_page_fault }, +{ EXCCAUSE_DTLB_MULTIHIT, 0, do_multihit }, +{ EXCCAUSE_DTLB_PRIVILEGE, 0, do_page_fault }, +/* EXCCAUSE_DTLB_SIZE_RESTRICTION unhandled */ +{ EXCCAUSE_STORE_CACHE_ATTRIBUTE, USER|KRNL, fast_store_prohibited }, +{ EXCCAUSE_STORE_CACHE_ATTRIBUTE, 0, do_page_fault }, +{ EXCCAUSE_LOAD_CACHE_ATTRIBUTE, 0, do_page_fault }, /* XCCHAL_EXCCAUSE_FLOATING_POINT unhandled */ #if (XCHAL_CP_MASK & 1) COPROCESSOR(0), diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 0e74397bfa2..eb2d7bb69ee 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -53,6 +53,8 @@ #include <asm/thread_info.h> #include <asm/processor.h> +#define WINDOW_VECTORS_SIZE 0x180 + /* * User exception vector. (Exceptions with PS.UM == 1, PS.EXCM == 0) @@ -210,7 +212,7 @@ ENTRY(_DoubleExceptionVector) /* Check for kernel double exception (usually fatal). */ rsr a3, PS - _bbci.l a3, PS_UM_SHIFT, .Lksp + _bbci.l a3, PS_UM_BIT, .Lksp /* Check if we are currently handling a window exception. */ /* Note: We don't need to indicate that we enter a critical section. */ @@ -219,7 +221,7 @@ ENTRY(_DoubleExceptionVector) movi a3, XCHAL_WINDOW_VECTORS_VADDR _bltu a0, a3, .Lfixup - addi a3, a3, XSHAL_WINDOW_VECTORS_SIZE + addi a3, a3, WINDOW_VECTORS_SIZE _bgeu a0, a3, .Lfixup /* Window overflow/underflow exception. Get stack pointer. */ @@ -245,7 +247,7 @@ ENTRY(_DoubleExceptionVector) wsr a2, DEPC # save stack pointer temporarily rsr a0, PS - extui a0, a0, XCHAL_PS_OWB_SHIFT, XCHAL_PS_OWB_BITS + extui a0, a0, PS_OWB_SHIFT, 4 wsr a0, WINDOWBASE rsync @@ -312,8 +314,8 @@ ENTRY(_DoubleExceptionVector) .Lksp: /* a0: a0, a1: a1, a2: a2, a3: trashed, depc: depc, excsave: a3 */ rsr a3, EXCCAUSE - beqi a3, XCHAL_EXCCAUSE_ITLB_MISS, 1f - addi a3, a3, -XCHAL_EXCCAUSE_DTLB_MISS + beqi a3, EXCCAUSE_ITLB_MISS, 1f + addi a3, a3, -EXCCAUSE_DTLB_MISS bnez a3, .Lunrecoverable 1: movi a3, fast_second_level_miss_double_kernel jx a3 diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index cfe75f52872..a36c104c3a5 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -16,19 +16,17 @@ #include <asm-generic/vmlinux.lds.h> -#define _NOCLANGUAGE -#include <xtensa/config/core.h> -#include <xtensa/config/system.h> +#include <asm/variant/core.h> OUTPUT_ARCH(xtensa) ENTRY(_start) -#if XCHAL_MEMORY_ORDER == XTHAL_BIGENDIAN +#ifdef __XTENSA_EB__ jiffies = jiffies_64 + 4; #else jiffies = jiffies_64; #endif -#define KERNELOFFSET 0x1000 +#define KERNELOFFSET 0xd0001000 /* Note: In the following macros, it would be nice to specify only the vector name and section kind and construct "sym" and "section" using @@ -75,7 +73,7 @@ jiffies = jiffies_64; SECTIONS { - . = XCHAL_KSEG_CACHED_VADDR + KERNELOFFSET; + . = KERNELOFFSET; /* .text section */ _text = .; @@ -159,7 +157,7 @@ SECTIONS /* Initialization code and data: */ - . 
= ALIGN(1<<XCHAL_MMU_MIN_PTE_PAGE_SIZE); + . = ALIGN(1 << 12); __init_begin = .; .init.text : { _sinittext = .; @@ -223,32 +221,32 @@ SECTIONS .dummy) SECTION_VECTOR (_DebugInterruptVector_literal, .DebugInterruptVector.literal, - XCHAL_INTLEVEL_VECTOR_VADDR(XCHAL_DEBUGLEVEL) - 4, + XCHAL_DEBUG_VECTOR_VADDR - 4, SIZEOF(.WindowVectors.text), .WindowVectors.text) SECTION_VECTOR (_DebugInterruptVector_text, .DebugInterruptVector.text, - XCHAL_INTLEVEL_VECTOR_VADDR(XCHAL_DEBUGLEVEL), + XCHAL_DEBUG_VECTOR_VADDR, 4, .DebugInterruptVector.literal) SECTION_VECTOR (_KernelExceptionVector_literal, .KernelExceptionVector.literal, - XCHAL_KERNELEXC_VECTOR_VADDR - 4, + XCHAL_KERNEL_VECTOR_VADDR - 4, SIZEOF(.DebugInterruptVector.text), .DebugInterruptVector.text) SECTION_VECTOR (_KernelExceptionVector_text, .KernelExceptionVector.text, - XCHAL_KERNELEXC_VECTOR_VADDR, + XCHAL_KERNEL_VECTOR_VADDR, 4, .KernelExceptionVector.literal) SECTION_VECTOR (_UserExceptionVector_literal, .UserExceptionVector.literal, - XCHAL_USEREXC_VECTOR_VADDR - 4, + XCHAL_USER_VECTOR_VADDR - 4, SIZEOF(.KernelExceptionVector.text), .KernelExceptionVector.text) SECTION_VECTOR (_UserExceptionVector_text, .UserExceptionVector.text, - XCHAL_USEREXC_VECTOR_VADDR, + XCHAL_USER_VECTOR_VADDR, 4, .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, @@ -263,7 +261,7 @@ SECTIONS .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; - . = ALIGN(1<<XCHAL_MMU_MIN_PTE_PAGE_SIZE); + . = ALIGN(1 << 12); __init_end = .; diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S index e2d64dfd530..9d9cd990afa 100644 --- a/arch/xtensa/lib/checksum.S +++ b/arch/xtensa/lib/checksum.S @@ -16,8 +16,7 @@ #include <asm/errno.h> #include <linux/linkage.h> -#define _ASMLANGUAGE -#include <xtensa/config/core.h> +#include <asm/variant/core.h> /* * computes a partial checksum, e.g. for TCP/UDP fragments diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index e8f6d7eb722..ddda8f4bc86 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -9,7 +9,7 @@ * Copyright (C) 2002 - 2005 Tensilica Inc. */ -#include <xtensa/coreasm.h> +#include <asm/variant/core.h> .macro src_b r, w0, w1 #ifdef __XTENSA_EB__ diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S index 4de25134bc6..56a17495b2d 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -11,7 +11,7 @@ * Copyright (C) 2002 Tensilica Inc. */ -#include <xtensa/coreasm.h> +#include <asm/variant/core.h> /* * void *memset(void *dst, int c, size_t length) diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 71d55df4389..a834057bda6 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -11,7 +11,7 @@ * Copyright (C) 2002 Tensilica Inc. */ -#include <xtensa/coreasm.h> +#include <asm/variant/core.h> #include <linux/errno.h> /* Load or store instructions that may cause exceptions use the EX macro. */ diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S index cdff4d670f3..5e9c1e709b2 100644 --- a/arch/xtensa/lib/strnlen_user.S +++ b/arch/xtensa/lib/strnlen_user.S @@ -11,7 +11,7 @@ * Copyright (C) 2002 Tensilica Inc. */ -#include <xtensa/coreasm.h> +#include <asm/variant/core.h> /* Load or store instructions that may cause exceptions use the EX macro. 
*/ diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 4641ef510f0..a8ab1d4fe0a 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -53,7 +53,7 @@ * a11/ original length */ -#include <xtensa/coreasm.h> +#include <asm/variant/core.h> #ifdef __XTENSA_EB__ #define ALIGN(R, W0, W1) src R, W0, W1 diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index dd0dbec2e57..3dc6f2f07bb 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -21,7 +21,7 @@ #include <asm/system.h> #include <asm/pgalloc.h> -unsigned long asid_cache = ASID_FIRST_VERSION; +unsigned long asid_cache = ASID_USER_FIRST; void bad_page_fault(struct pt_regs*, unsigned long, int); /* @@ -58,10 +58,10 @@ void do_page_fault(struct pt_regs *regs) return; } - is_write = (exccause == XCHAL_EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0; - is_exec = (exccause == XCHAL_EXCCAUSE_ITLB_PRIVILEGE || - exccause == XCHAL_EXCCAUSE_ITLB_MISS || - exccause == XCHAL_EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0; + is_write = (exccause == EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0; + is_exec = (exccause == EXCCAUSE_ITLB_PRIVILEGE || + exccause == EXCCAUSE_ITLB_MISS || + exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0; #if 0 printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid, diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 660ef058c14..e1ec2d1e818 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -141,8 +141,8 @@ void __init bootmem_init(void) if (min_low_pfn > max_pfn) panic("No memory found!\n"); - max_low_pfn = max_pfn < MAX_LOW_MEMORY >> PAGE_SHIFT ? - max_pfn : MAX_LOW_MEMORY >> PAGE_SHIFT; + max_low_pfn = max_pfn < MAX_MEM_PFN >> PAGE_SHIFT ? + max_pfn : MAX_MEM_PFN >> PAGE_SHIFT; /* Find an area to use for the bootmem bitmap. */ @@ -215,7 +215,7 @@ void __init init_mmu (void) /* Set rasid register to a known value. */ - set_rasid_register (ASID_ALL_RESERVED); + set_rasid_register (ASID_USER_FIRST); /* Set PTEVADDR special register to the start of the page * table, which is in kernel mappable space (ie. 
not diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S index 327c0f17187..ae085332c60 100644 --- a/arch/xtensa/mm/misc.S +++ b/arch/xtensa/mm/misc.S @@ -19,9 +19,8 @@ #include <linux/linkage.h> #include <asm/page.h> #include <asm/pgtable.h> - -#include <xtensa/cacheasm.h> -#include <xtensa/cacheattrasm.h> +#include <asm/asmmacro.h> +#include <asm/cacheasm.h> /* clear_page (page) */ @@ -74,104 +73,66 @@ ENTRY(copy_page) retw - /* - * void __flush_invalidate_cache_all(void) + * void __invalidate_icache_page(ulong start) */ -ENTRY(__flush_invalidate_cache_all) +ENTRY(__invalidate_icache_page) entry sp, 16 - dcache_writeback_inv_all a2, a3 - icache_invalidate_all a2, a3 - retw -/* - * void __invalidate_icache_all(void) - */ + ___invalidate_icache_page a2 a3 + isync -ENTRY(__invalidate_icache_all) - entry sp, 16 - icache_invalidate_all a2, a3 retw /* - * void __flush_invalidate_dcache_all(void) + * void __invalidate_dcache_page(ulong start) */ -ENTRY(__flush_invalidate_dcache_all) +ENTRY(__invalidate_dcache_page) entry sp, 16 - dcache_writeback_inv_all a2, a3 - retw - -/* - * void __flush_invalidate_cache_range(ulong start, ulong size) - */ + ___invalidate_dcache_page a2 a3 + dsync -ENTRY(__flush_invalidate_cache_range) - entry sp, 16 - mov a4, a2 - mov a5, a3 - dcache_writeback_inv_region a4, a5, a6 - icache_invalidate_region a2, a3, a4 retw /* - * void __invalidate_icache_page(ulong start) + * void __flush_invalidate_dcache_page(ulong start) */ -ENTRY(__invalidate_icache_page) +ENTRY(__flush_invalidate_dcache_page) entry sp, 16 - movi a3, PAGE_SIZE - icache_invalidate_region a2, a3, a4 - retw -/* - * void __invalidate_dcache_page(ulong start) - */ + ___flush_invalidate_dcache_page a2 a3 -ENTRY(__invalidate_dcache_page) - entry sp, 16 - movi a3, PAGE_SIZE - dcache_invalidate_region a2, a3, a4 + dsync retw /* - * void __invalidate_icache_range(ulong start, ulong size) + * void __flush_dcache_page(ulong start) */ -ENTRY(__invalidate_icache_range) +ENTRY(__flush_dcache_page) entry sp, 16 - icache_invalidate_region a2, a3, a4 - retw -/* - * void __invalidate_dcache_range(ulong start, ulong size) - */ + ___flush_dcache_page a2 a3 -ENTRY(__invalidate_dcache_range) - entry sp, 16 - dcache_invalidate_region a2, a3, a4 + dsync retw -/* - * void __flush_dcache_page(ulong start) - */ -ENTRY(__flush_dcache_page) - entry sp, 16 - movi a3, PAGE_SIZE - dcache_writeback_region a2, a3, a4 - retw /* - * void __flush_invalidate_dcache_page(ulong start) + * void __invalidate_icache_range(ulong start, ulong size) */ -ENTRY(__flush_invalidate_dcache_page) +ENTRY(__invalidate_icache_range) entry sp, 16 - movi a3, PAGE_SIZE - dcache_writeback_inv_region a2, a3, a4 + + ___invalidate_icache_range a2 a3 a4 + isync + retw /* @@ -180,195 +141,69 @@ ENTRY(__flush_invalidate_dcache_page) ENTRY(__flush_invalidate_dcache_range) entry sp, 16 - dcache_writeback_inv_region a2, a3, a4 - retw -/* - * void __invalidate_dcache_all(void) - */ + ___flush_invalidate_dcache_range a2 a3 a4 + dsync -ENTRY(__invalidate_dcache_all) - entry sp, 16 - dcache_invalidate_all a2, a3 retw /* - * void __flush_invalidate_dcache_page_phys(ulong start) + * void _flush_dcache_range(ulong start, ulong size) */ -ENTRY(__flush_invalidate_dcache_page_phys) +ENTRY(__flush_dcache_range) entry sp, 16 - movi a3, XCHAL_DCACHE_SIZE - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 - -1: addi a3, a3, -XCHAL_DCACHE_LINESIZE - - ldct a6, a3 + ___flush_dcache_range a2 a3 a4 dsync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a3, 2, 1b - retw -2: diwbi a3, 0 - bgeui a3, 
2, 1b retw -ENTRY(check_dcache_low0) - entry sp, 16 - - movi a3, XCHAL_DCACHE_SIZE / 4 - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 - -1: addi a3, a3, -XCHAL_DCACHE_LINESIZE - - ldct a6, a3 - dsync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a3, 2, 1b - retw - -2: j 2b - -ENTRY(check_dcache_high0) - entry sp, 16 - - movi a5, XCHAL_DCACHE_SIZE / 4 - movi a3, XCHAL_DCACHE_SIZE / 2 - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 - -1: addi a3, a3, -XCHAL_DCACHE_LINESIZE - addi a5, a5, -XCHAL_DCACHE_LINESIZE - - ldct a6, a3 - dsync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a5, 2, 1b - retw - -2: j 2b +/* + * void _invalidate_dcache_range(ulong start, ulong size) + */ -ENTRY(check_dcache_low1) +ENTRY(__invalidate_dcache_range) entry sp, 16 - movi a5, XCHAL_DCACHE_SIZE / 4 - movi a3, XCHAL_DCACHE_SIZE * 3 / 4 - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 + ___invalidate_dcache_range a2 a3 a4 -1: addi a3, a3, -XCHAL_DCACHE_LINESIZE - addi a5, a5, -XCHAL_DCACHE_LINESIZE - ldct a6, a3 - dsync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a5, 2, 1b retw -2: j 2b +/* + * void _invalidate_icache_all(void) + */ -ENTRY(check_dcache_high1) +ENTRY(__invalidate_icache_all) entry sp, 16 - movi a5, XCHAL_DCACHE_SIZE / 4 - movi a3, XCHAL_DCACHE_SIZE - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 - -1: addi a3, a3, -XCHAL_DCACHE_LINESIZE - addi a5, a5, -XCHAL_DCACHE_LINESIZE + ___invalidate_icache_all a2 a3 + isync - ldct a6, a3 - dsync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a5, 2, 1b retw -2: j 2b - - /* - * void __invalidate_icache_page_phys(ulong start) + * void _flush_invalidate_dcache_all(void) */ -ENTRY(__invalidate_icache_page_phys) +ENTRY(__flush_invalidate_dcache_all) entry sp, 16 - movi a3, XCHAL_ICACHE_SIZE - movi a4, PAGE_MASK | 1 - addi a2, a2, 1 - -1: addi a3, a3, -XCHAL_ICACHE_LINESIZE - - lict a6, a3 - isync - and a6, a6, a4 - beq a6, a2, 2f - bgeui a3, 2, 1b - retw + ___flush_invalidate_dcache_all a2 a3 + dsync -2: iii a3, 0 - bgeui a3, 2, 1b retw +/* + * void _invalidate_dcache_all(void) + */ -#if 0 - - movi a3, XCHAL_DCACHE_WAYS - 1 - movi a4, PAGE_SIZE - -1: mov a5, a2 - add a6, a2, a4 - -2: diwbi a5, 0 - diwbi a5, XCHAL_DCACHE_LINESIZE - diwbi a5, XCHAL_DCACHE_LINESIZE * 2 - diwbi a5, XCHAL_DCACHE_LINESIZE * 3 - - addi a5, a5, XCHAL_DCACHE_LINESIZE * 4 - blt a5, a6, 2b - - addi a3, a3, -1 - addi a2, a2, XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS - bgez a3, 1b - - retw - -ENTRY(__invalidate_icache_page_index) +ENTRY(__invalidate_dcache_all) entry sp, 16 - movi a3, XCHAL_ICACHE_WAYS - 1 - movi a4, PAGE_SIZE - -1: mov a5, a2 - add a6, a2, a4 - -2: iii a5, 0 - iii a5, XCHAL_ICACHE_LINESIZE - iii a5, XCHAL_ICACHE_LINESIZE * 2 - iii a5, XCHAL_ICACHE_LINESIZE * 3 - - addi a5, a5, XCHAL_ICACHE_LINESIZE * 4 - blt a5, a6, 2b - - addi a3, a3, -1 - addi a2, a2, XCHAL_ICACHE_SIZE / XCHAL_ICACHE_WAYS - bgez a3, 2b + ___invalidate_dcache_all a2 a3 + dsync retw -#endif - - - - - - diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 0fefb866687..239461d8ea8 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -24,12 +24,12 @@ static inline void __flush_itlb_all (void) { - int way, index; + int w, i; - for (way = 0; way < XCHAL_ITLB_ARF_WAYS; way++) { - for (index = 0; index < ITLB_ENTRIES_PER_ARF_WAY; index++) { - int entry = way + (index << PAGE_SHIFT); - invalidate_itlb_entry_no_isync (entry); + for (w = 0; w < ITLB_ARF_WAYS; w++) { + for (i = 0; i < (1 << XCHAL_ITLB_ARF_ENTRIES_LOG2); i++) { + int e = w + (i << PAGE_SHIFT); + invalidate_itlb_entry_no_isync(e); } } asm volatile ("isync\n"); @@ -37,12 +37,12 
@@ static inline void __flush_itlb_all (void) static inline void __flush_dtlb_all (void) { - int way, index; + int w, i; - for (way = 0; way < XCHAL_DTLB_ARF_WAYS; way++) { - for (index = 0; index < DTLB_ENTRIES_PER_ARF_WAY; index++) { - int entry = way + (index << PAGE_SHIFT); - invalidate_dtlb_entry_no_isync (entry); + for (w = 0; w < DTLB_ARF_WAYS; w++) { + for (i = 0; i < (1 << XCHAL_DTLB_ARF_ENTRIES_LOG2); i++) { + int e = w + (i << PAGE_SHIFT); + invalidate_dtlb_entry_no_isync(e); } } asm volatile ("isync\n"); @@ -63,21 +63,25 @@ void flush_tlb_all (void) void flush_tlb_mm(struct mm_struct *mm) { -#if 0 - printk("[tlbmm<%lx>]\n", (unsigned long)mm->context); -#endif - if (mm == current->active_mm) { int flags; local_save_flags(flags); - get_new_mmu_context(mm, asid_cache); - set_rasid_register(ASID_INSERT(mm->context)); + __get_new_mmu_context(mm); + __load_mmu_context(mm); local_irq_restore(flags); } else mm->context = 0; } +#define _ITLB_ENTRIES (ITLB_ARF_WAYS << XCHAL_ITLB_ARF_ENTRIES_LOG2) +#define _DTLB_ENTRIES (DTLB_ARF_WAYS << XCHAL_DTLB_ARF_ENTRIES_LOG2) +#if _ITLB_ENTRIES > _DTLB_ENTRIES +# define _TLB_ENTRIES _ITLB_ENTRIES +#else +# define _TLB_ENTRIES _DTLB_ENTRIES +#endif + void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -93,7 +97,7 @@ void flush_tlb_range (struct vm_area_struct *vma, #endif local_save_flags(flags); - if (end-start + (PAGE_SIZE-1) <= SMALLEST_NTLB_ENTRIES << PAGE_SHIFT) { + if (end-start + (PAGE_SIZE-1) <= _TLB_ENTRIES << PAGE_SHIFT) { int oldpid = get_rasid_register(); set_rasid_register (ASID_INSERT(mm->context)); start &= PAGE_MASK; @@ -111,9 +115,7 @@ void flush_tlb_range (struct vm_area_struct *vma, set_rasid_register(oldpid); } else { - get_new_mmu_context(mm, asid_cache); - if (mm == current->active_mm) - set_rasid_register(ASID_INSERT(mm->context)); + flush_tlb_mm(mm); } local_irq_restore(flags); } @@ -123,10 +125,6 @@ void flush_tlb_page (struct vm_area_struct *vma, unsigned long page) struct mm_struct* mm = vma->vm_mm; unsigned long flags; int oldpid; -#if 0 - printk("[tlbpage<%02lx,%08lx>]\n", - (unsigned long)mm->context, page); -#endif if(mm->context == NO_CONTEXT) return; @@ -142,404 +140,5 @@ void flush_tlb_page (struct vm_area_struct *vma, unsigned long page) set_rasid_register(oldpid); local_irq_restore(flags); - -#if 0 - flush_tlb_all(); - return; -#endif -} - - -#ifdef DEBUG_TLB - -#define USE_ITLB 0 -#define USE_DTLB 1 - -struct way_config_t { - int indicies; - int indicies_log2; - int pgsz_log2; - int arf; -}; - -static struct way_config_t itlb[XCHAL_ITLB_WAYS] = -{ - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY0_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY0_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY0_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY0_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY1_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY1_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY1_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY1_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY2_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY2_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY2_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY2_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY3_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY3_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY3_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY3_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY4_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY4_SET, ENTRIES_LOG2), - 
XCHAL_ITLB_SET(XCHAL_ITLB_WAY4_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY4_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY5_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY5_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY5_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY5_SET, ARF) - }, - { XCHAL_ITLB_SET(XCHAL_ITLB_WAY6_SET, ENTRIES), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY6_SET, ENTRIES_LOG2), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY6_SET, PAGESZ_LOG2_MIN), - XCHAL_ITLB_SET(XCHAL_ITLB_WAY6_SET, ARF) - } -}; - -static struct way_config_t dtlb[XCHAL_DTLB_WAYS] = -{ - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY0_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY0_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY0_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY0_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY1_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY1_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY1_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY1_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY2_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY2_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY2_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY2_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY3_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY3_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY3_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY3_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY4_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY4_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY4_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY4_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY5_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY5_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY5_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY5_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY6_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY6_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY6_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY6_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY7_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY7_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY7_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY7_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY8_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY8_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY8_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY8_SET, ARF) - }, - { XCHAL_DTLB_SET(XCHAL_DTLB_WAY9_SET, ENTRIES), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY9_SET, ENTRIES_LOG2), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY9_SET, PAGESZ_LOG2_MIN), - XCHAL_DTLB_SET(XCHAL_DTLB_WAY9_SET, ARF) - } -}; - -/* Total number of entries: */ -#define ITLB_TOTAL_ENTRIES \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY0_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY1_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY2_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY3_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY4_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY5_SET, ENTRIES) + \ - XCHAL_ITLB_SET(XCHAL_ITLB_WAY6_SET, ENTRIES) -#define DTLB_TOTAL_ENTRIES \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY0_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY1_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY2_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY3_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY4_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY5_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY6_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY7_SET, ENTRIES) + \ - XCHAL_DTLB_SET(XCHAL_DTLB_WAY8_SET, ENTRIES) + \ 
- XCHAL_DTLB_SET(XCHAL_DTLB_WAY9_SET, ENTRIES) - - -typedef struct { - unsigned va; - unsigned pa; - unsigned char asid; - unsigned char ca; - unsigned char way; - unsigned char index; - unsigned char pgsz_log2; /* 0 .. 32 */ - unsigned char type; /* 0=ITLB 1=DTLB */ -} tlb_dump_entry_t; - -/* Return -1 if a precedes b, +1 if a follows b, 0 if same: */ -int cmp_tlb_dump_info( tlb_dump_entry_t *a, tlb_dump_entry_t *b ) -{ - if (a->asid < b->asid) return -1; - if (a->asid > b->asid) return 1; - if (a->va < b->va) return -1; - if (a->va > b->va) return 1; - if (a->pa < b->pa) return -1; - if (a->pa > b->pa) return 1; - if (a->ca < b->ca) return -1; - if (a->ca > b->ca) return 1; - if (a->way < b->way) return -1; - if (a->way > b->way) return 1; - if (a->index < b->index) return -1; - if (a->index > b->index) return 1; - return 0; -} - -void sort_tlb_dump_info( tlb_dump_entry_t *t, int n ) -{ - int i, j; - /* Simple O(n*n) sort: */ - for (i = 0; i < n-1; i++) - for (j = i+1; j < n; j++) - if (cmp_tlb_dump_info(t+i, t+j) > 0) { - tlb_dump_entry_t tmp = t[i]; - t[i] = t[j]; - t[j] = tmp; - } -} - - -static tlb_dump_entry_t itlb_dump_info[ITLB_TOTAL_ENTRIES]; -static tlb_dump_entry_t dtlb_dump_info[DTLB_TOTAL_ENTRIES]; - - -static inline char *way_type (int type) -{ - return type ? "autorefill" : "non-autorefill"; -} - -void print_entry (struct way_config_t *way_info, - unsigned int way, - unsigned int index, - unsigned int virtual, - unsigned int translation) -{ - char valid_chr; - unsigned int va, pa, asid, ca; - - va = virtual & - ~((1 << (way_info->pgsz_log2 + way_info->indicies_log2)) - 1); - asid = virtual & ((1 << XCHAL_MMU_ASID_BITS) - 1); - pa = translation & ~((1 << way_info->pgsz_log2) - 1); - ca = translation & ((1 << XCHAL_MMU_CA_BITS) - 1); - valid_chr = asid ? 'V' : 'I'; - - /* Compute and incorporate the effect of the index bits on the - * va. It's more useful for kernel debugging, since we always - * want to know the effective va anyway. 
*/ - - va += index << way_info->pgsz_log2; - - printk ("\t[%d,%d] (%c) vpn 0x%.8x ppn 0x%.8x asid 0x%.2x am 0x%x\n", - way, index, valid_chr, va, pa, asid, ca); -} - -void print_itlb_entry (struct way_config_t *way_info, int way, int index) -{ - print_entry (way_info, way, index, - read_itlb_virtual (way + (index << way_info->pgsz_log2)), - read_itlb_translation (way + (index << way_info->pgsz_log2))); -} - -void print_dtlb_entry (struct way_config_t *way_info, int way, int index) -{ - print_entry (way_info, way, index, - read_dtlb_virtual (way + (index << way_info->pgsz_log2)), - read_dtlb_translation (way + (index << way_info->pgsz_log2))); -} - -void dump_itlb (void) -{ - int way, index; - - printk ("\nITLB: ways = %d\n", XCHAL_ITLB_WAYS); - - for (way = 0; way < XCHAL_ITLB_WAYS; way++) { - printk ("\nWay: %d, Entries: %d, MinPageSize: %d, Type: %s\n", - way, itlb[way].indicies, - itlb[way].pgsz_log2, way_type(itlb[way].arf)); - for (index = 0; index < itlb[way].indicies; index++) { - print_itlb_entry(&itlb[way], way, index); - } - } -} - -void dump_dtlb (void) -{ - int way, index; - - printk ("\nDTLB: ways = %d\n", XCHAL_DTLB_WAYS); - - for (way = 0; way < XCHAL_DTLB_WAYS; way++) { - printk ("\nWay: %d, Entries: %d, MinPageSize: %d, Type: %s\n", - way, dtlb[way].indicies, - dtlb[way].pgsz_log2, way_type(dtlb[way].arf)); - for (index = 0; index < dtlb[way].indicies; index++) { - print_dtlb_entry(&dtlb[way], way, index); - } - } -} - -void dump_tlb (tlb_dump_entry_t *tinfo, struct way_config_t *config, - int entries, int ways, int type, int show_invalid) -{ - tlb_dump_entry_t *e = tinfo; - int way, i; - - /* Gather all info: */ - for (way = 0; way < ways; way++) { - struct way_config_t *cfg = config + way; - for (i = 0; i < cfg->indicies; i++) { - unsigned wayindex = way + (i << cfg->pgsz_log2); - unsigned vv = (type ? read_dtlb_virtual (wayindex) - : read_itlb_virtual (wayindex)); - unsigned pp = (type ? read_dtlb_translation (wayindex) - : read_itlb_translation (wayindex)); - - /* Compute and incorporate the effect of the index bits on the - * va. It's more useful for kernel debugging, since we always - * want to know the effective va anyway. */ - - e->va = (vv & ~((1 << (cfg->pgsz_log2 + cfg->indicies_log2)) - 1)); - e->va += (i << cfg->pgsz_log2); - e->pa = (pp & ~((1 << cfg->pgsz_log2) - 1)); - e->asid = (vv & ((1 << XCHAL_MMU_ASID_BITS) - 1)); - e->ca = (pp & ((1 << XCHAL_MMU_CA_BITS) - 1)); - e->way = way; - e->index = i; - e->pgsz_log2 = cfg->pgsz_log2; - e->type = type; - e++; - } - } -#if 1 - /* Sort by ASID and VADDR: */ - sort_tlb_dump_info (tinfo, entries); -#endif - - /* Display all sorted info: */ - printk ("\n%cTLB dump:\n", (type ? 'D' : 'I')); - for (e = tinfo, i = 0; i < entries; i++, e++) { -#if 0 - if (e->asid == 0 && !show_invalid) - continue; -#endif - printk ("%c way=%d i=%d ASID=%02X V=%08X -> P=%08X CA=%X (%d %cB)\n", - (e->type ? 
'D' : 'I'), e->way, e->index, - e->asid, e->va, e->pa, e->ca, - (1 << (e->pgsz_log2 % 10)), - " kMG"[e->pgsz_log2 / 10] - ); - } -} - -void dump_tlbs2 (int showinv) -{ - dump_tlb (itlb_dump_info, itlb, ITLB_TOTAL_ENTRIES, XCHAL_ITLB_WAYS, 0, showinv); - dump_tlb (dtlb_dump_info, dtlb, DTLB_TOTAL_ENTRIES, XCHAL_DTLB_WAYS, 1, showinv); -} - -void dump_all_tlbs (void) -{ - dump_tlbs2 (1); -} - -void dump_valid_tlbs (void) -{ - dump_tlbs2 (0); } - -void dump_tlbs (void) -{ - dump_itlb(); - dump_dtlb(); -} - -void dump_cache_tag(int dcache, int idx) -{ - int w, i, s, e; - unsigned long tag, index; - unsigned long num_lines, num_ways, cache_size, line_size; - - num_ways = dcache ? XCHAL_DCACHE_WAYS : XCHAL_ICACHE_WAYS; - cache_size = dcache ? XCHAL_DCACHE_SIZE : XCHAL_ICACHE_SIZE; - line_size = dcache ? XCHAL_DCACHE_LINESIZE : XCHAL_ICACHE_LINESIZE; - - num_lines = cache_size / num_ways; - - s = 0; e = num_lines; - - if (idx >= 0) - e = (s = idx * line_size) + 1; - - for (i = s; i < e; i+= line_size) { - printk("\nline %#08x:", i); - for (w = 0; w < num_ways; w++) { - index = w * num_lines + i; - if (dcache) - __asm__ __volatile__("ldct %0, %1\n\t" - : "=a"(tag) : "a"(index)); - else - __asm__ __volatile__("lict %0, %1\n\t" - : "=a"(tag) : "a"(index)); - - printk(" %#010lx", tag); - } - } - printk ("\n"); -} - -void dump_icache(int index) -{ - unsigned long data, addr; - int w, i; - - const unsigned long num_ways = XCHAL_ICACHE_WAYS; - const unsigned long cache_size = XCHAL_ICACHE_SIZE; - const unsigned long line_size = XCHAL_ICACHE_LINESIZE; - const unsigned long num_lines = cache_size / num_ways / line_size; - - for (w = 0; w < num_ways; w++) { - printk ("\nWay %d", w); - - for (i = 0; i < line_size; i+= 4) { - addr = w * num_lines + index * line_size + i; - __asm__ __volatile__("licw %0, %1\n\t" - : "=a"(data) : "a"(addr)); - printk(" %#010lx", data); - } - } - printk ("\n"); -} - -void dump_cache_tags(void) -{ - printk("Instruction cache\n"); - dump_cache_tag(0, -1); - printk("Data cache\n"); - dump_cache_tag(1, -1); -} - -#endif diff --git a/arch/xtensa/platform-iss/console.c b/arch/xtensa/platform-iss/console.c index 5c947cae752..2f4f20ffe66 100644 --- a/arch/xtensa/platform-iss/console.c +++ b/arch/xtensa/platform-iss/console.c @@ -25,11 +25,15 @@ #include <asm/uaccess.h> #include <asm/irq.h> -#include <xtensa/simcall.h> +#include <asm/platform/simcall.h> #include <linux/tty.h> #include <linux/tty_flip.h> +#ifdef SERIAL_INLINE +#define _INLINE_ inline +#endif + #define SERIAL_MAX_NUM_LINES 1 #define SERIAL_TIMER_VALUE (20 * HZ) @@ -191,7 +195,7 @@ static int rs_read_proc(char *page, char **start, off_t off, int count, } -static const struct tty_operations serial_ops = { +static struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, .write = rs_write, diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c index 15d64414bd6..8ebfc876122 100644 --- a/arch/xtensa/platform-iss/network.c +++ b/arch/xtensa/platform-iss/network.c @@ -34,7 +34,7 @@ #include <linux/timer.h> #include <linux/platform_device.h> -#include <xtensa/simcall.h> +#include <asm/platform/simcall.h> #define DRIVER_NAME "iss-netdev" #define ETH_MAX_PACKET 1500 diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 4f83fd92237..785e61c9a81 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/swap.h> #include <linux/writeback.h> +#include <linux/task_io_accounting_ops.h> #include <linux/interrupt.h> #include <linux/cpu.h> 
#include <linux/blktrace_api.h> @@ -3235,10 +3236,12 @@ void submit_bio(int rw, struct bio *bio) BIO_BUG_ON(!bio->bi_size); BIO_BUG_ON(!bio->bi_io_vec); bio->bi_rw |= rw; - if (rw & WRITE) + if (rw & WRITE) { count_vm_events(PGPGOUT, count); - else + } else { + task_io_account_read(bio->bi_size); count_vm_events(PGPGIN, count); + } if (unlikely(block_dump)) { char b[BDEVNAME_SIZE]; diff --git a/drivers/Kconfig b/drivers/Kconfig index 4929e923b5c..e7da9fa724e 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -80,4 +80,6 @@ source "drivers/rtc/Kconfig" source "drivers/dma/Kconfig" +source "drivers/kvm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 50f76da598c..0dd96d1afd3 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_SPI) += spi/ obj-$(CONFIG_PCCARD) += pcmcia/ obj-$(CONFIG_DIO) += dio/ obj-$(CONFIG_SBUS) += sbus/ +obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_ZORRO) += zorro/ obj-$(CONFIG_MAC) += macintosh/ obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/ diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index 6c59baa887a..e736119b649 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -37,8 +37,10 @@ #define BT_DEBUG_ENABLE 1 /* Generic messages */ #define BT_DEBUG_MSG 2 /* Prints all request/response buffers */ #define BT_DEBUG_STATES 4 /* Verbose look at state changes */ +/* BT_DEBUG_OFF must be zero to correspond to the default uninitialized + value */ -static int bt_debug = BT_DEBUG_OFF; +static int bt_debug; /* 0 == BT_DEBUG_OFF */ module_param(bt_debug, int, 0644); MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index e257835a9a7..ff2d052177c 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -834,7 +834,7 @@ static const struct file_operations ipmi_fops = { #define DEVICE_NAME "ipmidev" -static int ipmi_major = 0; +static int ipmi_major; module_param(ipmi_major, int, 0); MODULE_PARM_DESC(ipmi_major, "Sets the major number of the IPMI device. By" " default, or if you set it to zero, it will choose the next" diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 5703ee28e1c..4e4691a5389 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -53,10 +53,10 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); -static int initialized = 0; +static int initialized; #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_ipmi_root = NULL; +static struct proc_dir_entry *proc_ipmi_root; #endif /* CONFIG_PROC_FS */ /* Remain in auto-maintenance mode for this amount of time (in ms). 
*/ @@ -2142,8 +2142,7 @@ cleanup_bmc_device(struct kref *ref) bmc = container_of(ref, struct bmc_device, refcount); remove_files(bmc); - if (bmc->dev) - platform_device_unregister(bmc->dev); + platform_device_unregister(bmc->dev); kfree(bmc); } @@ -2341,8 +2340,7 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, while (ipmi_find_bmc_prod_dev_id(&ipmidriver, bmc->id.product_id, - bmc->id.device_id)) - { + bmc->id.device_id)) { if (!warn_printed) { printk(KERN_WARNING PFX "This machine has two different BMCs" @@ -4043,7 +4041,7 @@ static void send_panic_events(char *str) } #endif /* CONFIG_IPMI_PANIC_EVENT */ -static int has_panicked = 0; +static int has_panicked; static int panic_event(struct notifier_block *this, unsigned long event, diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index 597eb4f88b8..9d23136e598 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -58,10 +58,10 @@ static int poweroff_powercycle; static int ifnum_to_use = -1; /* Our local state. */ -static int ready = 0; +static int ready; static ipmi_user_t ipmi_user; static int ipmi_ifnum; -static void (*specific_poweroff_func)(ipmi_user_t user) = NULL; +static void (*specific_poweroff_func)(ipmi_user_t user); /* Holds the old poweroff function so we can restore it on removal. */ static void (*old_poweroff_func)(void); @@ -182,7 +182,7 @@ static int ipmi_request_in_rc_mode(ipmi_user_t user, #define IPMI_MOTOROLA_MANUFACTURER_ID 0x0000A1 #define IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID 0x0051 -static void (*atca_oem_poweroff_hook)(ipmi_user_t user) = NULL; +static void (*atca_oem_poweroff_hook)(ipmi_user_t user); static void pps_poweroff_atca (ipmi_user_t user) { diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 81a0c89598e..f1afd26a509 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -845,7 +845,7 @@ static void request_events(void *send_info) atomic_set(&smi_info->req_events, 1); } -static int initialized = 0; +static int initialized; static void smi_timeout(unsigned long data) { @@ -1018,17 +1018,17 @@ static int num_ports; static int irqs[SI_MAX_PARMS]; static int num_irqs; static int regspacings[SI_MAX_PARMS]; -static int num_regspacings = 0; +static int num_regspacings; static int regsizes[SI_MAX_PARMS]; -static int num_regsizes = 0; +static int num_regsizes; static int regshifts[SI_MAX_PARMS]; -static int num_regshifts = 0; +static int num_regshifts; static int slave_addrs[SI_MAX_PARMS]; -static int num_slave_addrs = 0; +static int num_slave_addrs; #define IPMI_IO_ADDR_SPACE 0 #define IPMI_MEM_ADDR_SPACE 1 -static char *addr_space_to_str[] = { "I/O", "mem" }; +static char *addr_space_to_str[] = { "i/o", "mem" }; static int hotmod_handler(const char *val, struct kernel_param *kp); @@ -1397,20 +1397,7 @@ static struct hotmod_vals hotmod_as[] = { { "i/o", IPMI_IO_ADDR_SPACE }, { NULL } }; -static int ipmi_strcasecmp(const char *s1, const char *s2) -{ - while (*s1 || *s2) { - if (!*s1) - return -1; - if (!*s2) - return 1; - if (*s1 != *s2) - return *s1 - *s2; - s1++; - s2++; - } - return 0; -} + static int parse_str(struct hotmod_vals *v, int *val, char *name, char **curr) { char *s; @@ -1424,7 +1411,7 @@ static int parse_str(struct hotmod_vals *v, int *val, char *name, char **curr) *s = '\0'; s++; for (i = 0; hotmod_ops[i].name; i++) { - if (ipmi_strcasecmp(*curr, v[i].name) == 0) { + if (strcmp(*curr, v[i].name) == 0) { *val = v[i].val; *curr = s; return 0; @@ -1435,10 
+1422,34 @@ static int parse_str(struct hotmod_vals *v, int *val, char *name, char **curr) return -EINVAL; } +static int check_hotmod_int_op(const char *curr, const char *option, + const char *name, int *val) +{ + char *n; + + if (strcmp(curr, name) == 0) { + if (!option) { + printk(KERN_WARNING PFX + "No option given for '%s'\n", + curr); + return -EINVAL; + } + *val = simple_strtoul(option, &n, 0); + if ((*n != '\0') || (*option == '\0')) { + printk(KERN_WARNING PFX + "Bad option given for '%s'\n", + curr); + return -EINVAL; + } + return 1; + } + return 0; +} + static int hotmod_handler(const char *val, struct kernel_param *kp) { char *str = kstrdup(val, GFP_KERNEL); - int rv = -EINVAL; + int rv; char *next, *curr, *s, *n, *o; enum hotmod_op op; enum si_type si_type; @@ -1450,13 +1461,15 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) int irq; int ipmb; int ival; + int len; struct smi_info *info; if (!str) return -ENOMEM; /* Kill any trailing spaces, as we can get a "\n" from echo. */ - ival = strlen(str) - 1; + len = strlen(str); + ival = len - 1; while ((ival >= 0) && isspace(str[ival])) { str[ival] = '\0'; ival--; @@ -1513,35 +1526,37 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) *o = '\0'; o++; } -#define HOTMOD_INT_OPT(name, val) \ - if (ipmi_strcasecmp(curr, name) == 0) { \ - if (!o) { \ - printk(KERN_WARNING PFX \ - "No option given for '%s'\n", \ - curr); \ - goto out; \ - } \ - val = simple_strtoul(o, &n, 0); \ - if ((*n != '\0') || (*o == '\0')) { \ - printk(KERN_WARNING PFX \ - "Bad option given for '%s'\n", \ - curr); \ - goto out; \ - } \ - } - - HOTMOD_INT_OPT("rsp", regspacing) - else HOTMOD_INT_OPT("rsi", regsize) - else HOTMOD_INT_OPT("rsh", regshift) - else HOTMOD_INT_OPT("irq", irq) - else HOTMOD_INT_OPT("ipmb", ipmb) - else { - printk(KERN_WARNING PFX - "Invalid hotmod option '%s'\n", - curr); + rv = check_hotmod_int_op(curr, o, "rsp", ®spacing); + if (rv < 0) goto out; - } -#undef HOTMOD_INT_OPT + else if (rv) + continue; + rv = check_hotmod_int_op(curr, o, "rsi", ®size); + if (rv < 0) + goto out; + else if (rv) + continue; + rv = check_hotmod_int_op(curr, o, "rsh", ®shift); + if (rv < 0) + goto out; + else if (rv) + continue; + rv = check_hotmod_int_op(curr, o, "irq", &irq); + if (rv < 0) + goto out; + else if (rv) + continue; + rv = check_hotmod_int_op(curr, o, "ipmb", &ipmb); + if (rv < 0) + goto out; + else if (rv) + continue; + + rv = -EINVAL; + printk(KERN_WARNING PFX + "Invalid hotmod option '%s'\n", + curr); + goto out; } if (op == HM_ADD) { @@ -1590,6 +1605,7 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) mutex_unlock(&smi_infos_lock); } } + rv = len; out: kfree(str); return rv; @@ -1610,11 +1626,11 @@ static __devinit void hardcode_find_bmc(void) info->addr_source = "hardcoded"; - if (!si_type[i] || ipmi_strcasecmp(si_type[i], "kcs") == 0) { + if (!si_type[i] || strcmp(si_type[i], "kcs") == 0) { info->si_type = SI_KCS; - } else if (ipmi_strcasecmp(si_type[i], "smic") == 0) { + } else if (strcmp(si_type[i], "smic") == 0) { info->si_type = SI_SMIC; - } else if (ipmi_strcasecmp(si_type[i], "bt") == 0) { + } else if (strcmp(si_type[i], "bt") == 0) { info->si_type = SI_BT; } else { printk(KERN_WARNING @@ -1668,7 +1684,7 @@ static __devinit void hardcode_find_bmc(void) /* Once we get an ACPI failure, we don't try any more, because we go through the tables sequentially. Once we don't find a table, there are no more. 
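The new check_hotmod_int_op() above replaces the HOTMOD_INT_OPT macro with a tri-state helper: it returns a negative errno for a malformed value, 1 when the named option matched and the value was consumed, and 0 when the option name did not match, so the caller can chain several calls and continue on a hit. A hedged user-space sketch of the same pattern (all names here are placeholders, not the driver's API):

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /* <0 on a bad value, 1 if "name" matched and *val was set,
         * 0 if this is not the option we are looking for. */
        static int parse_int_opt(const char *curr, const char *option,
                                 const char *name, int *val)
        {
                char *end;

                if (strcmp(curr, name) != 0)
                        return 0;
                if (!option || *option == '\0')
                        return -EINVAL;
                *val = strtoul(option, &end, 0);
                if (*end != '\0')
                        return -EINVAL;
                return 1;
        }

        int main(void)
        {
                int irq = 0, rv;

                rv = parse_int_opt("irq", "11", "irq", &irq);
                printf("rv=%d irq=%d\n", rv, irq);      /* rv=1 irq=11 */
                return 0;
        }
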
*/ -static int acpi_failure = 0; +static int acpi_failure; /* For GPE-type interrupts. */ static u32 ipmi_acpi_gpe(void *context) @@ -1779,7 +1795,6 @@ struct SPMITable { static __devinit int try_init_acpi(struct SPMITable *spmi) { struct smi_info *info; - char *io_type; u8 addr_space; if (spmi->IPMIlegacy != 1) { @@ -1843,11 +1858,9 @@ static __devinit int try_init_acpi(struct SPMITable *spmi) info->io.regshift = spmi->addr.register_bit_offset; if (spmi->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { - io_type = "memory"; info->io_setup = mem_setup; info->io.addr_type = IPMI_IO_ADDR_SPACE; } else if (spmi->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - io_type = "I/O"; info->io_setup = port_setup; info->io.addr_type = IPMI_MEM_ADDR_SPACE; } else { @@ -2773,8 +2786,7 @@ static __devinit int init_ipmi_si(void) #endif #ifdef CONFIG_ACPI - if (si_trydefaults) - acpi_find_bmc(); + acpi_find_bmc(); #endif #ifdef CONFIG_PCI diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 90fb2a54191..78280380a90 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -134,14 +134,14 @@ static int nowayout = WATCHDOG_NOWAYOUT; -static ipmi_user_t watchdog_user = NULL; +static ipmi_user_t watchdog_user; static int watchdog_ifnum; /* Default the timeout to 10 seconds. */ static int timeout = 10; /* The pre-timeout is disabled by default. */ -static int pretimeout = 0; +static int pretimeout; /* Default action is to reset the board on a timeout. */ static unsigned char action_val = WDOG_TIMEOUT_RESET; @@ -156,10 +156,10 @@ static unsigned char preop_val = WDOG_PREOP_NONE; static char preop[16] = "preop_none"; static DEFINE_SPINLOCK(ipmi_read_lock); -static char data_to_read = 0; +static char data_to_read; static DECLARE_WAIT_QUEUE_HEAD(read_q); -static struct fasync_struct *fasync_q = NULL; -static char pretimeout_since_last_heartbeat = 0; +static struct fasync_struct *fasync_q; +static char pretimeout_since_last_heartbeat; static char expect_close; static int ifnum_to_use = -1; @@ -177,7 +177,7 @@ static void ipmi_unregister_watchdog(int ipmi_intf); /* If true, the driver will start running as soon as it is configured and ready. */ -static int start_now = 0; +static int start_now; static int set_param_int(const char *val, struct kernel_param *kp) { @@ -300,16 +300,16 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE; /* If shutting down via IPMI, we ignore the heartbeat. */ -static int ipmi_ignore_heartbeat = 0; +static int ipmi_ignore_heartbeat; /* Is someone using the watchdog? Only one user is allowed. */ -static unsigned long ipmi_wdog_open = 0; +static unsigned long ipmi_wdog_open; /* If set to 1, the heartbeat command will set the state to reset and start the timer. The timer doesn't normally run when the driver is first opened until the heartbeat is set the first time, this variable is used to accomplish this. */ -static int ipmi_start_timer_on_heartbeat = 0; +static int ipmi_start_timer_on_heartbeat; /* IPMI version of the BMC. 
*/ static unsigned char ipmi_version_major; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 089020e0ee5..4f1813e0475 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -646,7 +646,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size) count = size; zap_page_range(vma, addr, count, NULL); - zeromap_page_range(vma, addr, count, PAGE_COPY); + if (zeromap_page_range(vma, addr, count, PAGE_COPY)) + break; size -= count; buf += count; @@ -713,11 +714,14 @@ out: static int mmap_zero(struct file * file, struct vm_area_struct * vma) { + int err; + if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); - if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; + err = zeromap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + BUG_ON(err == -EEXIST); + return err; } #else /* CONFIG_MMU */ static ssize_t read_zero(struct file * file, char * buf, diff --git a/drivers/char/random.c b/drivers/char/random.c index 092a01cc02d..13d0b1350a6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1203,7 +1203,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, static int uuid_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { unsigned char tmp_uuid[16], *uuid; unsigned int len; diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 8ab61ef97b4..b6bcdbbf57b 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -77,11 +77,11 @@ static struct clocksource clocksource_acpi_pm = { #ifdef CONFIG_PCI -static int acpi_pm_good; +static int __devinitdata acpi_pm_good; static int __init acpi_pm_good_setup(char *__str) { - acpi_pm_good = 1; - return 1; + acpi_pm_good = 1; + return 1; } __setup("acpi_pm_good", acpi_pm_good_setup); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index e816535ab30..879250d3d06 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -53,7 +53,7 @@ config CRYPTO_DEV_PADLOCK_SHA config CRYPTO_DEV_GEODE tristate "Support for the Geode LX AES engine" - depends on CRYPTO && X86_32 + depends on CRYPTO && X86_32 && PCI select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER default m diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 88214943d00..5969cec58dc 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -687,8 +687,15 @@ static void ide_dump_status_no_sense(ide_drive_t *drive, const char *msg, u8 sta static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) { struct request *rq = HWGROUP(drive)->rq; + ide_hwif_t *hwif = HWIF(drive); int stat, err, sense_key; + /* We may have bogus DMA interrupts in PIO state here */ + if (HWIF(drive)->dma_status && hwif->atapi_irq_bogon) { + stat = hwif->INB(hwif->dma_status); + /* Should we force the bit as well ? */ + hwif->OUTB(stat, hwif->dma_status); + } /* Check for errors. */ stat = HWIF(drive)->INB(IDE_STATUS_REG); if (stat_ret) diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c index 3ca581063f7..7cb48576e47 100644 --- a/drivers/ide/pci/pdc202xx_new.c +++ b/drivers/ide/pci/pdc202xx_new.c @@ -39,6 +39,14 @@ #define PDC202_DEBUG_CABLE 0 +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt, args...) printk("%s: " fmt, __FUNCTION__, ## args) +#else +#define DBG(fmt, args...) 
+#endif + static const char *pdc_quirk_drives[] = { "QUANTUM FIREBALLlct08 08", "QUANTUM FIREBALLP KA6.4", @@ -51,37 +59,11 @@ static const char *pdc_quirk_drives[] = { NULL }; -#define set_2regs(a, b) \ - do { \ - hwif->OUTB((a + adj), indexreg); \ - hwif->OUTB(b, datareg); \ - } while(0) - -#define set_ultra(a, b, c) \ - do { \ - set_2regs(0x10,(a)); \ - set_2regs(0x11,(b)); \ - set_2regs(0x12,(c)); \ - } while(0) - -#define set_ata2(a, b) \ - do { \ - set_2regs(0x0e,(a)); \ - set_2regs(0x0f,(b)); \ - } while(0) - -#define set_pio(a, b, c) \ - do { \ - set_2regs(0x0c,(a)); \ - set_2regs(0x0d,(b)); \ - set_2regs(0x13,(c)); \ - } while(0) - -static u8 pdcnew_ratemask (ide_drive_t *drive) +static u8 max_dma_rate(struct pci_dev *pdev) { u8 mode; - switch(HWIF(drive)->pci_dev->device) { + switch(pdev->device) { case PCI_DEVICE_ID_PROMISE_20277: case PCI_DEVICE_ID_PROMISE_20276: case PCI_DEVICE_ID_PROMISE_20275: @@ -96,12 +78,21 @@ static u8 pdcnew_ratemask (ide_drive_t *drive) default: return 0; } - if (!eighty_ninty_three(drive)) - mode = min(mode, (u8)1); + return mode; } -static int check_in_drive_lists (ide_drive_t *drive, const char **list) +static u8 pdcnew_ratemask(ide_drive_t *drive) +{ + u8 mode = max_dma_rate(HWIF(drive)->pci_dev); + + if (!eighty_ninty_three(drive)) + mode = min_t(u8, mode, 1); + + return mode; +} + +static int check_in_drive_lists(ide_drive_t *drive, const char **list) { struct hd_driveid *id = drive->id; @@ -121,43 +112,141 @@ static int check_in_drive_lists (ide_drive_t *drive, const char **list) return 0; } -static int pdcnew_new_tune_chipset (ide_drive_t *drive, u8 xferspeed) +/** + * get_indexed_reg - Get indexed register + * @hwif: for the port address + * @index: index of the indexed register + */ +static u8 get_indexed_reg(ide_hwif_t *hwif, u8 index) +{ + u8 value; + + hwif->OUTB(index, hwif->dma_vendor1); + value = hwif->INB(hwif->dma_vendor3); + + DBG("index[%02X] value[%02X]\n", index, value); + return value; +} + +/** + * set_indexed_reg - Set indexed register + * @hwif: for the port address + * @index: index of the indexed register + */ +static void set_indexed_reg(ide_hwif_t *hwif, u8 index, u8 value) +{ + hwif->OUTB(index, hwif->dma_vendor1); + hwif->OUTB(value, hwif->dma_vendor3); + DBG("index[%02X] value[%02X]\n", index, value); +} + +/* + * ATA Timing Tables based on 133 MHz PLL output clock. + * + * If the PLL outputs 100 MHz clock, the ASIC hardware will set + * the timing registers automatically when "set features" command is + * issued to the device. However, if the PLL output clock is 133 MHz, + * the following tables must be used. 
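The set_2regs()/set_ultra()/set_ata2()/set_pio() macros above are replaced by get_indexed_reg()/set_indexed_reg(), i.e. the classic index/data register idiom: write an index to one port (dma_vendor1), then read or write the selected register through another (dma_vendor3). A minimal user-space model of that idiom, with an array standing in for the real I/O ports (purely illustrative):

        #include <stdio.h>

        static unsigned char regs[256];         /* stands in for the chip    */
        static unsigned char index_latch;       /* stands in for dma_vendor1 */

        static void out_index(unsigned char index)   { index_latch = index; }
        static unsigned char in_data(void)           { return regs[index_latch]; }
        static void out_data(unsigned char value)    { regs[index_latch] = value; }

        static unsigned char get_indexed(unsigned char index)
        {
                out_index(index);
                return in_data();
        }

        static void set_indexed(unsigned char index, unsigned char value)
        {
                out_index(index);
                out_data(value);
        }

        int main(void)
        {
                set_indexed(0x10, 0x4a);
                printf("reg 0x10 = 0x%02x\n", get_indexed(0x10));
                return 0;
        }
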
+ */ +static struct pio_timing { + u8 reg0c, reg0d, reg13; +} pio_timings [] = { + { 0xfb, 0x2b, 0xac }, /* PIO mode 0, IORDY off, Prefetch off */ + { 0x46, 0x29, 0xa4 }, /* PIO mode 1, IORDY off, Prefetch off */ + { 0x23, 0x26, 0x64 }, /* PIO mode 2, IORDY off, Prefetch off */ + { 0x27, 0x0d, 0x35 }, /* PIO mode 3, IORDY on, Prefetch off */ + { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */ +}; + +static struct mwdma_timing { + u8 reg0e, reg0f; +} mwdma_timings [] = { + { 0xdf, 0x5f }, /* MWDMA mode 0 */ + { 0x6b, 0x27 }, /* MWDMA mode 1 */ + { 0x69, 0x25 }, /* MWDMA mode 2 */ +}; + +static struct udma_timing { + u8 reg10, reg11, reg12; +} udma_timings [] = { + { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */ + { 0x3a, 0x0a, 0xd0 }, /* UDMA mode 1 */ + { 0x2a, 0x07, 0xcd }, /* UDMA mode 2 */ + { 0x1a, 0x05, 0xcd }, /* UDMA mode 3 */ + { 0x1a, 0x03, 0xcd }, /* UDMA mode 4 */ + { 0x1a, 0x02, 0xcb }, /* UDMA mode 5 */ + { 0x1a, 0x01, 0xcb }, /* UDMA mode 6 */ +}; + +static int pdcnew_tune_chipset(ide_drive_t *drive, u8 speed) { ide_hwif_t *hwif = HWIF(drive); - unsigned long indexreg = hwif->dma_vendor1; - unsigned long datareg = hwif->dma_vendor3; - u8 thold = 0x10; - u8 adj = (drive->dn%2) ? 0x08 : 0x00; - u8 speed = ide_rate_filter(pdcnew_ratemask(drive), xferspeed); - - if (speed == XFER_UDMA_2) { - hwif->OUTB((thold + adj), indexreg); - hwif->OUTB((hwif->INB(datareg) & 0x7f), datareg); - } + u8 adj = (drive->dn & 1) ? 0x08 : 0x00; + int err; - switch (speed) { - case XFER_UDMA_7: - speed = XFER_UDMA_6; - case XFER_UDMA_6: set_ultra(0x1a, 0x01, 0xcb); break; - case XFER_UDMA_5: set_ultra(0x1a, 0x02, 0xcb); break; - case XFER_UDMA_4: set_ultra(0x1a, 0x03, 0xcd); break; - case XFER_UDMA_3: set_ultra(0x1a, 0x05, 0xcd); break; - case XFER_UDMA_2: set_ultra(0x2a, 0x07, 0xcd); break; - case XFER_UDMA_1: set_ultra(0x3a, 0x0a, 0xd0); break; - case XFER_UDMA_0: set_ultra(0x4a, 0x0f, 0xd5); break; - case XFER_MW_DMA_2: set_ata2(0x69, 0x25); break; - case XFER_MW_DMA_1: set_ata2(0x6b, 0x27); break; - case XFER_MW_DMA_0: set_ata2(0xdf, 0x5f); break; - case XFER_PIO_4: set_pio(0x23, 0x09, 0x25); break; - case XFER_PIO_3: set_pio(0x27, 0x0d, 0x35); break; - case XFER_PIO_2: set_pio(0x23, 0x26, 0x64); break; - case XFER_PIO_1: set_pio(0x46, 0x29, 0xa4); break; - case XFER_PIO_0: set_pio(0xfb, 0x2b, 0xac); break; - default: - ; - } + speed = ide_rate_filter(pdcnew_ratemask(drive), speed); + + /* + * Issue SETFEATURES_XFER to the drive first. PDC202xx hardware will + * automatically set the timing registers based on 100 MHz PLL output. + */ + err = ide_config_drive_speed(drive, speed); + + /* + * As we set up the PLL to output 133 MHz for UltraDMA/133 capable + * chips, we must override the default register settings... 
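The rewritten tuning path indexes these timing tables with the low bits of the transfer mode (mode = speed & 0x07 in the code that follows) instead of the old open-coded switch of magic register writes. A standalone sketch of that table lookup; the UDMA values are copied from the table above, the XFER numbering is only a placeholder:

        #include <stdio.h>

        struct udma_timing { unsigned char reg10, reg11, reg12; };

        static const struct udma_timing udma_timings[] = {
                { 0x4a, 0x0f, 0xd5 },   /* UDMA mode 0 */
                { 0x3a, 0x0a, 0xd0 },   /* UDMA mode 1 */
                { 0x2a, 0x07, 0xcd },   /* UDMA mode 2 */
                { 0x1a, 0x05, 0xcd },   /* UDMA mode 3 */
                { 0x1a, 0x03, 0xcd },   /* UDMA mode 4 */
                { 0x1a, 0x02, 0xcb },   /* UDMA mode 5 */
                { 0x1a, 0x01, 0xcb },   /* UDMA mode 6 */
        };

        #define XFER_UDMA_0 0x40        /* placeholder numbering */

        int main(void)
        {
                unsigned char speed = XFER_UDMA_0 + 5;
                unsigned char mode = speed & 0x07;      /* 5 */

                printf("UDMA%d -> %02x %02x %02x\n", mode,
                       udma_timings[mode].reg10,
                       udma_timings[mode].reg11,
                       udma_timings[mode].reg12);
                return 0;
        }
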
+ */ + if (max_dma_rate(hwif->pci_dev) == 4) { + u8 mode = speed & 0x07; + + switch (speed) { + case XFER_UDMA_6: + case XFER_UDMA_5: + case XFER_UDMA_4: + case XFER_UDMA_3: + case XFER_UDMA_2: + case XFER_UDMA_1: + case XFER_UDMA_0: + set_indexed_reg(hwif, 0x10 + adj, + udma_timings[mode].reg10); + set_indexed_reg(hwif, 0x11 + adj, + udma_timings[mode].reg11); + set_indexed_reg(hwif, 0x12 + adj, + udma_timings[mode].reg12); + break; + + case XFER_MW_DMA_2: + case XFER_MW_DMA_1: + case XFER_MW_DMA_0: + set_indexed_reg(hwif, 0x0e + adj, + mwdma_timings[mode].reg0e); + set_indexed_reg(hwif, 0x0f + adj, + mwdma_timings[mode].reg0f); + break; + case XFER_PIO_4: + case XFER_PIO_3: + case XFER_PIO_2: + case XFER_PIO_1: + case XFER_PIO_0: + set_indexed_reg(hwif, 0x0c + adj, + pio_timings[mode].reg0c); + set_indexed_reg(hwif, 0x0d + adj, + pio_timings[mode].reg0d); + set_indexed_reg(hwif, 0x13 + adj, + pio_timings[mode].reg13); + break; + default: + printk(KERN_ERR "pdc202xx_new: " + "Unknown speed %d ignored\n", speed); + } + } else if (speed == XFER_UDMA_2) { + /* Set tHOLD bit to 0 if using UDMA mode 2 */ + u8 tmp = get_indexed_reg(hwif, 0x10 + adj); - return (ide_config_drive_speed(drive, speed)); + set_indexed_reg(hwif, 0x10 + adj, tmp & 0x7f); + } + + return err; } /* 0 1 2 3 4 5 6 7 8 @@ -170,36 +259,42 @@ static int pdcnew_new_tune_chipset (ide_drive_t *drive, u8 xferspeed) static void pdcnew_tune_drive(ide_drive_t *drive, u8 pio) { pio = ide_get_best_pio_mode(drive, pio, 4, NULL); - (void)pdcnew_new_tune_chipset(drive, XFER_PIO_0 + pio); + (void)pdcnew_tune_chipset(drive, XFER_PIO_0 + pio); } -static u8 pdcnew_new_cable_detect (ide_hwif_t *hwif) +static u8 pdcnew_cable_detect(ide_hwif_t *hwif) { - hwif->OUTB(0x0b, hwif->dma_vendor1); - return ((u8)((hwif->INB(hwif->dma_vendor3) & 0x04))); + return get_indexed_reg(hwif, 0x0b) & 0x04; } -static int config_chipset_for_dma (ide_drive_t *drive) + +static int config_chipset_for_dma(ide_drive_t *drive) { struct hd_driveid *id = drive->id; ide_hwif_t *hwif = HWIF(drive); - u8 speed = -1; - u8 cable; - - u8 ultra_66 = ((id->dma_ultra & 0x0010) || - (id->dma_ultra & 0x0008)) ? 1 : 0; - - cable = pdcnew_new_cable_detect(hwif); + u8 ultra_66 = (id->dma_ultra & 0x0078) ? 1 : 0; + u8 cable = pdcnew_cable_detect(hwif); + u8 speed; if (ultra_66 && cable) { - printk(KERN_WARNING "Warning: %s channel requires an 80-pin cable for operation.\n", hwif->channel ? "Secondary":"Primary"); + printk(KERN_WARNING "Warning: %s channel " + "requires an 80-pin cable for operation.\n", + hwif->channel ? "Secondary" : "Primary"); printk(KERN_WARNING "%s reduced to Ultra33 mode.\n", drive->name); } if (drive->media != ide_disk) return 0; - if (id->capability & 4) { /* IORDY_EN & PREFETCH_EN */ - hwif->OUTB((0x13 + ((drive->dn%2) ? 0x08 : 0x00)), hwif->dma_vendor1); - hwif->OUTB((hwif->INB(hwif->dma_vendor3)|0x03), hwif->dma_vendor3); + + if (id->capability & 4) { + /* + * Set IORDY_EN & PREFETCH_EN (this seems to have + * NO real effect since this register is reloaded + * by hardware when the transfer mode is selected) + */ + u8 tmp, adj = (drive->dn & 1) ? 
0x08 : 0x00; + + tmp = get_indexed_reg(hwif, 0x13 + adj); + set_indexed_reg(hwif, 0x13 + adj, tmp | 0x03); } speed = ide_dma_speed(drive, pdcnew_ratemask(drive)); @@ -211,7 +306,7 @@ static int config_chipset_for_dma (ide_drive_t *drive) return ide_dma_enable(drive); } -static int pdcnew_config_drive_xfer_rate (ide_drive_t *drive) +static int pdcnew_config_drive_xfer_rate(ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); struct hd_driveid *id = drive->id; @@ -236,9 +331,9 @@ fast_ata_pio: return 0; } -static int pdcnew_quirkproc (ide_drive_t *drive) +static int pdcnew_quirkproc(ide_drive_t *drive) { - return ((int) check_in_drive_lists(drive, pdc_quirk_drives)); + return check_in_drive_lists(drive, pdc_quirk_drives); } static int pdcnew_ide_dma_lostirq(ide_drive_t *drive) @@ -255,21 +350,100 @@ static int pdcnew_ide_dma_timeout(ide_drive_t *drive) return __ide_dma_timeout(drive); } -static void pdcnew_new_reset (ide_drive_t *drive) +static void pdcnew_reset(ide_drive_t *drive) { /* * Deleted this because it is redundant from the caller. */ - printk(KERN_WARNING "PDC202XX: %s channel reset.\n", + printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n", HWIF(drive)->channel ? "Secondary" : "Primary"); } +/** + * read_counter - Read the byte count registers + * @dma_base: for the port address + */ +static long __devinit read_counter(u32 dma_base) +{ + u32 pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08; + u8 cnt0, cnt1, cnt2, cnt3; + long count = 0, last; + int retry = 3; + + do { + last = count; + + /* Read the current count */ + outb(0x20, pri_dma_base + 0x01); + cnt0 = inb(pri_dma_base + 0x03); + outb(0x21, pri_dma_base + 0x01); + cnt1 = inb(pri_dma_base + 0x03); + outb(0x20, sec_dma_base + 0x01); + cnt2 = inb(sec_dma_base + 0x03); + outb(0x21, sec_dma_base + 0x01); + cnt3 = inb(sec_dma_base + 0x03); + + count = (cnt3 << 23) | (cnt2 << 15) | (cnt1 << 8) | cnt0; + + /* + * The 30-bit decrementing counter is read in 4 pieces. + * Incorrect value may be read when the most significant bytes + * are changing... + */ + } while (retry-- && (((last ^ count) & 0x3fff8000) || last < count)); + + DBG("cnt0[%02X] cnt1[%02X] cnt2[%02X] cnt3[%02X]\n", + cnt0, cnt1, cnt2, cnt3); + + return count; +} + +/** + * detect_pll_input_clock - Detect the PLL input clock in Hz. + * @dma_base: for the port address + * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock. + */ +static long __devinit detect_pll_input_clock(unsigned long dma_base) +{ + long start_count, end_count; + long pll_input; + u8 scr1; + + start_count = read_counter(dma_base); + + /* Start the test mode */ + outb(0x01, dma_base + 0x01); + scr1 = inb(dma_base + 0x03); + DBG("scr1[%02X]\n", scr1); + outb(scr1 | 0x40, dma_base + 0x03); + + /* Let the counter run for 10 ms. 
*/ + mdelay(10); + + end_count = read_counter(dma_base); + + /* Stop the test mode */ + outb(0x01, dma_base + 0x01); + scr1 = inb(dma_base + 0x03); + DBG("scr1[%02X]\n", scr1); + outb(scr1 & ~0x40, dma_base + 0x03); + + /* + * Calculate the input clock in Hz + * (the clock counter is 30 bit wide and counts down) + */ + pll_input = ((start_count - end_count) & 0x3ffffff) * 100; + + DBG("start[%ld] end[%ld]\n", start_count, end_count); + + return pll_input; +} + #ifdef CONFIG_PPC_PMAC static void __devinit apple_kiwi_init(struct pci_dev *pdev) { struct device_node *np = pci_device_to_OF_node(pdev); unsigned int class_rev = 0; - void __iomem *mmio; u8 conf; if (np == NULL || !device_is_compatible(np, "kiwi-root")) @@ -280,30 +454,20 @@ static void __devinit apple_kiwi_init(struct pci_dev *pdev) if (class_rev >= 0x03) { /* Setup chip magic config stuff (from darwin) */ - pci_read_config_byte(pdev, 0x40, &conf); - pci_write_config_byte(pdev, 0x40, conf | 0x01); - } - mmio = ioremap(pci_resource_start(pdev, 5), - pci_resource_len(pdev, 5)); - - /* Setup some PLL stuffs */ - switch (pdev->device) { - case PCI_DEVICE_ID_PROMISE_20270: - writew(0x0d2b, mmio + 0x1202); - mdelay(30); - break; - case PCI_DEVICE_ID_PROMISE_20271: - writew(0x0826, mmio + 0x1202); - mdelay(30); - break; + pci_read_config_byte (pdev, 0x40, &conf); + pci_write_config_byte(pdev, 0x40, (conf | 0x01)); } - - iounmap(mmio); } #endif /* CONFIG_PPC_PMAC */ static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const char *name) { + unsigned long dma_base = pci_resource_start(dev, 4); + unsigned long sec_dma_base = dma_base + 0x08; + long pll_input, pll_output, ratio; + int f, r; + u8 pll_ctl0, pll_ctl1; + if (dev->resource[PCI_ROM_RESOURCE].start) { pci_write_config_dword(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); @@ -315,6 +479,106 @@ static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const cha apple_kiwi_init(dev); #endif + /* Calculate the required PLL output frequency */ + switch(max_dma_rate(dev)) { + case 4: /* it's 133 MHz for Ultra133 chips */ + pll_output = 133333333; + break; + case 3: /* and 100 MHz for Ultra100 chips */ + default: + pll_output = 100000000; + break; + } + + /* + * Detect PLL input clock. + * On some systems, where PCI bus is running at non-standard clock rate + * (e.g. 25 or 40 MHz), we have to adjust the cycle time. + * PDC20268 and newer chips employ PLL circuit to help correct timing + * registers setting. 
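detect_pll_input_clock() above samples the free-running counter, lets it run for 10 ms, samples again, and scales the difference to Hz: the counter counts down, so (start - end) counts elapsed in 10 ms, multiplied by 100, gives counts per second. A small arithmetic sketch with made-up sample values (not measured data):

        #include <stdio.h>

        int main(void)
        {
                long start_count = 0x2000000;   /* illustrative samples   */
                long end_count   = 0x1fd6a2c;   /* counter counts *down*  */
                long pll_input;

                /* Mask the wrap the same way the driver does,
                 * then scale the 10 ms window up to one second. */
                pll_input = ((start_count - end_count) & 0x3ffffff) * 100;

                printf("PLL input clock ~ %ld Hz (%ld kHz)\n",
                       pll_input, pll_input / 1000);
                return 0;
        }
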
+ */ + pll_input = detect_pll_input_clock(dma_base); + printk("%s: PLL input clock is %ld kHz\n", name, pll_input / 1000); + + /* Sanity check */ + if (unlikely(pll_input < 5000000L || pll_input > 70000000L)) { + printk(KERN_ERR "%s: Bad PLL input clock %ld Hz, giving up!\n", + name, pll_input); + goto out; + } + +#ifdef DEBUG + DBG("pll_output is %ld Hz\n", pll_output); + + /* Show the current clock value of PLL control register + * (maybe already configured by the BIOS) + */ + outb(0x02, sec_dma_base + 0x01); + pll_ctl0 = inb(sec_dma_base + 0x03); + outb(0x03, sec_dma_base + 0x01); + pll_ctl1 = inb(sec_dma_base + 0x03); + + DBG("pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1); +#endif + + /* + * Calculate the ratio of F, R and NO + * POUT = (F + 2) / (( R + 2) * NO) + */ + ratio = pll_output / (pll_input / 1000); + if (ratio < 8600L) { /* 8.6x */ + /* Using NO = 0x01, R = 0x0d */ + r = 0x0d; + } else if (ratio < 12900L) { /* 12.9x */ + /* Using NO = 0x01, R = 0x08 */ + r = 0x08; + } else if (ratio < 16100L) { /* 16.1x */ + /* Using NO = 0x01, R = 0x06 */ + r = 0x06; + } else if (ratio < 64000L) { /* 64x */ + r = 0x00; + } else { + /* Invalid ratio */ + printk(KERN_ERR "%s: Bad ratio %ld, giving up!\n", name, ratio); + goto out; + } + + f = (ratio * (r + 2)) / 1000 - 2; + + DBG("F[%d] R[%d] ratio*1000[%ld]\n", f, r, ratio); + + if (unlikely(f < 0 || f > 127)) { + /* Invalid F */ + printk(KERN_ERR "%s: F[%d] invalid!\n", name, f); + goto out; + } + + pll_ctl0 = (u8) f; + pll_ctl1 = (u8) r; + + DBG("Writing pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1); + + outb(0x02, sec_dma_base + 0x01); + outb(pll_ctl0, sec_dma_base + 0x03); + outb(0x03, sec_dma_base + 0x01); + outb(pll_ctl1, sec_dma_base + 0x03); + + /* Wait the PLL circuit to be stable */ + mdelay(30); + +#ifdef DEBUG + /* + * Show the current clock value of PLL control register + */ + outb(0x02, sec_dma_base + 0x01); + pll_ctl0 = inb(sec_dma_base + 0x03); + outb(0x03, sec_dma_base + 0x01); + pll_ctl1 = inb(sec_dma_base + 0x03); + + DBG("pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1); +#endif + + out: return dev->irq; } @@ -324,8 +588,8 @@ static void __devinit init_hwif_pdc202new(ide_hwif_t *hwif) hwif->tuneproc = &pdcnew_tune_drive; hwif->quirkproc = &pdcnew_quirkproc; - hwif->speedproc = &pdcnew_new_tune_chipset; - hwif->resetproc = &pdcnew_new_reset; + hwif->speedproc = &pdcnew_tune_chipset; + hwif->resetproc = &pdcnew_reset; hwif->drives[0].autotune = hwif->drives[1].autotune = 1; @@ -337,11 +601,14 @@ static void __devinit init_hwif_pdc202new(ide_hwif_t *hwif) hwif->ide_dma_check = &pdcnew_config_drive_xfer_rate; hwif->ide_dma_lostirq = &pdcnew_ide_dma_lostirq; hwif->ide_dma_timeout = &pdcnew_ide_dma_timeout; - if (!(hwif->udma_four)) - hwif->udma_four = (pdcnew_new_cable_detect(hwif)) ? 0 : 1; + + if (!hwif->udma_four) + hwif->udma_four = pdcnew_cable_detect(hwif) ? 0 : 1; + if (!noautodma) hwif->autodma = 1; hwif->drives[0].autodma = hwif->drives[1].autodma = hwif->autodma; + #if PDC202_DEBUG_CABLE printk(KERN_DEBUG "%s: %s-pin cable\n", hwif->name, hwif->udma_four ? 
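The PLL programming above picks the divider R from the ratio pll_output/pll_input (kept scaled by 1000) and then solves F from POUT = (F + 2) / ((R + 2) * NO) with NO = 1. A hedged sketch of just that arithmetic, with the thresholds copied from the hunk above and example clock values:

        #include <stdio.h>

        int main(void)
        {
                long pll_input  = 16949000;     /* Hz, e.g. 33 MHz PCI / 2 */
                long pll_output = 133333333;    /* Hz, Ultra133 parts      */
                long ratio, f;
                int r;

                ratio = pll_output / (pll_input / 1000);   /* ratio * 1000 */

                if (ratio < 8600L)
                        r = 0x0d;
                else if (ratio < 12900L)
                        r = 0x08;
                else if (ratio < 16100L)
                        r = 0x06;
                else if (ratio < 64000L)
                        r = 0x00;
                else
                        return 1;               /* ratio out of range */

                /* POUT = (F + 2) / ((R + 2) * NO), NO = 1  =>  solve for F */
                f = (ratio * (r + 2)) / 1000 - 2;

                printf("ratio*1000=%ld -> R=0x%02x F=%ld\n", ratio, r, f);
                return 0;
        }

With the example 16.949 MHz input, ratio*1000 is about 7867, so R = 0x0d and F = 116, both inside the 0..127 range the driver checks for.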
"80" : "40"); diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c index cdc3aab9ebc..b1e9a8eba6b 100644 --- a/drivers/ide/pci/piix.c +++ b/drivers/ide/pci/piix.c @@ -505,6 +505,10 @@ static void __devinit init_hwif_piix(ide_hwif_t *hwif) /* This is a painful system best to let it self tune for now */ return; } + /* ESB2 appears to generate spurious DMA interrupts in PIO mode + when in native mode */ + if (hwif->pci_dev->device == PCI_DEVICE_ID_INTEL_ESB2_18) + hwif->atapi_irq_bogon = 1; hwif->autodma = 0; hwif->tuneproc = &piix_tune_drive; diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 0719b648482..695e23904d3 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -844,11 +844,11 @@ void __init ide_scan_pcibus (int scan_direction) pre_init = 0; if (!scan_direction) { - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { ide_scan_pcidev(dev); } } else { - while ((dev = pci_find_device_reverse(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + while ((dev = pci_get_device_reverse(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { ide_scan_pcidev(dev); } } diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 29ca0ab0acb..3d5f1965803 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -146,7 +146,7 @@ config TOUCHSCREEN_TOUCHWIN config TOUCHSCREEN_UCB1400 tristate "Philips UCB1400 touchscreen" - select SND_AC97_BUS + depends on SND_AC97_BUS help This enables support for the Philips UCB1400 touchscreen interface. The UCB1400 is an AC97 audio codec. The touchscreen interface diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 6fa12cc8e4f..34ab5f7dcab 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -110,7 +110,7 @@ config HISAX_16_3 config HISAX_TELESPCI bool "Teles PCI" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV)) help This enables HiSax support for the Teles PCI. See <file:Documentation/isdn/README.HiSax> on how to configure it. @@ -238,7 +238,7 @@ config HISAX_MIC config HISAX_NETJET bool "NETjet card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV)) help This enables HiSax support for the NetJet from Traverse Technologies. @@ -249,7 +249,7 @@ config HISAX_NETJET config HISAX_NETJET_U bool "NETspider U card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV)) help This enables HiSax support for the Netspider U interface ISDN card from Traverse Technologies. @@ -317,7 +317,7 @@ config HISAX_GAZEL config HISAX_HFC_PCI bool "HFC PCI-Bus cards" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV)) help This enables HiSax support for the HFC-S PCI 2BDS0 based cards. 
@@ -344,7 +344,7 @@ config HISAX_HFC_SX config HISAX_ENTERNOW_PCI bool "Formula-n enter:now PCI card" - depends on HISAX_NETJET && PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) + depends on HISAX_NETJET && PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV)) help This enables HiSax support for the Formula-n enter:now PCI ISDN card. diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig new file mode 100644 index 00000000000..36412e90f09 --- /dev/null +++ b/drivers/kvm/Kconfig @@ -0,0 +1,33 @@ +# +# KVM configuration +# +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on X86 && EXPERIMENTAL + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel processors support" + depends on KVM + ---help--- + Provides support for KVM on Intel processors equipped with the VT + extensions. + +config KVM_AMD + tristate "KVM for AMD processors support" + depends on KVM + ---help--- + Provides support for KVM on AMD processors equipped with the AMD-V + (SVM) extensions. diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile new file mode 100644 index 00000000000..c0a789fa9d6 --- /dev/null +++ b/drivers/kvm/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +kvm-objs := kvm_main.o mmu.o x86_emulate.o +obj-$(CONFIG_KVM) += kvm.o +kvm-intel-objs = vmx.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o +kvm-amd-objs = svm.o +obj-$(CONFIG_KVM_AMD) += kvm-amd.o diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h new file mode 100644 index 00000000000..5785d0870ab --- /dev/null +++ b/drivers/kvm/kvm.h @@ -0,0 +1,551 @@ +#ifndef __KVM_H +#define __KVM_H + +/* + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/mm.h> + +#include "vmx.h" +#include <linux/kvm.h> + +#define CR0_PE_MASK (1ULL << 0) +#define CR0_TS_MASK (1ULL << 3) +#define CR0_NE_MASK (1ULL << 5) +#define CR0_WP_MASK (1ULL << 16) +#define CR0_NW_MASK (1ULL << 29) +#define CR0_CD_MASK (1ULL << 30) +#define CR0_PG_MASK (1ULL << 31) + +#define CR3_WPT_MASK (1ULL << 3) +#define CR3_PCD_MASK (1ULL << 4) + +#define CR3_RESEVED_BITS 0x07ULL +#define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL) +#define CR3_FLAGS_MASK ((1ULL << 5) - 1) + +#define CR4_VME_MASK (1ULL << 0) +#define CR4_PSE_MASK (1ULL << 4) +#define CR4_PAE_MASK (1ULL << 5) +#define CR4_PGE_MASK (1ULL << 7) +#define CR4_VMXE_MASK (1ULL << 13) + +#define KVM_GUEST_CR0_MASK \ + (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ + | CR0_NW_MASK | CR0_CD_MASK) +#define KVM_VM_CR0_ALWAYS_ON \ + (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK) +#define KVM_GUEST_CR4_MASK \ + (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) +#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) +#define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK) + +#define INVALID_PAGE (~(hpa_t)0) +#define UNMAPPED_GVA (~(gpa_t)0) + +#define KVM_MAX_VCPUS 1 +#define KVM_MEMORY_SLOTS 4 +#define KVM_NUM_MMU_PAGES 256 + +#define FX_IMAGE_SIZE 512 +#define FX_IMAGE_ALIGN 16 +#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) + +#define DE_VECTOR 0 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 + +#define SELECTOR_TI_MASK (1 << 2) +#define SELECTOR_RPL_MASK 0x03 + +#define IOPL_SHIFT 12 + +/* + * Address types: + * + * gva - guest virtual address + * gpa - guest physical address + * gfn - guest frame number + * hva - host virtual address + * hpa - host physical address + * hfn - host frame number + */ + +typedef unsigned long gva_t; +typedef u64 gpa_t; +typedef unsigned long gfn_t; + +typedef unsigned long hva_t; +typedef u64 hpa_t; +typedef unsigned long hfn_t; + +struct kvm_mmu_page { + struct list_head link; + hpa_t page_hpa; + unsigned long slot_bitmap; /* One bit set per slot which has memory + * in this shadow page. + */ + int global; /* Set if all ptes in this page are global */ + u64 *parent_pte; +}; + +struct vmcs { + u32 revision_id; + u32 abort; + char data[0]; +}; + +#define vmx_msr_entry kvm_msr_entry + +struct kvm_vcpu; + +/* + * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level + * 32-bit). The kvm_mmu structure abstracts the details of the current mmu + * mode. 
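kvm.h distinguishes five address spaces (gva/gpa/gfn/hva/hpa, see the typedefs above); a guest frame number is simply the guest physical address shifted down by PAGE_SHIFT, and the in-page offset is the low bits. A tiny user-space illustration of that arithmetic (PAGE_SHIFT of 12 assumed for the example):

        #include <stdio.h>

        typedef unsigned long long gpa_t;
        typedef unsigned long gfn_t;

        #define PAGE_SHIFT 12
        #define PAGE_SIZE  (1UL << PAGE_SHIFT)

        int main(void)
        {
                gpa_t gpa = 0x12345678ULL;
                gfn_t gfn = gpa >> PAGE_SHIFT;              /* guest frame  */
                unsigned long offset = gpa & (PAGE_SIZE - 1);

                printf("gpa=%#llx -> gfn=%#lx offset=%#lx\n",
                       (unsigned long long)gpa, gfn, offset);
                return 0;
        }
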
+ */ +struct kvm_mmu { + void (*new_cr3)(struct kvm_vcpu *vcpu); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva); + void (*free)(struct kvm_vcpu *vcpu); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); + hpa_t root_hpa; + int root_level; + int shadow_root_level; +}; + +struct kvm_guest_debug { + int enabled; + unsigned long bp[4]; + int singlestep; +}; + +enum { + VCPU_REGS_RAX = 0, + VCPU_REGS_RCX = 1, + VCPU_REGS_RDX = 2, + VCPU_REGS_RBX = 3, + VCPU_REGS_RSP = 4, + VCPU_REGS_RBP = 5, + VCPU_REGS_RSI = 6, + VCPU_REGS_RDI = 7, +#ifdef __x86_64__ + VCPU_REGS_R8 = 8, + VCPU_REGS_R9 = 9, + VCPU_REGS_R10 = 10, + VCPU_REGS_R11 = 11, + VCPU_REGS_R12 = 12, + VCPU_REGS_R13 = 13, + VCPU_REGS_R14 = 14, + VCPU_REGS_R15 = 15, +#endif + NR_VCPU_REGS +}; + +enum { + VCPU_SREG_CS, + VCPU_SREG_DS, + VCPU_SREG_ES, + VCPU_SREG_FS, + VCPU_SREG_GS, + VCPU_SREG_SS, + VCPU_SREG_TR, + VCPU_SREG_LDTR, +}; + +struct kvm_vcpu { + struct kvm *kvm; + union { + struct vmcs *vmcs; + struct vcpu_svm *svm; + }; + struct mutex mutex; + int cpu; + int launched; + unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ +#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) + unsigned long irq_pending[NR_IRQ_WORDS]; + unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ + unsigned long rip; /* needs vcpu_load_rsp_rip() */ + + unsigned long cr0; + unsigned long cr2; + unsigned long cr3; + unsigned long cr4; + unsigned long cr8; + u64 shadow_efer; + u64 apic_base; + int nmsrs; + struct vmx_msr_entry *guest_msrs; + struct vmx_msr_entry *host_msrs; + + struct list_head free_pages; + struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES]; + struct kvm_mmu mmu; + + struct kvm_guest_debug guest_debug; + + char fx_buf[FX_BUF_SIZE]; + char *host_fx_image; + char *guest_fx_image; + + int mmio_needed; + int mmio_read_completed; + int mmio_is_write; + int mmio_size; + unsigned char mmio_data[8]; + gpa_t mmio_phys_addr; + + struct { + int active; + u8 save_iopl; + struct kvm_save_segment { + u16 selector; + unsigned long base; + u32 limit; + u32 ar; + } tr, es, ds, fs, gs; + } rmode; +}; + +struct kvm_memory_slot { + gfn_t base_gfn; + unsigned long npages; + unsigned long flags; + struct page **phys_mem; + unsigned long *dirty_bitmap; +}; + +struct kvm { + spinlock_t lock; /* protects everything except vcpus */ + int nmemslots; + struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; + struct list_head active_mmu_pages; + struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + int memory_config_version; + int busy; +}; + +struct kvm_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_exits; +}; + +struct descriptor_table { + u16 limit; + unsigned long base; +} __attribute__((packed)); + +struct kvm_arch_ops { + int (*cpu_has_kvm_support)(void); /* __init */ + int (*disabled_by_bios)(void); /* __init */ + void (*hardware_enable)(void *dummy); /* __init */ + void (*hardware_disable)(void *dummy); + int (*hardware_setup)(void); /* __init */ + void (*hardware_unsetup)(void); /* __exit */ + + int (*vcpu_create)(struct kvm_vcpu *vcpu); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + + struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + + int (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg); + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + int (*set_msr)(struct kvm_vcpu 
*vcpu, u32 msr_index, u64 data); + u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); + void (*get_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*set_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + int (*is_long_mode)(struct kvm_vcpu *vcpu); + void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, + unsigned long cr0); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); + void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, + int *exception); + void (*cache_regs)(struct kvm_vcpu *vcpu); + void (*decache_regs)(struct kvm_vcpu *vcpu); + unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr); + void (*tlb_flush)(struct kvm_vcpu *vcpu); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + unsigned long addr, u32 err_code); + + void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); + + int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + int (*vcpu_setup)(struct kvm_vcpu *vcpu); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); +}; + +extern struct kvm_stat kvm_stat; +extern struct kvm_arch_ops *kvm_arch_ops; + +#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) +#define vcpu_printf(vcpu, fmt...) 
kvm_printf(vcpu->kvm, fmt) + +int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); +void kvm_exit_arch(void); + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu); +int kvm_mmu_init(struct kvm_vcpu *vcpu); + +int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); + +hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); +#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) +#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) +static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } +hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); + +void kvm_emulator_want_group7_invlpg(void); + +extern hpa_t bad_page_address; + +static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return slot->phys_mem[gfn - slot->base_gfn]; +} + +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); +void mark_page_dirty(struct kvm *kvm, gfn_t gfn); + +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long cr2, u16 error_code); +void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags); + +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, + unsigned long *rflags); + +struct x86_emulate_ctxt; + +int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); +int emulate_clts(struct kvm_vcpu *vcpu); +int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, + unsigned long *dest); +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value); + +void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); +void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); + +#ifdef __x86_64__ +void set_efer(struct kvm_vcpu *vcpu, u64 efer); +#endif + +void fx_init(struct kvm_vcpu *vcpu); + +void load_msrs(struct vmx_msr_entry *e, int n); +void save_msrs(struct vmx_msr_entry *e, int n); +void kvm_resched(struct kvm_vcpu *vcpu); + +int kvm_read_guest(struct kvm_vcpu *vcpu, + gva_t addr, + unsigned long size, + void *dest); + +int kvm_write_guest(struct kvm_vcpu *vcpu, + gva_t addr, + unsigned long size, + void *data); + +unsigned long segment_base(u16 selector); + +static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + return (slot) ? 
slot->phys_mem[gfn - slot->base_gfn] : NULL; +} + +static inline int is_pae(struct kvm_vcpu *vcpu) +{ + return vcpu->cr4 & CR4_PAE_MASK; +} + +static inline int is_pse(struct kvm_vcpu *vcpu) +{ + return vcpu->cr4 & CR4_PSE_MASK; +} + +static inline int is_paging(struct kvm_vcpu *vcpu) +{ + return vcpu->cr0 & CR0_PG_MASK; +} + +static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + return slot - kvm->memslots; +} + +static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) +{ + struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page->private; +} + +static inline u16 read_fs(void) +{ + u16 seg; + asm ("mov %%fs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 read_gs(void) +{ + u16 seg; + asm ("mov %%gs, %0" : "=g"(seg)); + return seg; +} + +static inline u16 read_ldt(void) +{ + u16 ldt; + asm ("sldt %0" : "=g"(ldt)); + return ldt; +} + +static inline void load_fs(u16 sel) +{ + asm ("mov %0, %%fs" : : "rm"(sel)); +} + +static inline void load_gs(u16 sel) +{ + asm ("mov %0, %%gs" : : "rm"(sel)); +} + +#ifndef load_ldt +static inline void load_ldt(u16 sel) +{ + asm ("lldt %0" : : "g"(sel)); +} +#endif + +static inline void get_idt(struct descriptor_table *table) +{ + asm ("sidt %0" : "=m"(*table)); +} + +static inline void get_gdt(struct descriptor_table *table) +{ + asm ("sgdt %0" : "=m"(*table)); +} + +static inline unsigned long read_tr_base(void) +{ + u16 tr; + asm ("str %0" : "=g"(tr)); + return segment_base(tr); +} + +#ifdef __x86_64__ +static inline unsigned long read_msr(unsigned long msr) +{ + u64 value; + + rdmsrl(msr, value); + return value; +} +#endif + +static inline void fx_save(void *image) +{ + asm ("fxsave (%0)":: "r" (image)); +} + +static inline void fx_restore(void *image) +{ + asm ("fxrstor (%0)":: "r" (image)); +} + +static inline void fpu_init(void) +{ + asm ("finit"); +} + +static inline u32 get_rdx_init_val(void) +{ + return 0x600; /* P6 family */ +} + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" + +#define MSR_IA32_TIME_STAMP_COUNTER 0x010 + +#define TSS_IOPB_BASE_OFFSET 0x66 +#define TSS_BASE_SIZE 0x68 +#define TSS_IOPB_SIZE (65536 / 8) +#define TSS_REDIRECTION_SIZE (256 / 8) +#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) + +#ifdef __x86_64__ + +/* + * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. Therefore + * we need to allocate shadow page tables in the first 4GB of memory, which + * happens to fit the DMA32 zone. + */ +#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32) + +#else + +#define GFP_KVM_MMU GFP_KERNEL + +#endif + +#endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c new file mode 100644 index 00000000000..b6b8a41b5ec --- /dev/null +++ b/drivers/kvm/kvm_main.c @@ -0,0 +1,1935 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * Copyright (C) 2006 Qumranet, Inc. 
+ * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "kvm.h" + +#include <linux/kvm.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <asm/processor.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <asm/msr.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <linux/reboot.h> +#include <asm/io.h> +#include <linux/debugfs.h> +#include <linux/highmem.h> +#include <linux/file.h> +#include <asm/desc.h> + +#include "x86_emulate.h" +#include "segment_descriptor.h" + +MODULE_AUTHOR("Qumranet"); +MODULE_LICENSE("GPL"); + +struct kvm_arch_ops *kvm_arch_ops; +struct kvm_stat kvm_stat; +EXPORT_SYMBOL_GPL(kvm_stat); + +static struct kvm_stats_debugfs_item { + const char *name; + u32 *data; + struct dentry *dentry; +} debugfs_entries[] = { + { "pf_fixed", &kvm_stat.pf_fixed }, + { "pf_guest", &kvm_stat.pf_guest }, + { "tlb_flush", &kvm_stat.tlb_flush }, + { "invlpg", &kvm_stat.invlpg }, + { "exits", &kvm_stat.exits }, + { "io_exits", &kvm_stat.io_exits }, + { "mmio_exits", &kvm_stat.mmio_exits }, + { "signal_exits", &kvm_stat.signal_exits }, + { "irq_exits", &kvm_stat.irq_exits }, + { 0, 0 } +}; + +static struct dentry *debugfs_dir; + +#define MAX_IO_MSRS 256 + +#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL +#define LMSW_GUEST_MASK 0x0eULL +#define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) +#define CR8_RESEVED_BITS (~0x0fULL) +#define EFER_RESERVED_BITS 0xfffffffffffff2fe + +struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +{ + int i; + + for (i = 0; i < vcpu->nmsrs; ++i) + if (vcpu->guest_msrs[i].index == msr) + return &vcpu->guest_msrs[i]; + return 0; +} +EXPORT_SYMBOL_GPL(find_msr_entry); + +#ifdef __x86_64__ +// LDT or TSS descriptor in the GDT. 16 bytes. 
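segment_base(), just below, rebuilds a linear base address from the split base_low/base_mid/base_high fields of a GDT/LDT descriptor, folding in the extra base_higher dword for 64-bit system descriptors. A self-contained sketch of that bit assembly; the struct here is illustrative and not the driver's segment_descriptor layout:

        #include <stdio.h>

        struct fake_descriptor {
                unsigned short base_low;        /* bits  0..15 */
                unsigned char  base_mid;        /* bits 16..23 */
                unsigned char  base_high;       /* bits 24..31 */
        };

        int main(void)
        {
                struct fake_descriptor d = { 0x5678, 0x34, 0x12 };
                unsigned long base;

                base = d.base_low
                     | ((unsigned long)d.base_mid  << 16)
                     | ((unsigned long)d.base_high << 24);

                printf("segment base = %#lx\n", base);   /* 0x12345678 */
                return 0;
        }
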
+struct segment_descriptor_64 { + struct segment_descriptor s; + u32 base_higher; + u32 pad_zero; +}; + +#endif + +unsigned long segment_base(u16 selector) +{ + struct descriptor_table gdt; + struct segment_descriptor *d; + unsigned long table_base; + typedef unsigned long ul; + unsigned long v; + + if (selector == 0) + return 0; + + asm ("sgdt %0" : "=m"(gdt)); + table_base = gdt.base; + + if (selector & 4) { /* from ldt */ + u16 ldt_selector; + + asm ("sldt %0" : "=g"(ldt_selector)); + table_base = segment_base(ldt_selector); + } + d = (struct segment_descriptor *)(table_base + (selector & ~7)); + v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24); +#ifdef __x86_64__ + if (d->system == 0 + && (d->type == 2 || d->type == 9 || d->type == 11)) + v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32; +#endif + return v; +} +EXPORT_SYMBOL_GPL(segment_base); + +int kvm_read_guest(struct kvm_vcpu *vcpu, + gva_t addr, + unsigned long size, + void *dest) +{ + unsigned char *host_buf = dest; + unsigned long req_size = size; + + while (size) { + hpa_t paddr; + unsigned now; + unsigned offset; + hva_t guest_buf; + + paddr = gva_to_hpa(vcpu, addr); + + if (is_error_hpa(paddr)) + break; + + guest_buf = (hva_t)kmap_atomic( + pfn_to_page(paddr >> PAGE_SHIFT), + KM_USER0); + offset = addr & ~PAGE_MASK; + guest_buf |= offset; + now = min(size, PAGE_SIZE - offset); + memcpy(host_buf, (void*)guest_buf, now); + host_buf += now; + addr += now; + size -= now; + kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); + } + return req_size - size; +} +EXPORT_SYMBOL_GPL(kvm_read_guest); + +int kvm_write_guest(struct kvm_vcpu *vcpu, + gva_t addr, + unsigned long size, + void *data) +{ + unsigned char *host_buf = data; + unsigned long req_size = size; + + while (size) { + hpa_t paddr; + unsigned now; + unsigned offset; + hva_t guest_buf; + + paddr = gva_to_hpa(vcpu, addr); + + if (is_error_hpa(paddr)) + break; + + guest_buf = (hva_t)kmap_atomic( + pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); + offset = addr & ~PAGE_MASK; + guest_buf |= offset; + now = min(size, PAGE_SIZE - offset); + memcpy((void*)guest_buf, host_buf, now); + host_buf += now; + addr += now; + size -= now; + kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); + } + return req_size - size; +} +EXPORT_SYMBOL_GPL(kvm_write_guest); + +static int vcpu_slot(struct kvm_vcpu *vcpu) +{ + return vcpu - vcpu->kvm->vcpus; +} + +/* + * Switches to specified vcpu, until a matching vcpu_put() + */ +static struct kvm_vcpu *vcpu_load(struct kvm *kvm, int vcpu_slot) +{ + struct kvm_vcpu *vcpu = &kvm->vcpus[vcpu_slot]; + + mutex_lock(&vcpu->mutex); + if (unlikely(!vcpu->vmcs)) { + mutex_unlock(&vcpu->mutex); + return 0; + } + return kvm_arch_ops->vcpu_load(vcpu); +} + +static void vcpu_put(struct kvm_vcpu *vcpu) +{ + kvm_arch_ops->vcpu_put(vcpu); + put_cpu(); + mutex_unlock(&vcpu->mutex); +} + +static int kvm_dev_open(struct inode *inode, struct file *filp) +{ + struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + int i; + + if (!kvm) + return -ENOMEM; + + spin_lock_init(&kvm->lock); + INIT_LIST_HEAD(&kvm->active_mmu_pages); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + struct kvm_vcpu *vcpu = &kvm->vcpus[i]; + + mutex_init(&vcpu->mutex); + vcpu->mmu.root_hpa = INVALID_PAGE; + INIT_LIST_HEAD(&vcpu->free_pages); + } + filp->private_data = kvm; + return 0; +} + +/* + * Free any memory in @free but not in @dont. 
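kvm_read_guest() and kvm_write_guest() above walk the guest buffer one page at a time: translate the address, map the page, copy min(size, PAGE_SIZE - offset) bytes, advance, and report how much was actually copied (the driver also bails out early when a page cannot be translated). A user-space sketch of the same chunking loop, with a flat array and memcpy standing in for the kmap_atomic-and-copy step:

        #include <stdio.h>
        #include <string.h>

        #define PAGE_SIZE 4096UL

        /* Copy "size" bytes starting at "addr" of "mem" into "dest",
         * one page-bounded chunk at a time; returns bytes copied. */
        static unsigned long read_chunked(const unsigned char *mem,
                                          unsigned long addr,
                                          unsigned long size, void *dest)
        {
                unsigned char *out = dest;
                unsigned long req_size = size;

                while (size) {
                        unsigned long offset = addr & (PAGE_SIZE - 1);
                        unsigned long now = size < PAGE_SIZE - offset ?
                                            size : PAGE_SIZE - offset;

                        memcpy(out, mem + addr, now);   /* per-page step */
                        out  += now;
                        addr += now;
                        size -= now;
                }
                return req_size - size;
        }

        int main(void)
        {
                static unsigned char mem[3 * 4096];
                unsigned char buf[6000];

                memset(mem, 0xab, sizeof(mem));
                printf("copied %lu bytes\n",
                       read_chunked(mem, 4000, sizeof(buf), buf));
                return 0;
        }
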
+ */ +static void kvm_free_physmem_slot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + int i; + + if (!dont || free->phys_mem != dont->phys_mem) + if (free->phys_mem) { + for (i = 0; i < free->npages; ++i) + __free_page(free->phys_mem[i]); + vfree(free->phys_mem); + } + + if (!dont || free->dirty_bitmap != dont->dirty_bitmap) + vfree(free->dirty_bitmap); + + free->phys_mem = 0; + free->npages = 0; + free->dirty_bitmap = 0; +} + +static void kvm_free_physmem(struct kvm *kvm) +{ + int i; + + for (i = 0; i < kvm->nmemslots; ++i) + kvm_free_physmem_slot(&kvm->memslots[i], 0); +} + +static void kvm_free_vcpu(struct kvm_vcpu *vcpu) +{ + kvm_arch_ops->vcpu_free(vcpu); + kvm_mmu_destroy(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) + kvm_free_vcpu(&kvm->vcpus[i]); +} + +static int kvm_dev_release(struct inode *inode, struct file *filp) +{ + struct kvm *kvm = filp->private_data; + + kvm_free_vcpus(kvm); + kvm_free_physmem(kvm); + kfree(kvm); + return 0; +} + +static void inject_gp(struct kvm_vcpu *vcpu) +{ + kvm_arch_ops->inject_gp(vcpu, 0); +} + +static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu, + unsigned long cr3) +{ + gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; + unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5; + int i; + u64 pdpte; + u64 *pdpt; + struct kvm_memory_slot *memslot; + + spin_lock(&vcpu->kvm->lock); + memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn); + /* FIXME: !memslot - emulate? 0xff? */ + pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); + + for (i = 0; i < 4; ++i) { + pdpte = pdpt[offset + i]; + if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) + break; + } + + kunmap_atomic(pdpt, KM_USER0); + spin_unlock(&vcpu->kvm->lock); + + return i != 4; +} + +void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +{ + if (cr0 & CR0_RESEVED_BITS) { + printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", + cr0, vcpu->cr0); + inject_gp(vcpu); + return; + } + + if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { + printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); + inject_gp(vcpu); + return; + } + + if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { + printk(KERN_DEBUG "set_cr0: #GP, set PG flag " + "and a clear PE flag\n"); + inject_gp(vcpu); + return; + } + + if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { +#ifdef __x86_64__ + if ((vcpu->shadow_efer & EFER_LME)) { + int cs_db, cs_l; + + if (!is_pae(vcpu)) { + printk(KERN_DEBUG "set_cr0: #GP, start paging " + "in long mode while PAE is disabled\n"); + inject_gp(vcpu); + return; + } + kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + if (cs_l) { + printk(KERN_DEBUG "set_cr0: #GP, start paging " + "in long mode while CS.L == 1\n"); + inject_gp(vcpu); + return; + + } + } else +#endif + if (is_pae(vcpu) && + pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) { + printk(KERN_DEBUG "set_cr0: #GP, pdptrs " + "reserved bits\n"); + inject_gp(vcpu); + return; + } + + } + + kvm_arch_ops->set_cr0(vcpu, cr0); + vcpu->cr0 = cr0; + + spin_lock(&vcpu->kvm->lock); + kvm_mmu_reset_context(vcpu); + spin_unlock(&vcpu->kvm->lock); + return; +} +EXPORT_SYMBOL_GPL(set_cr0); + +void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) +{ + set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); +} +EXPORT_SYMBOL_GPL(lmsw); + +void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + if (cr4 & CR4_RESEVED_BITS) { + printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + + if (kvm_arch_ops->is_long_mode(vcpu)) { + if 
(!(cr4 & CR4_PAE_MASK)) { + printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " + "in long mode\n"); + inject_gp(vcpu); + return; + } + } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) + && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) { + printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); + inject_gp(vcpu); + } + + if (cr4 & CR4_VMXE_MASK) { + printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); + inject_gp(vcpu); + return; + } + kvm_arch_ops->set_cr4(vcpu, cr4); + spin_lock(&vcpu->kvm->lock); + kvm_mmu_reset_context(vcpu); + spin_unlock(&vcpu->kvm->lock); +} +EXPORT_SYMBOL_GPL(set_cr4); + +void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +{ + if (kvm_arch_ops->is_long_mode(vcpu)) { + if ( cr3 & CR3_L_MODE_RESEVED_BITS) { + printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + } else { + if (cr3 & CR3_RESEVED_BITS) { + printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + if (is_paging(vcpu) && is_pae(vcpu) && + pdptrs_have_reserved_bits_set(vcpu, cr3)) { + printk(KERN_DEBUG "set_cr3: #GP, pdptrs " + "reserved bits\n"); + inject_gp(vcpu); + return; + } + } + + vcpu->cr3 = cr3; + spin_lock(&vcpu->kvm->lock); + vcpu->mmu.new_cr3(vcpu); + spin_unlock(&vcpu->kvm->lock); +} +EXPORT_SYMBOL_GPL(set_cr3); + +void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +{ + if ( cr8 & CR8_RESEVED_BITS) { + printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); + inject_gp(vcpu); + return; + } + vcpu->cr8 = cr8; +} +EXPORT_SYMBOL_GPL(set_cr8); + +void fx_init(struct kvm_vcpu *vcpu) +{ + struct __attribute__ ((__packed__)) fx_image_s { + u16 control; //fcw + u16 status; //fsw + u16 tag; // ftw + u16 opcode; //fop + u64 ip; // fpu ip + u64 operand;// fpu dp + u32 mxcsr; + u32 mxcsr_mask; + + } *fx_image; + + fx_save(vcpu->host_fx_image); + fpu_init(); + fx_save(vcpu->guest_fx_image); + fx_restore(vcpu->host_fx_image); + + fx_image = (struct fx_image_s *)vcpu->guest_fx_image; + fx_image->mxcsr = 0x1f80; + memset(vcpu->guest_fx_image + sizeof(struct fx_image_s), + 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s)); +} +EXPORT_SYMBOL_GPL(fx_init); + +/* + * Creates some virtual cpus. Good luck creating more than one. + */ +static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n) +{ + int r; + struct kvm_vcpu *vcpu; + + r = -EINVAL; + if (n < 0 || n >= KVM_MAX_VCPUS) + goto out; + + vcpu = &kvm->vcpus[n]; + + mutex_lock(&vcpu->mutex); + + if (vcpu->vmcs) { + mutex_unlock(&vcpu->mutex); + return -EEXIST; + } + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + vcpu->cpu = -1; /* First load will set up TR */ + vcpu->kvm = kvm; + r = kvm_arch_ops->vcpu_create(vcpu); + if (r < 0) + goto out_free_vcpus; + + kvm_arch_ops->vcpu_load(vcpu); + + r = kvm_arch_ops->vcpu_setup(vcpu); + if (r >= 0) + r = kvm_mmu_init(vcpu); + + vcpu_put(vcpu); + + if (r < 0) + goto out_free_vcpus; + + return 0; + +out_free_vcpus: + kvm_free_vcpu(vcpu); + mutex_unlock(&vcpu->mutex); +out: + return r; +} + +/* + * Allocate some memory and give it an address in the guest physical address + * space. + * + * Discontiguous memory is allowed, mostly for framebuffers. 
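kvm_dev_ioctl_create_vcpu() above carves the two FXSAVE images out of fx_buf by rounding the buffer start up to FX_IMAGE_ALIGN (16 bytes), since fxsave/fxrstor require 16-byte-aligned memory. A small sketch of that align-within-an-oversized-buffer idiom (the ALIGN_UP macro is written out here; names are placeholders):

        #include <stdint.h>
        #include <stdio.h>

        #define FX_IMAGE_SIZE  512
        #define FX_IMAGE_ALIGN 16
        #define FX_BUF_SIZE    (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)

        #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

        int main(void)
        {
                char fx_buf[FX_BUF_SIZE];
                char *host_fx_image, *guest_fx_image;

                /* Round the buffer start up to a 16-byte boundary, then
                 * place the two 512-byte images back to back. */
                host_fx_image  = (char *)ALIGN_UP((uintptr_t)fx_buf,
                                                  FX_IMAGE_ALIGN);
                guest_fx_image = host_fx_image + FX_IMAGE_SIZE;

                printf("buf=%p host=%p guest=%p\n", (void *)fx_buf,
                       (void *)host_fx_image, (void *)guest_fx_image);
                return 0;
        }
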
+ */ +static int kvm_dev_ioctl_set_memory_region(struct kvm *kvm, + struct kvm_memory_region *mem) +{ + int r; + gfn_t base_gfn; + unsigned long npages; + unsigned long i; + struct kvm_memory_slot *memslot; + struct kvm_memory_slot old, new; + int memory_config_version; + + r = -EINVAL; + /* General sanity checks */ + if (mem->memory_size & (PAGE_SIZE - 1)) + goto out; + if (mem->guest_phys_addr & (PAGE_SIZE - 1)) + goto out; + if (mem->slot >= KVM_MEMORY_SLOTS) + goto out; + if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) + goto out; + + memslot = &kvm->memslots[mem->slot]; + base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; + npages = mem->memory_size >> PAGE_SHIFT; + + if (!npages) + mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; + +raced: + spin_lock(&kvm->lock); + + memory_config_version = kvm->memory_config_version; + new = old = *memslot; + + new.base_gfn = base_gfn; + new.npages = npages; + new.flags = mem->flags; + + /* Disallow changing a memory slot's size. */ + r = -EINVAL; + if (npages && old.npages && npages != old.npages) + goto out_unlock; + + /* Check for overlaps */ + r = -EEXIST; + for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + struct kvm_memory_slot *s = &kvm->memslots[i]; + + if (s == memslot) + continue; + if (!((base_gfn + npages <= s->base_gfn) || + (base_gfn >= s->base_gfn + s->npages))) + goto out_unlock; + } + /* + * Do memory allocations outside lock. memory_config_version will + * detect any races. + */ + spin_unlock(&kvm->lock); + + /* Deallocate if slot is being removed */ + if (!npages) + new.phys_mem = 0; + + /* Free page dirty bitmap if unneeded */ + if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) + new.dirty_bitmap = 0; + + r = -ENOMEM; + + /* Allocate if a slot is being created */ + if (npages && !new.phys_mem) { + new.phys_mem = vmalloc(npages * sizeof(struct page *)); + + if (!new.phys_mem) + goto out_free; + + memset(new.phys_mem, 0, npages * sizeof(struct page *)); + for (i = 0; i < npages; ++i) { + new.phys_mem[i] = alloc_page(GFP_HIGHUSER + | __GFP_ZERO); + if (!new.phys_mem[i]) + goto out_free; + } + } + + /* Allocate page dirty bitmap if needed */ + if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { + unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; + + new.dirty_bitmap = vmalloc(dirty_bytes); + if (!new.dirty_bitmap) + goto out_free; + memset(new.dirty_bitmap, 0, dirty_bytes); + } + + spin_lock(&kvm->lock); + + if (memory_config_version != kvm->memory_config_version) { + spin_unlock(&kvm->lock); + kvm_free_physmem_slot(&new, &old); + goto raced; + } + + r = -EAGAIN; + if (kvm->busy) + goto out_unlock; + + if (mem->slot >= kvm->nmemslots) + kvm->nmemslots = mem->slot + 1; + + *memslot = new; + ++kvm->memory_config_version; + + spin_unlock(&kvm->lock); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + struct kvm_vcpu *vcpu; + + vcpu = vcpu_load(kvm, i); + if (!vcpu) + continue; + kvm_mmu_reset_context(vcpu); + vcpu_put(vcpu); + } + + kvm_free_physmem_slot(&old, &new); + return 0; + +out_unlock: + spin_unlock(&kvm->lock); +out_free: + kvm_free_physmem_slot(&new, &old); +out: + return r; +} + +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + int n; + unsigned long any = 0; + + spin_lock(&kvm->lock); + + /* + * Prevent changes to guest memory configuration even while the lock + * is not taken. 
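+ * The kvm->busy count taken here makes kvm_dev_ioctl_set_memory_region()
+ * fail with -EAGAIN until the dirty log has been copied out.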
+ */ + ++kvm->busy; + spin_unlock(&kvm->lock); + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, 8) / 8; + + for (i = 0; !any && i < n; ++i) + any = memslot->dirty_bitmap[i]; + + r = -EFAULT; + if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + goto out; + + + if (any) { + spin_lock(&kvm->lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + spin_unlock(&kvm->lock); + memset(memslot->dirty_bitmap, 0, n); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + struct kvm_vcpu *vcpu = vcpu_load(kvm, i); + + if (!vcpu) + continue; + kvm_arch_ops->tlb_flush(vcpu); + vcpu_put(vcpu); + } + } + + r = 0; + +out: + spin_lock(&kvm->lock); + --kvm->busy; + spin_unlock(&kvm->lock); + return r; +} + +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +{ + int i; + + for (i = 0; i < kvm->nmemslots; ++i) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + + if (gfn >= memslot->base_gfn + && gfn < memslot->base_gfn + memslot->npages) + return memslot; + } + return 0; +} +EXPORT_SYMBOL_GPL(gfn_to_memslot); + +void mark_page_dirty(struct kvm *kvm, gfn_t gfn) +{ + int i; + struct kvm_memory_slot *memslot = 0; + unsigned long rel_gfn; + + for (i = 0; i < kvm->nmemslots; ++i) { + memslot = &kvm->memslots[i]; + + if (gfn >= memslot->base_gfn + && gfn < memslot->base_gfn + memslot->npages) { + + if (!memslot || !memslot->dirty_bitmap) + return; + + rel_gfn = gfn - memslot->base_gfn; + + /* avoid RMW */ + if (!test_bit(rel_gfn, memslot->dirty_bitmap)) + set_bit(rel_gfn, memslot->dirty_bitmap); + return; + } + } +} + +static int emulator_read_std(unsigned long addr, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + void *data = val; + + while (bytes) { + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + unsigned offset = addr & (PAGE_SIZE-1); + unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); + unsigned long pfn; + struct kvm_memory_slot *memslot; + void *page; + + if (gpa == UNMAPPED_GVA) + return X86EMUL_PROPAGATE_FAULT; + pfn = gpa >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, pfn); + if (!memslot) + return X86EMUL_UNHANDLEABLE; + page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0); + + memcpy(data, page + offset, tocopy); + + kunmap_atomic(page, KM_USER0); + + bytes -= tocopy; + data += tocopy; + addr += tocopy; + } + + return X86EMUL_CONTINUE; +} + +static int emulator_write_std(unsigned long addr, + unsigned long val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", + addr, bytes); + return X86EMUL_UNHANDLEABLE; +} + +static int emulator_read_emulated(unsigned long addr, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + if (vcpu->mmio_read_completed) { + memcpy(val, vcpu->mmio_data, bytes); + vcpu->mmio_read_completed = 0; + return X86EMUL_CONTINUE; + } else if (emulator_read_std(addr, val, bytes, ctxt) + == X86EMUL_CONTINUE) + return X86EMUL_CONTINUE; + else { + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + if (gpa == UNMAPPED_GVA) + return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = gpa; + vcpu->mmio_size = bytes; + vcpu->mmio_is_write = 0; + + return X86EMUL_UNHANDLEABLE; + } +} + +static int emulator_write_emulated(unsigned long addr, + 
unsigned long val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + + if (gpa == UNMAPPED_GVA) + return X86EMUL_PROPAGATE_FAULT; + + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = gpa; + vcpu->mmio_size = bytes; + vcpu->mmio_is_write = 1; + memcpy(vcpu->mmio_data, &val, bytes); + + return X86EMUL_CONTINUE; +} + +static int emulator_cmpxchg_emulated(unsigned long addr, + unsigned long old, + unsigned long new, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + static int reported; + + if (!reported) { + reported = 1; + printk(KERN_WARNING "kvm: emulating exchange as write\n"); + } + return emulator_write_emulated(addr, new, bytes, ctxt); +} + +static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) +{ + return kvm_arch_ops->get_segment_base(vcpu, seg); +} + +int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +{ + spin_lock(&vcpu->kvm->lock); + vcpu->mmu.inval_page(vcpu, address); + spin_unlock(&vcpu->kvm->lock); + kvm_arch_ops->invlpg(vcpu, address); + return X86EMUL_CONTINUE; +} + +int emulate_clts(struct kvm_vcpu *vcpu) +{ + unsigned long cr0 = vcpu->cr0; + + cr0 &= ~CR0_TS_MASK; + kvm_arch_ops->set_cr0(vcpu, cr0); + return X86EMUL_CONTINUE; +} + +int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch (dr) { + case 0 ... 3: + *dest = kvm_arch_ops->get_dr(vcpu, dr); + return X86EMUL_CONTINUE; + default: + printk(KERN_DEBUG "%s: unexpected dr %u\n", + __FUNCTION__, dr); + return X86EMUL_UNHANDLEABLE; + } +} + +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) +{ + unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; + int exception; + + kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); + if (exception) { + /* FIXME: better handling */ + return X86EMUL_UNHANDLEABLE; + } + return X86EMUL_CONTINUE; +} + +static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) +{ + static int reported; + u8 opcodes[4]; + unsigned long rip = ctxt->vcpu->rip; + unsigned long rip_linear; + + rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS); + + if (reported) + return; + + emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt); + + printk(KERN_ERR "emulation failed but !mmio_needed?" + " rip %lx %02x %02x %02x %02x\n", + rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); + reported = 1; +} + +struct x86_emulate_ops emulate_ops = { + .read_std = emulator_read_std, + .write_std = emulator_write_std, + .read_emulated = emulator_read_emulated, + .write_emulated = emulator_write_emulated, + .cmpxchg_emulated = emulator_cmpxchg_emulated, +}; + +int emulate_instruction(struct kvm_vcpu *vcpu, + struct kvm_run *run, + unsigned long cr2, + u16 error_code) +{ + struct x86_emulate_ctxt emulate_ctxt; + int r; + int cs_db, cs_l; + + kvm_arch_ops->cache_regs(vcpu); + + kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + + emulate_ctxt.vcpu = vcpu; + emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu); + emulate_ctxt.cr2 = cr2; + emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) + ? X86EMUL_MODE_REAL : cs_l + ? X86EMUL_MODE_PROT64 : cs_db + ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + + if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) { + emulate_ctxt.cs_base = 0; + emulate_ctxt.ds_base = 0; + emulate_ctxt.es_base = 0; + emulate_ctxt.ss_base = 0; + } else { + emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS); + emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS); + emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES); + emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS); + } + + emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS); + emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); + + vcpu->mmio_is_write = 0; + r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); + + if ((r || vcpu->mmio_is_write) && run) { + run->mmio.phys_addr = vcpu->mmio_phys_addr; + memcpy(run->mmio.data, vcpu->mmio_data, 8); + run->mmio.len = vcpu->mmio_size; + run->mmio.is_write = vcpu->mmio_is_write; + } + + if (r) { + if (!vcpu->mmio_needed) { + report_emulation_failure(&emulate_ctxt); + return EMULATE_FAIL; + } + return EMULATE_DO_MMIO; + } + + kvm_arch_ops->decache_regs(vcpu); + kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); + + if (vcpu->mmio_is_write) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(emulate_instruction); + +static u64 mk_cr_64(u64 curr_cr, u32 new_val) +{ + return (curr_cr & ~((1ULL << 32) - 1)) | new_val; +} + +void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) +{ + struct descriptor_table dt = { limit, base }; + + kvm_arch_ops->set_gdt(vcpu, &dt); +} + +void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) +{ + struct descriptor_table dt = { limit, base }; + + kvm_arch_ops->set_idt(vcpu, &dt); +} + +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags) +{ + lmsw(vcpu, msw); + *rflags = kvm_arch_ops->get_rflags(vcpu); +} + +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) +{ + switch (cr) { + case 0: + return vcpu->cr0; + case 2: + return vcpu->cr2; + case 3: + return vcpu->cr3; + case 4: + return vcpu->cr4; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); + return 0; + } +} + +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, + unsigned long *rflags) +{ + switch (cr) { + case 0: + set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); + *rflags = kvm_arch_ops->get_rflags(vcpu); + break; + case 2: + vcpu->cr2 = val; + break; + case 3: + set_cr3(vcpu, val); + break; + case 4: + set_cr4(vcpu, mk_cr_64(vcpu->cr4, val)); + break; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); + } +} + +/* + * Reads an msr value (of 'msr_index') into 'pdata'. + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. 
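+ * The read itself is delegated to the architecture backend through
+ * kvm_arch_ops->get_msr().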
+ */ +static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +{ + return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); +} + +#ifdef __x86_64__ + +void set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vmx_msr_entry *msr; + + if (efer & EFER_RESERVED_BITS) { + printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", + efer); + inject_gp(vcpu); + return; + } + + if (is_paging(vcpu) + && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) { + printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); + inject_gp(vcpu); + return; + } + + efer &= ~EFER_LMA; + efer |= vcpu->shadow_efer & EFER_LMA; + + vcpu->shadow_efer = efer; + + msr = find_msr_entry(vcpu, MSR_EFER); + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + msr->data = efer; +} +EXPORT_SYMBOL_GPL(set_efer); + +#endif + +/* + * Writes msr value into into the appropriate "register". + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. + */ +static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +{ + return kvm_arch_ops->set_msr(vcpu, msr_index, data); +} + +void kvm_resched(struct kvm_vcpu *vcpu) +{ + vcpu_put(vcpu); + cond_resched(); + /* Cannot fail - no vcpu unplug yet. */ + vcpu_load(vcpu->kvm, vcpu_slot(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_resched); + +void load_msrs(struct vmx_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + wrmsrl(e[i].index, e[i].data); +} +EXPORT_SYMBOL_GPL(load_msrs); + +void save_msrs(struct vmx_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + rdmsrl(e[i].index, e[i].data); +} +EXPORT_SYMBOL_GPL(save_msrs); + +static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) +{ + struct kvm_vcpu *vcpu; + int r; + + if (kvm_run->vcpu < 0 || kvm_run->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + + vcpu = vcpu_load(kvm, kvm_run->vcpu); + if (!vcpu) + return -ENOENT; + + if (kvm_run->emulated) { + kvm_arch_ops->skip_emulated_instruction(vcpu); + kvm_run->emulated = 0; + } + + if (kvm_run->mmio_completed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + } + + vcpu->mmio_needed = 0; + + r = kvm_arch_ops->run(vcpu, kvm_run); + + vcpu_put(vcpu); + return r; +} + +static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) +{ + struct kvm_vcpu *vcpu; + + if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + + vcpu = vcpu_load(kvm, regs->vcpu); + if (!vcpu) + return -ENOENT; + + kvm_arch_ops->cache_regs(vcpu); + + regs->rax = vcpu->regs[VCPU_REGS_RAX]; + regs->rbx = vcpu->regs[VCPU_REGS_RBX]; + regs->rcx = vcpu->regs[VCPU_REGS_RCX]; + regs->rdx = vcpu->regs[VCPU_REGS_RDX]; + regs->rsi = vcpu->regs[VCPU_REGS_RSI]; + regs->rdi = vcpu->regs[VCPU_REGS_RDI]; + regs->rsp = vcpu->regs[VCPU_REGS_RSP]; + regs->rbp = vcpu->regs[VCPU_REGS_RBP]; +#ifdef __x86_64__ + regs->r8 = vcpu->regs[VCPU_REGS_R8]; + regs->r9 = vcpu->regs[VCPU_REGS_R9]; + regs->r10 = vcpu->regs[VCPU_REGS_R10]; + regs->r11 = vcpu->regs[VCPU_REGS_R11]; + regs->r12 = vcpu->regs[VCPU_REGS_R12]; + regs->r13 = vcpu->regs[VCPU_REGS_R13]; + regs->r14 = vcpu->regs[VCPU_REGS_R14]; + regs->r15 = vcpu->regs[VCPU_REGS_R15]; +#endif + + regs->rip = vcpu->rip; + regs->rflags = kvm_arch_ops->get_rflags(vcpu); + + /* + * Don't leak debug flags in case they were set for guest debugging + */ + if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + + vcpu_put(vcpu); + + return 0; +} + +static int kvm_dev_ioctl_set_regs(struct kvm *kvm, struct kvm_regs 
*regs) +{ + struct kvm_vcpu *vcpu; + + if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + + vcpu = vcpu_load(kvm, regs->vcpu); + if (!vcpu) + return -ENOENT; + + vcpu->regs[VCPU_REGS_RAX] = regs->rax; + vcpu->regs[VCPU_REGS_RBX] = regs->rbx; + vcpu->regs[VCPU_REGS_RCX] = regs->rcx; + vcpu->regs[VCPU_REGS_RDX] = regs->rdx; + vcpu->regs[VCPU_REGS_RSI] = regs->rsi; + vcpu->regs[VCPU_REGS_RDI] = regs->rdi; + vcpu->regs[VCPU_REGS_RSP] = regs->rsp; + vcpu->regs[VCPU_REGS_RBP] = regs->rbp; +#ifdef __x86_64__ + vcpu->regs[VCPU_REGS_R8] = regs->r8; + vcpu->regs[VCPU_REGS_R9] = regs->r9; + vcpu->regs[VCPU_REGS_R10] = regs->r10; + vcpu->regs[VCPU_REGS_R11] = regs->r11; + vcpu->regs[VCPU_REGS_R12] = regs->r12; + vcpu->regs[VCPU_REGS_R13] = regs->r13; + vcpu->regs[VCPU_REGS_R14] = regs->r14; + vcpu->regs[VCPU_REGS_R15] = regs->r15; +#endif + + vcpu->rip = regs->rip; + kvm_arch_ops->set_rflags(vcpu, regs->rflags); + + kvm_arch_ops->decache_regs(vcpu); + + vcpu_put(vcpu); + + return 0; +} + +static void get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + return kvm_arch_ops->get_segment(vcpu, var, seg); +} + +static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +{ + struct kvm_vcpu *vcpu; + struct descriptor_table dt; + + if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = vcpu_load(kvm, sregs->vcpu); + if (!vcpu) + return -ENOENT; + + get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + get_segment(vcpu, &sregs->es, VCPU_SREG_ES); + get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + + get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + + kvm_arch_ops->get_idt(vcpu, &dt); + sregs->idt.limit = dt.limit; + sregs->idt.base = dt.base; + kvm_arch_ops->get_gdt(vcpu, &dt); + sregs->gdt.limit = dt.limit; + sregs->gdt.base = dt.base; + + sregs->cr0 = vcpu->cr0; + sregs->cr2 = vcpu->cr2; + sregs->cr3 = vcpu->cr3; + sregs->cr4 = vcpu->cr4; + sregs->cr8 = vcpu->cr8; + sregs->efer = vcpu->shadow_efer; + sregs->apic_base = vcpu->apic_base; + + memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, + sizeof sregs->interrupt_bitmap); + + vcpu_put(vcpu); + + return 0; +} + +static void set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + return kvm_arch_ops->set_segment(vcpu, var, seg); +} + +static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) +{ + struct kvm_vcpu *vcpu; + int mmu_reset_needed = 0; + int i; + struct descriptor_table dt; + + if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = vcpu_load(kvm, sregs->vcpu); + if (!vcpu) + return -ENOENT; + + set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + set_segment(vcpu, &sregs->es, VCPU_SREG_ES); + set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + + set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + + dt.limit = sregs->idt.limit; + dt.base = sregs->idt.base; + kvm_arch_ops->set_idt(vcpu, &dt); + dt.limit = sregs->gdt.limit; + dt.base = sregs->gdt.base; + kvm_arch_ops->set_gdt(vcpu, &dt); + + vcpu->cr2 = sregs->cr2; + mmu_reset_needed |= vcpu->cr3 != sregs->cr3; + vcpu->cr3 = sregs->cr3; + + vcpu->cr8 = sregs->cr8; + + mmu_reset_needed |= vcpu->shadow_efer 
!= sregs->efer; +#ifdef __x86_64__ + kvm_arch_ops->set_efer(vcpu, sregs->efer); +#endif + vcpu->apic_base = sregs->apic_base; + + mmu_reset_needed |= vcpu->cr0 != sregs->cr0; + kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); + + mmu_reset_needed |= vcpu->cr4 != sregs->cr4; + kvm_arch_ops->set_cr4(vcpu, sregs->cr4); + + if (mmu_reset_needed) + kvm_mmu_reset_context(vcpu); + + memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, + sizeof vcpu->irq_pending); + vcpu->irq_summary = 0; + for (i = 0; i < NR_IRQ_WORDS; ++i) + if (vcpu->irq_pending[i]) + __set_bit(i, &vcpu->irq_summary); + + vcpu_put(vcpu); + + return 0; +} + +/* + * List of msr numbers which we expose to userspace through KVM_GET_MSRS + * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. + */ +static u32 msrs_to_save[] = { + MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_K6_STAR, +#ifdef __x86_64__ + MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, +#endif + MSR_IA32_TIME_STAMP_COUNTER, +}; + + +/* + * Adapt set_msr() to msr_io()'s calling convention + */ +static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + return set_msr(vcpu, index, *data); +} + +/* + * Read or write a bunch of msrs. All parameters are kernel addresses. + * + * @return number of msrs set successfully. + */ +static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs, + struct kvm_msr_entry *entries, + int (*do_msr)(struct kvm_vcpu *vcpu, + unsigned index, u64 *data)) +{ + struct kvm_vcpu *vcpu; + int i; + + if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + + vcpu = vcpu_load(kvm, msrs->vcpu); + if (!vcpu) + return -ENOENT; + + for (i = 0; i < msrs->nmsrs; ++i) + if (do_msr(vcpu, entries[i].index, &entries[i].data)) + break; + + vcpu_put(vcpu); + + return i; +} + +/* + * Read or write a bunch of msrs. Parameters are user addresses. + * + * @return number of msrs set successfully. + */ +static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs, + int (*do_msr)(struct kvm_vcpu *vcpu, + unsigned index, u64 *data), + int writeback) +{ + struct kvm_msrs msrs; + struct kvm_msr_entry *entries; + int r, n; + unsigned size; + + r = -EFAULT; + if (copy_from_user(&msrs, user_msrs, sizeof msrs)) + goto out; + + r = -E2BIG; + if (msrs.nmsrs >= MAX_IO_MSRS) + goto out; + + r = -ENOMEM; + size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; + entries = vmalloc(size); + if (!entries) + goto out; + + r = -EFAULT; + if (copy_from_user(entries, user_msrs->entries, size)) + goto out_free; + + r = n = __msr_io(kvm, &msrs, entries, do_msr); + if (r < 0) + goto out_free; + + r = -EFAULT; + if (writeback && copy_to_user(user_msrs->entries, entries, size)) + goto out_free; + + r = n; + +out_free: + vfree(entries); +out: + return r; +} + +/* + * Translate a guest virtual address to a guest physical address. 
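+ * The walk goes through the vcpu's current mmu.gva_to_gpa() under kvm->lock;
+ * tr->valid is cleared when the address resolves to UNMAPPED_GVA.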
+ */ +static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) +{ + unsigned long vaddr = tr->linear_address; + struct kvm_vcpu *vcpu; + gpa_t gpa; + + vcpu = vcpu_load(kvm, tr->vcpu); + if (!vcpu) + return -ENOENT; + spin_lock(&kvm->lock); + gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); + tr->physical_address = gpa; + tr->valid = gpa != UNMAPPED_GVA; + tr->writeable = 1; + tr->usermode = 0; + spin_unlock(&kvm->lock); + vcpu_put(vcpu); + + return 0; +} + +static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) +{ + struct kvm_vcpu *vcpu; + + if (irq->vcpu < 0 || irq->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + if (irq->irq < 0 || irq->irq >= 256) + return -EINVAL; + vcpu = vcpu_load(kvm, irq->vcpu); + if (!vcpu) + return -ENOENT; + + set_bit(irq->irq, vcpu->irq_pending); + set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary); + + vcpu_put(vcpu); + + return 0; +} + +static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, + struct kvm_debug_guest *dbg) +{ + struct kvm_vcpu *vcpu; + int r; + + if (dbg->vcpu < 0 || dbg->vcpu >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = vcpu_load(kvm, dbg->vcpu); + if (!vcpu) + return -ENOENT; + + r = kvm_arch_ops->set_guest_debug(vcpu, dbg); + + vcpu_put(vcpu); + + return r; +} + +static long kvm_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + int r = -EINVAL; + + switch (ioctl) { + case KVM_CREATE_VCPU: { + r = kvm_dev_ioctl_create_vcpu(kvm, arg); + if (r) + goto out; + break; + } + case KVM_RUN: { + struct kvm_run kvm_run; + + r = -EFAULT; + if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run)) + goto out; + r = kvm_dev_ioctl_run(kvm, &kvm_run); + if (r < 0) + goto out; + r = -EFAULT; + if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) + goto out; + r = 0; + break; + } + case KVM_GET_REGS: { + struct kvm_regs kvm_regs; + + r = -EFAULT; + if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs)) + goto out; + r = kvm_dev_ioctl_get_regs(kvm, &kvm_regs); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user((void *)arg, &kvm_regs, sizeof kvm_regs)) + goto out; + r = 0; + break; + } + case KVM_SET_REGS: { + struct kvm_regs kvm_regs; + + r = -EFAULT; + if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs)) + goto out; + r = kvm_dev_ioctl_set_regs(kvm, &kvm_regs); + if (r) + goto out; + r = 0; + break; + } + case KVM_GET_SREGS: { + struct kvm_sregs kvm_sregs; + + r = -EFAULT; + if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs)) + goto out; + r = kvm_dev_ioctl_get_sregs(kvm, &kvm_sregs); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user((void *)arg, &kvm_sregs, sizeof kvm_sregs)) + goto out; + r = 0; + break; + } + case KVM_SET_SREGS: { + struct kvm_sregs kvm_sregs; + + r = -EFAULT; + if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs)) + goto out; + r = kvm_dev_ioctl_set_sregs(kvm, &kvm_sregs); + if (r) + goto out; + r = 0; + break; + } + case KVM_TRANSLATE: { + struct kvm_translation tr; + + r = -EFAULT; + if (copy_from_user(&tr, (void *)arg, sizeof tr)) + goto out; + r = kvm_dev_ioctl_translate(kvm, &tr); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user((void *)arg, &tr, sizeof tr)) + goto out; + r = 0; + break; + } + case KVM_INTERRUPT: { + struct kvm_interrupt irq; + + r = -EFAULT; + if (copy_from_user(&irq, (void *)arg, sizeof irq)) + goto out; + r = kvm_dev_ioctl_interrupt(kvm, &irq); + if (r) + goto out; + r = 0; + break; + } + case KVM_DEBUG_GUEST: { + struct kvm_debug_guest dbg; + + r = -EFAULT; + 
if (copy_from_user(&dbg, (void *)arg, sizeof dbg)) + goto out; + r = kvm_dev_ioctl_debug_guest(kvm, &dbg); + if (r) + goto out; + r = 0; + break; + } + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, (void *)arg, sizeof kvm_mem)) + goto out; + r = kvm_dev_ioctl_set_memory_region(kvm, &kvm_mem); + if (r) + goto out; + break; + } + case KVM_GET_DIRTY_LOG: { + struct kvm_dirty_log log; + + r = -EFAULT; + if (copy_from_user(&log, (void *)arg, sizeof log)) + goto out; + r = kvm_dev_ioctl_get_dirty_log(kvm, &log); + if (r) + goto out; + break; + } + case KVM_GET_MSRS: + r = msr_io(kvm, (void __user *)arg, get_msr, 1); + break; + case KVM_SET_MSRS: + r = msr_io(kvm, (void __user *)arg, do_set_msr, 0); + break; + case KVM_GET_MSR_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = (void __user *)arg; + struct kvm_msr_list msr_list; + unsigned n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = ARRAY_SIZE(msrs_to_save); + if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) + goto out; + r = -E2BIG; + if (n < ARRAY_SIZE(msrs_to_save)) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msrs_to_save, + sizeof msrs_to_save)) + goto out; + r = 0; + } + default: + ; + } +out: + return r; +} + +static struct page *kvm_dev_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct kvm *kvm = vma->vm_file->private_data; + unsigned long pgoff; + struct kvm_memory_slot *slot; + struct page *page; + + *type = VM_FAULT_MINOR; + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + slot = gfn_to_memslot(kvm, pgoff); + if (!slot) + return NOPAGE_SIGBUS; + page = gfn_to_page(slot, pgoff); + if (!page) + return NOPAGE_SIGBUS; + get_page(page); + return page; +} + +static struct vm_operations_struct kvm_dev_vm_ops = { + .nopage = kvm_dev_nopage, +}; + +static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_dev_vm_ops; + return 0; +} + +static struct file_operations kvm_chardev_ops = { + .open = kvm_dev_open, + .release = kvm_dev_release, + .unlocked_ioctl = kvm_dev_ioctl, + .compat_ioctl = kvm_dev_ioctl, + .mmap = kvm_dev_mmap, +}; + +static struct miscdevice kvm_dev = { + MISC_DYNAMIC_MINOR, + "kvm", + &kvm_chardev_ops, +}; + +static int kvm_reboot(struct notifier_block *notifier, unsigned long val, + void *v) +{ + if (val == SYS_RESTART) { + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. 
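+ * Disable hardware virtualization on every cpu before the restart proceeds.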
+ */ + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_reboot_notifier = { + .notifier_call = kvm_reboot, + .priority = 0, +}; + +static __init void kvm_init_debug(void) +{ + struct kvm_stats_debugfs_item *p; + + debugfs_dir = debugfs_create_dir("kvm", 0); + for (p = debugfs_entries; p->name; ++p) + p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, + p->data); +} + +static void kvm_exit_debug(void) +{ + struct kvm_stats_debugfs_item *p; + + for (p = debugfs_entries; p->name; ++p) + debugfs_remove(p->dentry); + debugfs_remove(debugfs_dir); +} + +hpa_t bad_page_address; + +int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) +{ + int r; + + kvm_arch_ops = ops; + + if (!kvm_arch_ops->cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: no hardware support\n"); + return -EOPNOTSUPP; + } + if (kvm_arch_ops->disabled_by_bios()) { + printk(KERN_ERR "kvm: disabled by bios\n"); + return -EOPNOTSUPP; + } + + r = kvm_arch_ops->hardware_setup(); + if (r < 0) + return r; + + on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1); + register_reboot_notifier(&kvm_reboot_notifier); + + kvm_chardev_ops.owner = module; + + r = misc_register(&kvm_dev); + if (r) { + printk (KERN_ERR "kvm: misc device register failed\n"); + goto out_free; + } + + return r; + +out_free: + unregister_reboot_notifier(&kvm_reboot_notifier); + on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); + kvm_arch_ops->hardware_unsetup(); + return r; +} + +void kvm_exit_arch(void) +{ + misc_deregister(&kvm_dev); + + unregister_reboot_notifier(&kvm_reboot_notifier); + on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); + kvm_arch_ops->hardware_unsetup(); +} + +static __init int kvm_init(void) +{ + static struct page *bad_page; + int r = 0; + + kvm_init_debug(); + + if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) { + r = -ENOMEM; + goto out; + } + + bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT; + memset(__va(bad_page_address), 0, PAGE_SIZE); + + return r; + +out: + kvm_exit_debug(); + return r; +} + +static __exit void kvm_exit(void) +{ + kvm_exit_debug(); + __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); +} + +module_init(kvm_init) +module_exit(kvm_exit) + +EXPORT_SYMBOL_GPL(kvm_init_arch); +EXPORT_SYMBOL_GPL(kvm_exit_arch); diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h new file mode 100644 index 00000000000..7d7f2aa1096 --- /dev/null +++ b/drivers/kvm/kvm_svm.h @@ -0,0 +1,44 @@ +#ifndef __KVM_SVM_H +#define __KVM_SVM_H + +#include <linux/types.h> +#include <linux/list.h> +#include <asm/msr.h> + +#include "svm.h" +#include "kvm.h" + +static const u32 host_save_msrs[] = { +#ifdef __x86_64__ + MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, + MSR_FS_BASE, MSR_GS_BASE, +#endif + MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP, + MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/ +}; + +#define NR_HOST_SAVE_MSRS (sizeof(host_save_msrs) / sizeof(*host_save_msrs)) +#define NUM_DB_REGS 4 + +struct vcpu_svm { + struct vmcb *vmcb; + unsigned long vmcb_pa; + struct svm_cpu_data *svm_data; + uint64_t asid_generation; + + unsigned long cr0; + unsigned long cr4; + unsigned long db_regs[NUM_DB_REGS]; + + u64 next_rip; + + u64 host_msrs[NR_HOST_SAVE_MSRS]; + unsigned long host_cr2; + unsigned long host_db_regs[NUM_DB_REGS]; + unsigned long host_dr6; + unsigned 
long host_dr7; +}; + +#endif + diff --git a/drivers/kvm/kvm_vmx.h b/drivers/kvm/kvm_vmx.h new file mode 100644 index 00000000000..87e12d2bfa1 --- /dev/null +++ b/drivers/kvm/kvm_vmx.h @@ -0,0 +1,14 @@ +#ifndef __KVM_VMX_H +#define __KVM_VMX_H + +#ifdef __x86_64__ +/* + * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt + * mechanism (cpu bug AA24) + */ +#define NR_BAD_MSRS 2 +#else +#define NR_BAD_MSRS 0 +#endif + +#endif diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c new file mode 100644 index 00000000000..4e29d9b7211 --- /dev/null +++ b/drivers/kvm/mmu.c @@ -0,0 +1,699 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * MMU support + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Yaniv Kamay <yaniv@qumranet.com> + * Avi Kivity <avi@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#include <linux/types.h> +#include <linux/string.h> +#include <asm/page.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/module.h> + +#include "vmx.h" +#include "kvm.h" + +#define pgprintk(x...) do { } while (0) + +#define ASSERT(x) \ + if (!(x)) { \ + printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ + __FILE__, __LINE__, #x); \ + } + +#define PT64_ENT_PER_PAGE 512 +#define PT32_ENT_PER_PAGE 1024 + +#define PT_WRITABLE_SHIFT 1 + +#define PT_PRESENT_MASK (1ULL << 0) +#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) +#define PT_USER_MASK (1ULL << 2) +#define PT_PWT_MASK (1ULL << 3) +#define PT_PCD_MASK (1ULL << 4) +#define PT_ACCESSED_MASK (1ULL << 5) +#define PT_DIRTY_MASK (1ULL << 6) +#define PT_PAGE_SIZE_MASK (1ULL << 7) +#define PT_PAT_MASK (1ULL << 7) +#define PT_GLOBAL_MASK (1ULL << 8) +#define PT64_NX_MASK (1ULL << 63) + +#define PT_PAT_SHIFT 7 +#define PT_DIR_PAT_SHIFT 12 +#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) + +#define PT32_DIR_PSE36_SIZE 4 +#define PT32_DIR_PSE36_SHIFT 13 +#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) + + +#define PT32_PTE_COPY_MASK \ + (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \ + PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAT_MASK | \ + PT_GLOBAL_MASK ) + +#define PT32_NON_PTE_COPY_MASK \ + (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \ + PT_ACCESSED_MASK | PT_DIRTY_MASK) + + +#define PT64_PTE_COPY_MASK \ + (PT64_NX_MASK | PT32_PTE_COPY_MASK) + +#define PT64_NON_PTE_COPY_MASK \ + (PT64_NX_MASK | PT32_NON_PTE_COPY_MASK) + + + +#define PT_FIRST_AVAIL_BITS_SHIFT 9 +#define PT64_SECOND_AVAIL_BITS_SHIFT 52 + +#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + +#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1) +#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT) + +#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1) +#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT)) + +#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT) + +#define VALID_PAGE(x) ((x) != INVALID_PAGE) + +#define PT64_LEVEL_BITS 9 + +#define PT64_LEVEL_SHIFT(level) \ + ( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS ) + +#define PT64_LEVEL_MASK(level) \ + (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level)) + +#define PT64_INDEX(address, level)\ + (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) 
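+/*
+ * With PAGE_SHIFT == 12 and PT64_LEVEL_BITS == 9, PT64_LEVEL_SHIFT(1) is 12
+ * and PT64_LEVEL_SHIFT(2) is 21, so PT64_INDEX(addr, 2) extracts bits 21..29
+ * of addr, i.e. the index of the pde that maps it.
+ */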
+ + +#define PT32_LEVEL_BITS 10 + +#define PT32_LEVEL_SHIFT(level) \ + ( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS ) + +#define PT32_LEVEL_MASK(level) \ + (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level)) + +#define PT32_INDEX(address, level)\ + (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) + + +#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK) +#define PT64_DIR_BASE_ADDR_MASK \ + (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) + +#define PT32_BASE_ADDR_MASK PAGE_MASK +#define PT32_DIR_BASE_ADDR_MASK \ + (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1)) + + +#define PFERR_PRESENT_MASK (1U << 0) +#define PFERR_WRITE_MASK (1U << 1) +#define PFERR_USER_MASK (1U << 2) + +#define PT64_ROOT_LEVEL 4 +#define PT32_ROOT_LEVEL 2 +#define PT32E_ROOT_LEVEL 3 + +#define PT_DIRECTORY_LEVEL 2 +#define PT_PAGE_TABLE_LEVEL 1 + +static int is_write_protection(struct kvm_vcpu *vcpu) +{ + return vcpu->cr0 & CR0_WP_MASK; +} + +static int is_cpuid_PSE36(void) +{ + return 1; +} + +static int is_present_pte(unsigned long pte) +{ + return pte & PT_PRESENT_MASK; +} + +static int is_writeble_pte(unsigned long pte) +{ + return pte & PT_WRITABLE_MASK; +} + +static int is_io_pte(unsigned long pte) +{ + return pte & PT_SHADOW_IO_MARK; +} + +static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) +{ + struct kvm_mmu_page *page_head = page_header(page_hpa); + + list_del(&page_head->link); + page_head->page_hpa = page_hpa; + list_add(&page_head->link, &vcpu->free_pages); +} + +static int is_empty_shadow_page(hpa_t page_hpa) +{ + u32 *pos; + u32 *end; + for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32); + pos != end; pos++) + if (*pos != 0) + return 0; + return 1; +} + +static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte) +{ + struct kvm_mmu_page *page; + + if (list_empty(&vcpu->free_pages)) + return INVALID_PAGE; + + page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); + list_del(&page->link); + list_add(&page->link, &vcpu->kvm->active_mmu_pages); + ASSERT(is_empty_shadow_page(page->page_hpa)); + page->slot_bitmap = 0; + page->global = 1; + page->parent_pte = parent_pte; + return page->page_hpa; +} + +static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) +{ + int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); + struct kvm_mmu_page *page_head = page_header(__pa(pte)); + + __set_bit(slot, &page_head->slot_bitmap); +} + +hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + hpa_t hpa = gpa_to_hpa(vcpu, gpa); + + return is_error_hpa(hpa) ? 
bad_page_address | (gpa & ~PAGE_MASK): hpa; +} + +hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + struct kvm_memory_slot *slot; + struct page *page; + + ASSERT((gpa & HPA_ERR_MASK) == 0); + slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!slot) + return gpa | HPA_ERR_MASK; + page = gfn_to_page(slot, gpa >> PAGE_SHIFT); + return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) + | (gpa & (PAGE_SIZE-1)); +} + +hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + if (gpa == UNMAPPED_GVA) + return UNMAPPED_GVA; + return gpa_to_hpa(vcpu, gpa); +} + + +static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa, + int level) +{ + ASSERT(vcpu); + ASSERT(VALID_PAGE(page_hpa)); + ASSERT(level <= PT64_ROOT_LEVEL && level > 0); + + if (level == 1) + memset(__va(page_hpa), 0, PAGE_SIZE); + else { + u64 *pos; + u64 *end; + + for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE; + pos != end; pos++) { + u64 current_ent = *pos; + + *pos = 0; + if (is_present_pte(current_ent)) + release_pt_page_64(vcpu, + current_ent & + PT64_BASE_ADDR_MASK, + level - 1); + } + } + kvm_mmu_free_page(vcpu, page_hpa); +} + +static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) +{ +} + +static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) +{ + int level = PT32E_ROOT_LEVEL; + hpa_t table_addr = vcpu->mmu.root_hpa; + + for (; ; level--) { + u32 index = PT64_INDEX(v, level); + u64 *table; + + ASSERT(VALID_PAGE(table_addr)); + table = __va(table_addr); + + if (level == 1) { + mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); + page_header_update_slot(vcpu->kvm, table, v); + table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | + PT_USER_MASK; + return 0; + } + + if (table[index] == 0) { + hpa_t new_table = kvm_mmu_alloc_page(vcpu, + &table[index]); + + if (!VALID_PAGE(new_table)) { + pgprintk("nonpaging_map: ENOMEM\n"); + return -ENOMEM; + } + + if (level == PT32E_ROOT_LEVEL) + table[index] = new_table | PT_PRESENT_MASK; + else + table[index] = new_table | PT_PRESENT_MASK | + PT_WRITABLE_MASK | PT_USER_MASK; + } + table_addr = table[index] & PT64_BASE_ADDR_MASK; + } +} + +static void nonpaging_flush(struct kvm_vcpu *vcpu) +{ + hpa_t root = vcpu->mmu.root_hpa; + + ++kvm_stat.tlb_flush; + pgprintk("nonpaging_flush\n"); + ASSERT(VALID_PAGE(root)); + release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level); + root = kvm_mmu_alloc_page(vcpu, NULL); + ASSERT(VALID_PAGE(root)); + vcpu->mmu.root_hpa = root; + if (is_paging(vcpu)) + root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)); + kvm_arch_ops->set_cr3(vcpu, root); + kvm_arch_ops->tlb_flush(vcpu); +} + +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) +{ + return vaddr; +} + +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + u32 error_code) +{ + int ret; + gpa_t addr = gva; + + ASSERT(vcpu); + ASSERT(VALID_PAGE(vcpu->mmu.root_hpa)); + + for (;;) { + hpa_t paddr; + + paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK); + + if (is_error_hpa(paddr)) + return 1; + + ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr); + if (ret) { + nonpaging_flush(vcpu); + continue; + } + break; + } + return ret; +} + +static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) +{ +} + +static void nonpaging_free(struct kvm_vcpu *vcpu) +{ + hpa_t root; + + ASSERT(vcpu); + root = vcpu->mmu.root_hpa; + if (VALID_PAGE(root)) + release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level); + vcpu->mmu.root_hpa = INVALID_PAGE; +} + +static int nonpaging_init_context(struct 
kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = &vcpu->mmu; + + context->new_cr3 = nonpaging_new_cr3; + context->page_fault = nonpaging_page_fault; + context->inval_page = nonpaging_inval_page; + context->gva_to_gpa = nonpaging_gva_to_gpa; + context->free = nonpaging_free; + context->root_level = PT32E_ROOT_LEVEL; + context->shadow_root_level = PT32E_ROOT_LEVEL; + context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + ASSERT(VALID_PAGE(context->root_hpa)); + kvm_arch_ops->set_cr3(vcpu, context->root_hpa); + return 0; +} + + +static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *page, *npage; + + list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages, + link) { + if (page->global) + continue; + + if (!page->parent_pte) + continue; + + *page->parent_pte = 0; + release_pt_page_64(vcpu, page->page_hpa, 1); + } + ++kvm_stat.tlb_flush; + kvm_arch_ops->tlb_flush(vcpu); +} + +static void paging_new_cr3(struct kvm_vcpu *vcpu) +{ + kvm_mmu_flush_tlb(vcpu); +} + +static void mark_pagetable_nonglobal(void *shadow_pte) +{ + page_header(__pa(shadow_pte))->global = 0; +} + +static inline void set_pte_common(struct kvm_vcpu *vcpu, + u64 *shadow_pte, + gpa_t gaddr, + int dirty, + u64 access_bits) +{ + hpa_t paddr; + + *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; + if (!dirty) + access_bits &= ~PT_WRITABLE_MASK; + + if (access_bits & PT_WRITABLE_MASK) + mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); + + *shadow_pte |= access_bits; + + paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); + + if (!(*shadow_pte & PT_GLOBAL_MASK)) + mark_pagetable_nonglobal(shadow_pte); + + if (is_error_hpa(paddr)) { + *shadow_pte |= gaddr; + *shadow_pte |= PT_SHADOW_IO_MARK; + *shadow_pte &= ~PT_PRESENT_MASK; + } else { + *shadow_pte |= paddr; + page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); + } +} + +static void inject_page_fault(struct kvm_vcpu *vcpu, + u64 addr, + u32 err_code) +{ + kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); +} + +static inline int fix_read_pf(u64 *shadow_ent) +{ + if ((*shadow_ent & PT_SHADOW_USER_MASK) && + !(*shadow_ent & PT_USER_MASK)) { + /* + * If supervisor write protect is disabled, we shadow kernel + * pages as user pages so we can trap the write access. + */ + *shadow_ent |= PT_USER_MASK; + *shadow_ent &= ~PT_WRITABLE_MASK; + + return 1; + + } + return 0; +} + +static int may_access(u64 pte, int write, int user) +{ + + if (user && !(pte & PT_USER_MASK)) + return 0; + if (write && !(pte & PT_WRITABLE_MASK)) + return 0; + return 1; +} + +/* + * Remove a shadow pte. 
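+ * Walks the shadow page tables from mmu.root_hpa down to the pte mapping
+ * 'addr' and clears it.  If the mapping was installed for a guest large page
+ * (PT_SHADOW_PS_MARK), the shadow page table backing it is released and the
+ * tlb is flushed.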
+ */ +static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) +{ + hpa_t page_addr = vcpu->mmu.root_hpa; + int level = vcpu->mmu.shadow_root_level; + + ++kvm_stat.invlpg; + + for (; ; level--) { + u32 index = PT64_INDEX(addr, level); + u64 *table = __va(page_addr); + + if (level == PT_PAGE_TABLE_LEVEL ) { + table[index] = 0; + return; + } + + if (!is_present_pte(table[index])) + return; + + page_addr = table[index] & PT64_BASE_ADDR_MASK; + + if (level == PT_DIRECTORY_LEVEL && + (table[index] & PT_SHADOW_PS_MARK)) { + table[index] = 0; + release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL); + + kvm_arch_ops->tlb_flush(vcpu); + return; + } + } +} + +static void paging_free(struct kvm_vcpu *vcpu) +{ + nonpaging_free(vcpu); +} + +#define PTTYPE 64 +#include "paging_tmpl.h" +#undef PTTYPE + +#define PTTYPE 32 +#include "paging_tmpl.h" +#undef PTTYPE + +static int paging64_init_context(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = &vcpu->mmu; + + ASSERT(is_pae(vcpu)); + context->new_cr3 = paging_new_cr3; + context->page_fault = paging64_page_fault; + context->inval_page = paging_inval_page; + context->gva_to_gpa = paging64_gva_to_gpa; + context->free = paging_free; + context->root_level = PT64_ROOT_LEVEL; + context->shadow_root_level = PT64_ROOT_LEVEL; + context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + ASSERT(VALID_PAGE(context->root_hpa)); + kvm_arch_ops->set_cr3(vcpu, context->root_hpa | + (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + return 0; +} + +static int paging32_init_context(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = &vcpu->mmu; + + context->new_cr3 = paging_new_cr3; + context->page_fault = paging32_page_fault; + context->inval_page = paging_inval_page; + context->gva_to_gpa = paging32_gva_to_gpa; + context->free = paging_free; + context->root_level = PT32_ROOT_LEVEL; + context->shadow_root_level = PT32E_ROOT_LEVEL; + context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); + ASSERT(VALID_PAGE(context->root_hpa)); + kvm_arch_ops->set_cr3(vcpu, context->root_hpa | + (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + return 0; +} + +static int paging32E_init_context(struct kvm_vcpu *vcpu) +{ + int ret; + + if ((ret = paging64_init_context(vcpu))) + return ret; + + vcpu->mmu.root_level = PT32E_ROOT_LEVEL; + vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL; + return 0; +} + +static int init_kvm_mmu(struct kvm_vcpu *vcpu) +{ + ASSERT(vcpu); + ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); + + if (!is_paging(vcpu)) + return nonpaging_init_context(vcpu); + else if (kvm_arch_ops->is_long_mode(vcpu)) + return paging64_init_context(vcpu); + else if (is_pae(vcpu)) + return paging32E_init_context(vcpu); + else + return paging32_init_context(vcpu); +} + +static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) +{ + ASSERT(vcpu); + if (VALID_PAGE(vcpu->mmu.root_hpa)) { + vcpu->mmu.free(vcpu); + vcpu->mmu.root_hpa = INVALID_PAGE; + } +} + +int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) +{ + destroy_kvm_mmu(vcpu); + return init_kvm_mmu(vcpu); +} + +static void free_mmu_pages(struct kvm_vcpu *vcpu) +{ + while (!list_empty(&vcpu->free_pages)) { + struct kvm_mmu_page *page; + + page = list_entry(vcpu->free_pages.next, + struct kvm_mmu_page, link); + list_del(&page->link); + __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); + page->page_hpa = INVALID_PAGE; + } +} + +static int alloc_mmu_pages(struct kvm_vcpu *vcpu) +{ + int i; + + ASSERT(vcpu); + + for (i = 0; i < KVM_NUM_MMU_PAGES; i++) { + struct page *page; + struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i]; + + 
INIT_LIST_HEAD(&page_header->link); + if ((page = alloc_page(GFP_KVM_MMU)) == NULL) + goto error_1; + page->private = (unsigned long)page_header; + page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; + memset(__va(page_header->page_hpa), 0, PAGE_SIZE); + list_add(&page_header->link, &vcpu->free_pages); + } + return 0; + +error_1: + free_mmu_pages(vcpu); + return -ENOMEM; +} + +int kvm_mmu_init(struct kvm_vcpu *vcpu) +{ + int r; + + ASSERT(vcpu); + ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); + ASSERT(list_empty(&vcpu->free_pages)); + + if ((r = alloc_mmu_pages(vcpu))) + return r; + + if ((r = init_kvm_mmu(vcpu))) { + free_mmu_pages(vcpu); + return r; + } + return 0; +} + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu) +{ + ASSERT(vcpu); + + destroy_kvm_mmu(vcpu); + free_mmu_pages(vcpu); +} + +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) +{ + struct kvm_mmu_page *page; + + list_for_each_entry(page, &kvm->active_mmu_pages, link) { + int i; + u64 *pt; + + if (!test_bit(slot, &page->slot_bitmap)) + continue; + + pt = __va(page->page_hpa); + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) + /* avoid RMW */ + if (pt[i] & PT_WRITABLE_MASK) + pt[i] &= ~PT_WRITABLE_MASK; + + } +} diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h new file mode 100644 index 00000000000..765c2e1a048 --- /dev/null +++ b/drivers/kvm/paging_tmpl.h @@ -0,0 +1,397 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * MMU support + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Yaniv Kamay <yaniv@qumranet.com> + * Avi Kivity <avi@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +/* + * We need the mmu code to access both 32-bit and 64-bit guest ptes, + * so the code in this file is compiled twice, once per pte size. + */ + +#if PTTYPE == 64 + #define pt_element_t u64 + #define guest_walker guest_walker64 + #define FNAME(name) paging##64_##name + #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) + #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK + #define PT_NON_PTE_COPY_MASK PT64_NON_PTE_COPY_MASK +#elif PTTYPE == 32 + #define pt_element_t u32 + #define guest_walker guest_walker32 + #define FNAME(name) paging##32_##name + #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK + #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK + #define PT_INDEX(addr, level) PT32_INDEX(addr, level) + #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) + #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK + #define PT_NON_PTE_COPY_MASK PT32_NON_PTE_COPY_MASK +#else + #error Invalid PTTYPE value +#endif + +/* + * The guest_walker structure emulates the behavior of the hardware page + * table walker. 
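+ * It tracks the current level of the walk, a kmap of the guest page-table
+ * page currently being examined, and the user/writable access rights
+ * inherited from the levels above (inherited_ar).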
+ */ +struct guest_walker { + int level; + pt_element_t *table; + pt_element_t inherited_ar; +}; + +static void FNAME(init_walker)(struct guest_walker *walker, + struct kvm_vcpu *vcpu) +{ + hpa_t hpa; + struct kvm_memory_slot *slot; + + walker->level = vcpu->mmu.root_level; + slot = gfn_to_memslot(vcpu->kvm, + (vcpu->cr3 & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK); + walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); + + ASSERT((!kvm_arch_ops->is_long_mode(vcpu) && is_pae(vcpu)) || + (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); + + walker->table = (pt_element_t *)( (unsigned long)walker->table | + (unsigned long)(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) ); + walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; +} + +static void FNAME(release_walker)(struct guest_walker *walker) +{ + kunmap_atomic(walker->table, KM_USER0); +} + +static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, + u64 *shadow_pte, u64 access_bits) +{ + ASSERT(*shadow_pte == 0); + access_bits &= guest_pte; + *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); + set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, + guest_pte & PT_DIRTY_MASK, access_bits); +} + +static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, + u64 *shadow_pte, u64 access_bits, + int index) +{ + gpa_t gaddr; + + ASSERT(*shadow_pte == 0); + access_bits &= guest_pde; + gaddr = (guest_pde & PT_DIR_BASE_ADDR_MASK) + PAGE_SIZE * index; + if (PTTYPE == 32 && is_cpuid_PSE36()) + gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << + (32 - PT32_DIR_PSE36_SHIFT); + *shadow_pte = (guest_pde & (PT_NON_PTE_COPY_MASK | PT_GLOBAL_MASK)) | + ((guest_pde & PT_DIR_PAT_MASK) >> + (PT_DIR_PAT_SHIFT - PT_PAT_SHIFT)); + set_pte_common(vcpu, shadow_pte, gaddr, + guest_pde & PT_DIRTY_MASK, access_bits); +} + +/* + * Fetch a guest pte from a specific level in the paging hierarchy. + */ +static pt_element_t *FNAME(fetch_guest)(struct kvm_vcpu *vcpu, + struct guest_walker *walker, + int level, + gva_t addr) +{ + + ASSERT(level > 0 && level <= walker->level); + + for (;;) { + int index = PT_INDEX(addr, walker->level); + hpa_t paddr; + + ASSERT(((unsigned long)walker->table & PAGE_MASK) == + ((unsigned long)&walker->table[index] & PAGE_MASK)); + if (level == walker->level || + !is_present_pte(walker->table[index]) || + (walker->level == PT_DIRECTORY_LEVEL && + (walker->table[index] & PT_PAGE_SIZE_MASK) && + (PTTYPE == 64 || is_pse(vcpu)))) + return &walker->table[index]; + if (walker->level != 3 || kvm_arch_ops->is_long_mode(vcpu)) + walker->inherited_ar &= walker->table[index]; + paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK); + kunmap_atomic(walker->table, KM_USER0); + walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), + KM_USER0); + --walker->level; + } +} + +/* + * Fetch a shadow pte for a specific level in the paging hierarchy. 
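+ * Missing intermediate shadow pages are allocated on demand with
+ * kvm_mmu_alloc_page().  Returns NULL when the guest pte is not present and
+ * ERR_PTR(-ENOMEM) when no free shadow pages are left.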
+ */ +static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct guest_walker *walker) +{ + hpa_t shadow_addr; + int level; + u64 *prev_shadow_ent = NULL; + + shadow_addr = vcpu->mmu.root_hpa; + level = vcpu->mmu.shadow_root_level; + + for (; ; level--) { + u32 index = SHADOW_PT_INDEX(addr, level); + u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; + pt_element_t *guest_ent; + + if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { + if (level == PT_PAGE_TABLE_LEVEL) + return shadow_ent; + shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; + prev_shadow_ent = shadow_ent; + continue; + } + + if (PTTYPE == 32 && level > PT32_ROOT_LEVEL) { + ASSERT(level == PT32E_ROOT_LEVEL); + guest_ent = FNAME(fetch_guest)(vcpu, walker, + PT32_ROOT_LEVEL, addr); + } else + guest_ent = FNAME(fetch_guest)(vcpu, walker, + level, addr); + + if (!is_present_pte(*guest_ent)) + return NULL; + + /* Don't set accessed bit on PAE PDPTRs */ + if (vcpu->mmu.root_level != 3 || walker->level != 3) + *guest_ent |= PT_ACCESSED_MASK; + + if (level == PT_PAGE_TABLE_LEVEL) { + + if (walker->level == PT_DIRECTORY_LEVEL) { + if (prev_shadow_ent) + *prev_shadow_ent |= PT_SHADOW_PS_MARK; + FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, + walker->inherited_ar, + PT_INDEX(addr, PT_PAGE_TABLE_LEVEL)); + } else { + ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); + FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar); + } + return shadow_ent; + } + + shadow_addr = kvm_mmu_alloc_page(vcpu, shadow_ent); + if (!VALID_PAGE(shadow_addr)) + return ERR_PTR(-ENOMEM); + if (!kvm_arch_ops->is_long_mode(vcpu) && level == 3) + *shadow_ent = shadow_addr | + (*guest_ent & (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK)); + else { + *shadow_ent = shadow_addr | + (*guest_ent & PT_NON_PTE_COPY_MASK); + *shadow_ent |= (PT_WRITABLE_MASK | PT_USER_MASK); + } + prev_shadow_ent = shadow_ent; + } +} + +/* + * The guest faulted for write. We need to + * + * - check write permissions + * - update the guest pte dirty bit + * - update our own dirty page tracking structures + */ +static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, + u64 *shadow_ent, + struct guest_walker *walker, + gva_t addr, + int user) +{ + pt_element_t *guest_ent; + int writable_shadow; + gfn_t gfn; + + if (is_writeble_pte(*shadow_ent)) + return 0; + + writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; + if (user) { + /* + * User mode access. Fail if it's a kernel page or a read-only + * page. + */ + if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow) + return 0; + ASSERT(*shadow_ent & PT_USER_MASK); + } else + /* + * Kernel mode access. Fail if it's a read-only page and + * supervisor write protection is enabled. + */ + if (!writable_shadow) { + if (is_write_protection(vcpu)) + return 0; + *shadow_ent &= ~PT_USER_MASK; + } + + guest_ent = FNAME(fetch_guest)(vcpu, walker, PT_PAGE_TABLE_LEVEL, addr); + + if (!is_present_pte(*guest_ent)) { + *shadow_ent = 0; + return 0; + } + + gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + mark_page_dirty(vcpu->kvm, gfn); + *shadow_ent |= PT_WRITABLE_MASK; + *guest_ent |= PT_DIRTY_MASK; + + return 1; +} + +/* + * Page fault handler. 
There are several causes for a page fault: + * - there is no shadow pte for the guest pte + * - write access through a shadow pte marked read only so that we can set + * the dirty bit + * - write access to a shadow pte marked read only so we can update the page + * dirty bitmap, when userspace requests it + * - mmio access; in this case we will never install a present shadow pte + * - normal guest page fault due to the guest pte marked not present, not + * writable, or not executable + * + * Returns: 1 if we need to emulate the instruction, 0 otherwise + */ +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, + u32 error_code) +{ + int write_fault = error_code & PFERR_WRITE_MASK; + int pte_present = error_code & PFERR_PRESENT_MASK; + int user_fault = error_code & PFERR_USER_MASK; + struct guest_walker walker; + u64 *shadow_pte; + int fixed; + + /* + * Look up the shadow pte for the faulting address. + */ + for (;;) { + FNAME(init_walker)(&walker, vcpu); + shadow_pte = FNAME(fetch)(vcpu, addr, &walker); + if (IS_ERR(shadow_pte)) { /* must be -ENOMEM */ + nonpaging_flush(vcpu); + FNAME(release_walker)(&walker); + continue; + } + break; + } + + /* + * The page is not mapped by the guest. Let the guest handle it. + */ + if (!shadow_pte) { + inject_page_fault(vcpu, addr, error_code); + FNAME(release_walker)(&walker); + return 0; + } + + /* + * Update the shadow pte. + */ + if (write_fault) + fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, + user_fault); + else + fixed = fix_read_pf(shadow_pte); + + FNAME(release_walker)(&walker); + + /* + * mmio: emulate if accessible, otherwise its a guest fault. + */ + if (is_io_pte(*shadow_pte)) { + if (may_access(*shadow_pte, write_fault, user_fault)) + return 1; + pgprintk("%s: io work, no access\n", __FUNCTION__); + inject_page_fault(vcpu, addr, + error_code | PFERR_PRESENT_MASK); + return 0; + } + + /* + * pte not present, guest page fault. 
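+ * (The fault could not be resolved by fixing up the shadow pte, so it is
+ * reflected back into the guest.)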
+ */ + if (pte_present && !fixed) { + inject_page_fault(vcpu, addr, error_code); + return 0; + } + + ++kvm_stat.pf_fixed; + + return 0; +} + +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) +{ + struct guest_walker walker; + pt_element_t guest_pte; + gpa_t gpa; + + FNAME(init_walker)(&walker, vcpu); + guest_pte = *FNAME(fetch_guest)(vcpu, &walker, PT_PAGE_TABLE_LEVEL, + vaddr); + FNAME(release_walker)(&walker); + + if (!is_present_pte(guest_pte)) + return UNMAPPED_GVA; + + if (walker.level == PT_DIRECTORY_LEVEL) { + ASSERT((guest_pte & PT_PAGE_SIZE_MASK)); + ASSERT(PTTYPE == 64 || is_pse(vcpu)); + + gpa = (guest_pte & PT_DIR_BASE_ADDR_MASK) | (vaddr & + (PT_LEVEL_MASK(PT_PAGE_TABLE_LEVEL) | ~PAGE_MASK)); + + if (PTTYPE == 32 && is_cpuid_PSE36()) + gpa |= (guest_pte & PT32_DIR_PSE36_MASK) << + (32 - PT32_DIR_PSE36_SHIFT); + } else { + gpa = (guest_pte & PT_BASE_ADDR_MASK); + gpa |= (vaddr & ~PAGE_MASK); + } + + return gpa; +} + +#undef pt_element_t +#undef guest_walker +#undef FNAME +#undef PT_BASE_ADDR_MASK +#undef PT_INDEX +#undef SHADOW_PT_INDEX +#undef PT_LEVEL_MASK +#undef PT_PTE_COPY_MASK +#undef PT_NON_PTE_COPY_MASK +#undef PT_DIR_BASE_ADDR_MASK diff --git a/drivers/kvm/segment_descriptor.h b/drivers/kvm/segment_descriptor.h new file mode 100644 index 00000000000..71fdf458619 --- /dev/null +++ b/drivers/kvm/segment_descriptor.h @@ -0,0 +1,17 @@ +struct segment_descriptor { + u16 limit_low; + u16 base_low; + u8 base_mid; + u8 type : 4; + u8 system : 1; + u8 dpl : 2; + u8 present : 1; + u8 limit_high : 4; + u8 avl : 1; + u8 long_mode : 1; + u8 default_op : 1; + u8 granularity : 1; + u8 base_high; +} __attribute__((packed)); + + diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c new file mode 100644 index 00000000000..a33a89c6813 --- /dev/null +++ b/drivers/kvm/svm.c @@ -0,0 +1,1677 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * AMD SVM support + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Yaniv Kamay <yaniv@qumranet.com> + * Avi Kivity <avi@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <asm/desc.h> + +#include "kvm_svm.h" +#include "x86_emulate.h" + +MODULE_AUTHOR("Qumranet"); +MODULE_LICENSE("GPL"); + +#define IOPM_ALLOC_ORDER 2 +#define MSRPM_ALLOC_ORDER 1 + +#define DB_VECTOR 1 +#define UD_VECTOR 6 +#define GP_VECTOR 13 + +#define DR7_GD_MASK (1 << 13) +#define DR6_BD_MASK (1 << 13) +#define CR4_DE_MASK (1UL << 3) + +#define SEG_TYPE_LDT 2 +#define SEG_TYPE_BUSY_TSS16 3 + +#define KVM_EFER_LMA (1 << 10) +#define KVM_EFER_LME (1 << 8) + +unsigned long iopm_base; +unsigned long msrpm_base; + +struct kvm_ldttss_desc { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +struct svm_cpu_data { + int cpu; + + uint64_t asid_generation; + uint32_t max_asid; + uint32_t next_asid; + struct kvm_ldttss_desc *tss_desc; + + struct page *save_area; +}; + +static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); + +struct svm_init_data { + int cpu; + int r; +}; + +static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; + +#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges)) +#define MSRS_RANGE_SIZE 2048 +#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) + +#define MAX_INST_SIZE 15 + +static unsigned get_addr_size(struct kvm_vcpu *vcpu) +{ + struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; + u16 cs_attrib; + + if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM)) + return 2; + + cs_attrib = sa->cs.attrib; + + return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 : + (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2; +} + +static inline u8 pop_irq(struct kvm_vcpu *vcpu) +{ + int word_index = __ffs(vcpu->irq_summary); + int bit_index = __ffs(vcpu->irq_pending[word_index]); + int irq = word_index * BITS_PER_LONG + bit_index; + + clear_bit(bit_index, &vcpu->irq_pending[word_index]); + if (!vcpu->irq_pending[word_index]) + clear_bit(word_index, &vcpu->irq_summary); + return irq; +} + +static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) +{ + set_bit(irq, vcpu->irq_pending); + set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); +} + +static inline void clgi(void) +{ + asm volatile (SVM_CLGI); +} + +static inline void stgi(void) +{ + asm volatile (SVM_STGI); +} + +static inline void invlpga(unsigned long addr, u32 asid) +{ + asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); +} + +static inline unsigned long kvm_read_cr2(void) +{ + unsigned long cr2; + + asm volatile ("mov %%cr2, %0" : "=r" (cr2)); + return cr2; +} + +static inline void kvm_write_cr2(unsigned long val) +{ + asm volatile ("mov %0, %%cr2" :: "r" (val)); +} + +static inline unsigned long read_dr6(void) +{ + unsigned long dr6; + + asm volatile ("mov %%dr6, %0" : "=r" (dr6)); + return dr6; +} + +static inline void write_dr6(unsigned long val) +{ + asm volatile ("mov %0, %%dr6" :: "r" (val)); +} + +static inline unsigned long read_dr7(void) +{ + unsigned long dr7; + + asm volatile ("mov %%dr7, %0" : "=r" (dr7)); + return dr7; +} + +static inline void write_dr7(unsigned long val) +{ + asm volatile ("mov %0, %%dr7" :: "r" (val)); +} + +static inline int svm_is_long_mode(struct kvm_vcpu *vcpu) +{ + return vcpu->svm->vmcb->save.efer & KVM_EFER_LMA; +} + +static inline void force_new_asid(struct kvm_vcpu *vcpu) +{ + vcpu->svm->asid_generation--; +} + +static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) +{ + force_new_asid(vcpu); +} + +static void svm_set_efer(struct kvm_vcpu *vcpu, u64 
efer) +{ + if (!(efer & KVM_EFER_LMA)) + efer &= ~KVM_EFER_LME; + + vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; + vcpu->shadow_efer = efer; +} + +static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) +{ + vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + GP_VECTOR; + vcpu->svm->vmcb->control.event_inj_err = error_code; +} + +static void inject_ud(struct kvm_vcpu *vcpu) +{ + vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_TYPE_EXEPT | + UD_VECTOR; +} + +static void inject_db(struct kvm_vcpu *vcpu) +{ + vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_TYPE_EXEPT | + DB_VECTOR; +} + +static int is_page_fault(uint32_t info) +{ + info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; + return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT); +} + +static int is_external_interrupt(u32 info) +{ + info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; + return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); +} + +static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +{ + if (!vcpu->svm->next_rip) { + printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); + return; + } + if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) { + printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", + __FUNCTION__, + vcpu->svm->vmcb->save.rip, + vcpu->svm->next_rip); + } + + vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; + vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; +} + +static int has_svm(void) +{ + uint32_t eax, ebx, ecx, edx; + + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) { + printk(KERN_INFO "has_svm: not amd\n"); + return 0; + } + + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); + return 0; + } + + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + printk(KERN_DEBUG "has_svm: svm not available\n"); + return 0; + } + return 1; +} + +static void svm_hardware_disable(void *garbage) +{ + struct svm_cpu_data *svm_data + = per_cpu(svm_data, raw_smp_processor_id()); + + if (svm_data) { + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + per_cpu(svm_data, raw_smp_processor_id()) = 0; + __free_page(svm_data->save_area); + kfree(svm_data); + } +} + +static void svm_hardware_enable(void *garbage) +{ + + struct svm_cpu_data *svm_data; + uint64_t efer; +#ifdef __x86_64__ + struct desc_ptr gdt_descr; +#else + struct Xgt_desc_struct gdt_descr; +#endif + struct desc_struct *gdt; + int me = raw_smp_processor_id(); + + if (!has_svm()) { + printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); + return; + } + svm_data = per_cpu(svm_data, me); + + if (!svm_data) { + printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", + me); + return; + } + + svm_data->asid_generation = 1; + svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; + svm_data->next_asid = svm_data->max_asid + 1; + + asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); + gdt = (struct desc_struct *)gdt_descr.address; + svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); + + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); + + wrmsrl(MSR_VM_HSAVE_PA, + page_to_pfn(svm_data->save_area) << PAGE_SHIFT); +} + +static int svm_cpu_init(int cpu) +{ + struct svm_cpu_data *svm_data; + int r; + + svm_data = kzalloc(sizeof(struct svm_cpu_data), 
GFP_KERNEL); + if (!svm_data) + return -ENOMEM; + svm_data->cpu = cpu; + svm_data->save_area = alloc_page(GFP_KERNEL); + r = -ENOMEM; + if (!svm_data->save_area) + goto err_1; + + per_cpu(svm_data, cpu) = svm_data; + + return 0; + +err_1: + kfree(svm_data); + return r; + +} + +static int set_msr_interception(u32 *msrpm, unsigned msr, + int read, int write) +{ + int i; + + for (i = 0; i < NUM_MSR_MAPS; i++) { + if (msr >= msrpm_ranges[i] && + msr < msrpm_ranges[i] + MSRS_IN_RANGE) { + u32 msr_offset = (i * MSRS_IN_RANGE + msr - + msrpm_ranges[i]) * 2; + + u32 *base = msrpm + (msr_offset / 32); + u32 msr_shift = msr_offset % 32; + u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); + *base = (*base & ~(0x3 << msr_shift)) | + (mask << msr_shift); + return 1; + } + } + printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr); + return 0; +} + +static __init int svm_hardware_setup(void) +{ + int cpu; + struct page *iopm_pages; + struct page *msrpm_pages; + void *msrpm_va; + int r; + + + iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); + + if (!iopm_pages) + return -ENOMEM; + memset(page_address(iopm_pages), 0xff, + PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); + iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + + + msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + + r = -ENOMEM; + if (!msrpm_pages) + goto err_1; + + msrpm_va = page_address(msrpm_pages); + memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT; + +#ifdef __x86_64__ + set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); + set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); + set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); + set_msr_interception(msrpm_va, MSR_STAR, 1, 1); + set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1); + set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1); + set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1); +#endif + set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1); + set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1); + set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1); + + for_each_online_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err_2; + } + return 0; + +err_2: + __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); + msrpm_base = 0; +err_1: + __free_pages(iopm_pages, IOPM_ALLOC_ORDER); + iopm_base = 0; + return r; +} + +static __exit void svm_hardware_unsetup(void) +{ + __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER); + __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); + iopm_base = msrpm_base = 0; +} + +static void init_seg(struct vmcb_seg *seg) +{ + seg->selector = 0; + seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | + SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ + seg->limit = 0xffff; + seg->base = 0; +} + +static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) +{ + seg->selector = 0; + seg->attrib = SVM_SELECTOR_P_MASK | type; + seg->limit = 0xffff; + seg->base = 0; +} + +static int svm_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static void init_vmcb(struct vmcb *vmcb) +{ + struct vmcb_control_area *control = &vmcb->control; + struct vmcb_save_area *save = &vmcb->save; + u64 tsc; + + control->intercept_cr_read = INTERCEPT_CR0_MASK | + INTERCEPT_CR3_MASK | + INTERCEPT_CR4_MASK; + + control->intercept_cr_write = INTERCEPT_CR0_MASK | + INTERCEPT_CR3_MASK | + INTERCEPT_CR4_MASK; + + control->intercept_dr_read = INTERCEPT_DR0_MASK | + INTERCEPT_DR1_MASK | + INTERCEPT_DR2_MASK | + INTERCEPT_DR3_MASK; + + 
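/* Writes to the debug registers are intercepted as well and emulated via svm_set_dr(). */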
control->intercept_dr_write = INTERCEPT_DR0_MASK | + INTERCEPT_DR1_MASK | + INTERCEPT_DR2_MASK | + INTERCEPT_DR3_MASK | + INTERCEPT_DR5_MASK | + INTERCEPT_DR7_MASK; + + control->intercept_exceptions = 1 << PF_VECTOR; + + + control->intercept = (1ULL << INTERCEPT_INTR) | + (1ULL << INTERCEPT_NMI) | + /* + * selective cr0 intercept bug? + * 0: 0f 22 d8 mov %eax,%cr3 + * 3: 0f 20 c0 mov %cr0,%eax + * 6: 0d 00 00 00 80 or $0x80000000,%eax + * b: 0f 22 c0 mov %eax,%cr0 + * set cr3 ->interception + * get cr0 ->interception + * set cr0 -> no interception + */ + /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ + (1ULL << INTERCEPT_CPUID) | + (1ULL << INTERCEPT_HLT) | + (1ULL << INTERCEPT_INVLPG) | + (1ULL << INTERCEPT_INVLPGA) | + (1ULL << INTERCEPT_IOIO_PROT) | + (1ULL << INTERCEPT_MSR_PROT) | + (1ULL << INTERCEPT_TASK_SWITCH) | + (1ULL << INTERCEPT_VMRUN) | + (1ULL << INTERCEPT_VMMCALL) | + (1ULL << INTERCEPT_VMLOAD) | + (1ULL << INTERCEPT_VMSAVE) | + (1ULL << INTERCEPT_STGI) | + (1ULL << INTERCEPT_CLGI) | + (1ULL << INTERCEPT_SKINIT); + + control->iopm_base_pa = iopm_base; + control->msrpm_base_pa = msrpm_base; + rdtscll(tsc); + control->tsc_offset = -tsc; + control->int_ctl = V_INTR_MASKING_MASK; + + init_seg(&save->es); + init_seg(&save->ss); + init_seg(&save->ds); + init_seg(&save->fs); + init_seg(&save->gs); + + save->cs.selector = 0xf000; + /* Executable/Readable Code Segment */ + save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | + SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; + save->cs.limit = 0xffff; + save->cs.base = 0xffff0000; + + save->gdtr.limit = 0xffff; + save->idtr.limit = 0xffff; + + init_sys_seg(&save->ldtr, SEG_TYPE_LDT); + init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); + + save->efer = MSR_EFER_SVME_MASK; + + save->dr6 = 0xffff0ff0; + save->dr7 = 0x400; + save->rflags = 2; + save->rip = 0x0000fff0; + + /* + * cr0 val on cpu init should be 0x60000010, we enable cpu + * cache by default. the orderly way is to enable cache in bios. + */ + save->cr0 = 0x00000010 | CR0_PG_MASK; + save->cr4 = CR4_PAE_MASK; + /* rdx = ?? 
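(on real hardware EDX holds the processor signature after reset; it is not initialized here)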
*/ +} + +static int svm_create_vcpu(struct kvm_vcpu *vcpu) +{ + struct page *page; + int r; + + r = -ENOMEM; + vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); + if (!vcpu->svm) + goto out1; + page = alloc_page(GFP_KERNEL); + if (!page) + goto out2; + + vcpu->svm->vmcb = page_address(page); + memset(vcpu->svm->vmcb, 0, PAGE_SIZE); + vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; + vcpu->svm->cr0 = 0x00000010; + vcpu->svm->asid_generation = 0; + memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); + init_vmcb(vcpu->svm->vmcb); + + return 0; + +out2: + kfree(vcpu->svm); +out1: + return r; +} + +static void svm_free_vcpu(struct kvm_vcpu *vcpu) +{ + if (!vcpu->svm) + return; + if (vcpu->svm->vmcb) + __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT)); + kfree(vcpu->svm); +} + +static struct kvm_vcpu *svm_vcpu_load(struct kvm_vcpu *vcpu) +{ + get_cpu(); + return vcpu; +} + +static void svm_vcpu_put(struct kvm_vcpu *vcpu) +{ + put_cpu(); +} + +static void svm_cache_regs(struct kvm_vcpu *vcpu) +{ + vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; + vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp; + vcpu->rip = vcpu->svm->vmcb->save.rip; +} + +static void svm_decache_regs(struct kvm_vcpu *vcpu) +{ + vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; + vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; + vcpu->svm->vmcb->save.rip = vcpu->rip; +} + +static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) +{ + return vcpu->svm->vmcb->save.rflags; +} + +static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + vcpu->svm->vmcb->save.rflags = rflags; +} + +static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) +{ + struct vmcb_save_area *save = &vcpu->svm->vmcb->save; + + switch (seg) { + case VCPU_SREG_CS: return &save->cs; + case VCPU_SREG_DS: return &save->ds; + case VCPU_SREG_ES: return &save->es; + case VCPU_SREG_FS: return &save->fs; + case VCPU_SREG_GS: return &save->gs; + case VCPU_SREG_SS: return &save->ss; + case VCPU_SREG_TR: return &save->tr; + case VCPU_SREG_LDTR: return &save->ldtr; + } + BUG(); + return 0; +} + +static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) +{ + struct vmcb_seg *s = svm_seg(vcpu, seg); + + return s->base; +} + +static void svm_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct vmcb_seg *s = svm_seg(vcpu, seg); + + var->base = s->base; + var->limit = s->limit; + var->selector = s->selector; + var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; + var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; + var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; + var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; + var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; + var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; + var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; + var->unusable = !var->present; +} + +static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS); + + *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; + *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; +} + +static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + dt->limit = vcpu->svm->vmcb->save.ldtr.limit; + dt->base = vcpu->svm->vmcb->save.ldtr.base; +} + +static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + vcpu->svm->vmcb->save.ldtr.limit = dt->limit; + vcpu->svm->vmcb->save.ldtr.base = dt->base ; +} + +static void 
svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + dt->limit = vcpu->svm->vmcb->save.gdtr.limit; + dt->base = vcpu->svm->vmcb->save.gdtr.base; +} + +static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + vcpu->svm->vmcb->save.gdtr.limit = dt->limit; + vcpu->svm->vmcb->save.gdtr.base = dt->base ; +} + +static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +{ +#ifdef __x86_64__ + if (vcpu->shadow_efer & KVM_EFER_LME) { + if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { + vcpu->shadow_efer |= KVM_EFER_LMA; + vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; + } + + if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) { + vcpu->shadow_efer &= ~KVM_EFER_LMA; + vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); + } + } +#endif + vcpu->svm->cr0 = cr0; + vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK; + vcpu->cr0 = cr0; +} + +static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + vcpu->cr4 = cr4; + vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK; +} + +static void svm_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct vmcb_seg *s = svm_seg(vcpu, seg); + + s->base = var->base; + s->limit = var->limit; + s->selector = var->selector; + if (var->unusable) + s->attrib = 0; + else { + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; + } + if (seg == VCPU_SREG_CS) + vcpu->svm->vmcb->save.cpl + = (vcpu->svm->vmcb->save.cs.attrib + >> SVM_SELECTOR_DPL_SHIFT) & 3; + +} + +/* FIXME: + + vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK; + vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); + +*/ + +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +{ + return -EOPNOTSUPP; +} + +static void load_host_msrs(struct kvm_vcpu *vcpu) +{ + int i; + + for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) + wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); +} + +static void save_host_msrs(struct kvm_vcpu *vcpu) +{ + int i; + + for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) + rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); +} + +static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) +{ + if (svm_data->next_asid > svm_data->max_asid) { + ++svm_data->asid_generation; + svm_data->next_asid = 1; + vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + } + + vcpu->cpu = svm_data->cpu; + vcpu->svm->asid_generation = svm_data->asid_generation; + vcpu->svm->vmcb->control.asid = svm_data->next_asid++; +} + +static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) +{ + invlpga(address, vcpu->svm->vmcb->control.asid); // is needed? +} + +static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) +{ + return vcpu->svm->db_regs[dr]; +} + +static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, + int *exception) +{ + *exception = 0; + + if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) { + vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK; + vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK; + *exception = DB_VECTOR; + return; + } + + switch (dr) { + case 0 ... 3: + vcpu->svm->db_regs[dr] = value; + return; + case 4 ... 
5: + if (vcpu->cr4 & CR4_DE_MASK) { + *exception = UD_VECTOR; + return; + } + case 7: { + if (value & ~((1ULL << 32) - 1)) { + *exception = GP_VECTOR; + return; + } + vcpu->svm->vmcb->save.dr7 = value; + return; + } + default: + printk(KERN_DEBUG "%s: unexpected dr %u\n", + __FUNCTION__, dr); + *exception = UD_VECTOR; + return; + } +} + +static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info; + u64 fault_address; + u32 error_code; + enum emulation_result er; + + if (is_external_interrupt(exit_int_info)) + push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); + + spin_lock(&vcpu->kvm->lock); + + fault_address = vcpu->svm->vmcb->control.exit_info_2; + error_code = vcpu->svm->vmcb->control.exit_info_1; + if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) { + spin_unlock(&vcpu->kvm->lock); + return 1; + } + er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); + spin_unlock(&vcpu->kvm->lock); + + switch (er) { + case EMULATE_DONE: + return 1; + case EMULATE_DO_MMIO: + ++kvm_stat.mmio_exits; + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; + case EMULATE_FAIL: + vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); + break; + default: + BUG(); + } + + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; +} + +static int io_get_override(struct kvm_vcpu *vcpu, + struct vmcb_seg **seg, + int *addr_override) +{ + u8 inst[MAX_INST_SIZE]; + unsigned ins_length; + gva_t rip; + int i; + + rip = vcpu->svm->vmcb->save.rip; + ins_length = vcpu->svm->next_rip - rip; + rip += vcpu->svm->vmcb->save.cs.base; + + if (ins_length > MAX_INST_SIZE) + printk(KERN_DEBUG + "%s: inst length err, cs base 0x%llx rip 0x%llx " + "next rip 0x%llx ins_length %u\n", + __FUNCTION__, + vcpu->svm->vmcb->save.cs.base, + vcpu->svm->vmcb->save.rip, + vcpu->svm->vmcb->control.exit_info_2, + ins_length); + + if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) + /* #PF */ + return 0; + + *addr_override = 0; + *seg = 0; + for (i = 0; i < ins_length; i++) + switch (inst[i]) { + case 0xf0: + case 0xf2: + case 0xf3: + case 0x66: + continue; + case 0x67: + *addr_override = 1; + continue; + case 0x2e: + *seg = &vcpu->svm->vmcb->save.cs; + continue; + case 0x36: + *seg = &vcpu->svm->vmcb->save.ss; + continue; + case 0x3e: + *seg = &vcpu->svm->vmcb->save.ds; + continue; + case 0x26: + *seg = &vcpu->svm->vmcb->save.es; + continue; + case 0x64: + *seg = &vcpu->svm->vmcb->save.fs; + continue; + case 0x65: + *seg = &vcpu->svm->vmcb->save.gs; + continue; + default: + return 1; + } + printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__); + return 0; +} + +static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) +{ + unsigned long addr_mask; + unsigned long *reg; + struct vmcb_seg *seg; + int addr_override; + struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save; + u16 cs_attrib = save_area->cs.attrib; + unsigned addr_size = get_addr_size(vcpu); + + if (!io_get_override(vcpu, &seg, &addr_override)) + return 0; + + if (addr_override) + addr_size = (addr_size == 2) ? 4: (addr_size >> 1); + + if (ins) { + reg = &vcpu->regs[VCPU_REGS_RDI]; + seg = &vcpu->svm->vmcb->save.es; + } else { + reg = &vcpu->regs[VCPU_REGS_RSI]; + seg = (seg) ? 
seg : &vcpu->svm->vmcb->save.ds; + } + + addr_mask = ~0ULL >> (64 - (addr_size * 8)); + + if ((cs_attrib & SVM_SELECTOR_L_MASK) && + !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) { + *address = (*reg & addr_mask); + return addr_mask; + } + + if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { + svm_inject_gp(vcpu, 0); + return 0; + } + + *address = (*reg & addr_mask) + seg->base; + return addr_mask; +} + +static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? + int _in = io_info & SVM_IOIO_TYPE_MASK; + + ++kvm_stat.io_exits; + + vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; + + kvm_run->exit_reason = KVM_EXIT_IO; + kvm_run->io.port = io_info >> 16; + kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); + kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; + kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; + + if (kvm_run->io.string) { + unsigned addr_mask; + + addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); + if (!addr_mask) { + printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); + return 1; + } + + if (kvm_run->io.rep) { + kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags + & X86_EFLAGS_DF) != 0; + } + } else { + kvm_run->io.value = vcpu->svm->vmcb->save.rax; + } + return 0; +} + + +static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; + skip_emulated_instruction(vcpu); + if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)) + return 1; + + kvm_run->exit_reason = KVM_EXIT_HLT; + return 0; +} + +static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + inject_ud(vcpu); + return 1; +} + +static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; +} + +static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + kvm_run->exit_reason = KVM_EXIT_CPUID; + return 0; +} + +static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + if (emulate_instruction(vcpu, 0, 0, 0) != EMULATE_DONE) + printk(KERN_ERR "%s: failed\n", __FUNCTION__); + return 1; +} + +static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) +{ + switch (ecx) { + case MSR_IA32_MC0_CTL: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MCG_CAP: + case MSR_IA32_MC0_MISC: + case MSR_IA32_MC0_MISC+4: + case MSR_IA32_MC0_MISC+8: + case MSR_IA32_MC0_MISC+12: + case MSR_IA32_MC0_MISC+16: + case MSR_IA32_UCODE_REV: + /* MTRR registers */ + case 0xfe: + case 0x200 ... 
0x2ff: + *data = 0; + break; + case MSR_IA32_TIME_STAMP_COUNTER: { + u64 tsc; + + rdtscll(tsc); + *data = vcpu->svm->vmcb->control.tsc_offset + tsc; + break; + } + case MSR_EFER: + *data = vcpu->shadow_efer; + break; + case MSR_IA32_APICBASE: + *data = vcpu->apic_base; + break; +#ifdef __x86_64__ + case MSR_STAR: + *data = vcpu->svm->vmcb->save.star; + break; + case MSR_LSTAR: + *data = vcpu->svm->vmcb->save.lstar; + break; + case MSR_CSTAR: + *data = vcpu->svm->vmcb->save.cstar; + break; + case MSR_KERNEL_GS_BASE: + *data = vcpu->svm->vmcb->save.kernel_gs_base; + break; + case MSR_SYSCALL_MASK: + *data = vcpu->svm->vmcb->save.sfmask; + break; +#endif + case MSR_IA32_SYSENTER_CS: + *data = vcpu->svm->vmcb->save.sysenter_cs; + break; + case MSR_IA32_SYSENTER_EIP: + *data = vcpu->svm->vmcb->save.sysenter_eip; + break; + case MSR_IA32_SYSENTER_ESP: + *data = vcpu->svm->vmcb->save.sysenter_esp; + break; + default: + printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", ecx); + return 1; + } + return 0; +} + +static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data; + + if (svm_get_msr(vcpu, ecx, &data)) + svm_inject_gp(vcpu, 0); + else { + vcpu->svm->vmcb->save.rax = data & 0xffffffff; + vcpu->regs[VCPU_REGS_RDX] = data >> 32; + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + skip_emulated_instruction(vcpu); + } + return 1; +} + +static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) +{ + switch (ecx) { +#ifdef __x86_64__ + case MSR_EFER: + set_efer(vcpu, data); + break; +#endif + case MSR_IA32_MC0_STATUS: + printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n" + , __FUNCTION__, data); + break; + case MSR_IA32_TIME_STAMP_COUNTER: { + u64 tsc; + + rdtscll(tsc); + vcpu->svm->vmcb->control.tsc_offset = data - tsc; + break; + } + case MSR_IA32_UCODE_REV: + case MSR_IA32_UCODE_WRITE: + case 0x200 ... 
0x2ff: /* MTRRs */ + break; + case MSR_IA32_APICBASE: + vcpu->apic_base = data; + break; +#ifdef __x86_64___ + case MSR_STAR: + vcpu->svm->vmcb->save.star = data; + break; + case MSR_LSTAR: + vcpu->svm->vmcb->save.lstar = data; + break; + case MSR_CSTAR: + vcpu->svm->vmcb->save.cstar = data; + break; + case MSR_KERNEL_GS_BASE: + vcpu->svm->vmcb->save.kernel_gs_base = data; + break; + case MSR_SYSCALL_MASK: + vcpu->svm->vmcb->save.sfmask = data; + break; +#endif + case MSR_IA32_SYSENTER_CS: + vcpu->svm->vmcb->save.sysenter_cs = data; + break; + case MSR_IA32_SYSENTER_EIP: + vcpu->svm->vmcb->save.sysenter_eip = data; + break; + case MSR_IA32_SYSENTER_ESP: + vcpu->svm->vmcb->save.sysenter_esp = data; + break; + default: + printk(KERN_ERR "kvm: unhandled wrmsr: %x\n", ecx); + return 1; + } + return 0; +} + +static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data = (vcpu->svm->vmcb->save.rax & -1u) + | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + if (svm_set_msr(vcpu, ecx, data)) + svm_inject_gp(vcpu, 0); + else + skip_emulated_instruction(vcpu); + return 1; +} + +static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + if (vcpu->svm->vmcb->control.exit_info_1) + return wrmsr_interception(vcpu, kvm_run); + else + return rdmsr_interception(vcpu, kvm_run); +} + +static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [SVM_EXIT_READ_CR0] = emulate_on_interception, + [SVM_EXIT_READ_CR3] = emulate_on_interception, + [SVM_EXIT_READ_CR4] = emulate_on_interception, + /* for now: */ + [SVM_EXIT_WRITE_CR0] = emulate_on_interception, + [SVM_EXIT_WRITE_CR3] = emulate_on_interception, + [SVM_EXIT_WRITE_CR4] = emulate_on_interception, + [SVM_EXIT_READ_DR0] = emulate_on_interception, + [SVM_EXIT_READ_DR1] = emulate_on_interception, + [SVM_EXIT_READ_DR2] = emulate_on_interception, + [SVM_EXIT_READ_DR3] = emulate_on_interception, + [SVM_EXIT_WRITE_DR0] = emulate_on_interception, + [SVM_EXIT_WRITE_DR1] = emulate_on_interception, + [SVM_EXIT_WRITE_DR2] = emulate_on_interception, + [SVM_EXIT_WRITE_DR3] = emulate_on_interception, + [SVM_EXIT_WRITE_DR5] = emulate_on_interception, + [SVM_EXIT_WRITE_DR7] = emulate_on_interception, + [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, + [SVM_EXIT_INTR] = nop_on_interception, + [SVM_EXIT_NMI] = nop_on_interception, + [SVM_EXIT_SMI] = nop_on_interception, + [SVM_EXIT_INIT] = nop_on_interception, + /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ + [SVM_EXIT_CPUID] = cpuid_interception, + [SVM_EXIT_HLT] = halt_interception, + [SVM_EXIT_INVLPG] = emulate_on_interception, + [SVM_EXIT_INVLPGA] = invalid_op_interception, + [SVM_EXIT_IOIO] = io_interception, + [SVM_EXIT_MSR] = msr_interception, + [SVM_EXIT_TASK_SWITCH] = task_switch_interception, + [SVM_EXIT_VMRUN] = invalid_op_interception, + [SVM_EXIT_VMMCALL] = invalid_op_interception, + [SVM_EXIT_VMLOAD] = invalid_op_interception, + [SVM_EXIT_VMSAVE] = invalid_op_interception, + [SVM_EXIT_STGI] = invalid_op_interception, + [SVM_EXIT_CLGI] = invalid_op_interception, + [SVM_EXIT_SKINIT] = invalid_op_interception, +}; + + +static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 exit_code = vcpu->svm->vmcb->control.exit_code; + + kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; + + if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && + exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) + 
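/* An interrupt was being delivered when this exit triggered; that is only expected for #PF exits, where pf_interception() requeues it. */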
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " + "exit_code 0x%x\n", + __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, + exit_code); + + if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers) + || svm_exit_handlers[exit_code] == 0) { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", + __FUNCTION__, + exit_code, + vcpu->svm->vmcb->save.rip, + vcpu->cr0, + vcpu->svm->vmcb->save.rflags); + return 0; + } + + return svm_exit_handlers[exit_code](vcpu, kvm_run); +} + +static void reload_tss(struct kvm_vcpu *vcpu) +{ + int cpu = raw_smp_processor_id(); + + struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + svm_data->tss_desc->type = 9; //available 32/64-bit TSS + load_TR_desc(); +} + +static void pre_svm_run(struct kvm_vcpu *vcpu) +{ + int cpu = raw_smp_processor_id(); + + struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + + vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; + if (vcpu->cpu != cpu || + vcpu->svm->asid_generation != svm_data->asid_generation) + new_asid(vcpu, svm_data); +} + + +static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu) +{ + struct vmcb_control_area *control; + + if (!vcpu->irq_summary) + return; + + control = &vcpu->svm->vmcb->control; + + control->int_vector = pop_irq(vcpu); + control->int_ctl &= ~V_INTR_PRIO_MASK; + control->int_ctl |= V_IRQ_MASK | + ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); +} + +static void kvm_reput_irq(struct kvm_vcpu *vcpu) +{ + struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + + if (control->int_ctl & V_IRQ_MASK) { + control->int_ctl &= ~V_IRQ_MASK; + push_irq(vcpu, control->int_vector); + } +} + +static void save_db_regs(unsigned long *db_regs) +{ +#ifdef __x86_64__ + asm ("mov %%dr0, %%rax \n\t" + "mov %%rax, %[dr0] \n\t" + "mov %%dr1, %%rax \n\t" + "mov %%rax, %[dr1] \n\t" + "mov %%dr2, %%rax \n\t" + "mov %%rax, %[dr2] \n\t" + "mov %%dr3, %%rax \n\t" + "mov %%rax, %[dr3] \n\t" + : [dr0] "=m"(db_regs[0]), + [dr1] "=m"(db_regs[1]), + [dr2] "=m"(db_regs[2]), + [dr3] "=m"(db_regs[3]) + : : "rax"); +#else + asm ("mov %%dr0, %%eax \n\t" + "mov %%eax, %[dr0] \n\t" + "mov %%dr1, %%eax \n\t" + "mov %%eax, %[dr1] \n\t" + "mov %%dr2, %%eax \n\t" + "mov %%eax, %[dr2] \n\t" + "mov %%dr3, %%eax \n\t" + "mov %%eax, %[dr3] \n\t" + : [dr0] "=m"(db_regs[0]), + [dr1] "=m"(db_regs[1]), + [dr2] "=m"(db_regs[2]), + [dr3] "=m"(db_regs[3]) + : : "eax"); +#endif +} + +static void load_db_regs(unsigned long *db_regs) +{ + asm volatile ("mov %[dr0], %%dr0 \n\t" + "mov %[dr1], %%dr1 \n\t" + "mov %[dr2], %%dr2 \n\t" + "mov %[dr3], %%dr3 \n\t" + : + : [dr0] "r"(db_regs[0]), + [dr1] "r"(db_regs[1]), + [dr2] "r"(db_regs[2]), + [dr3] "r"(db_regs[3]) +#ifdef __x86_64__ + : "rax"); +#else + : "eax"); +#endif +} + +static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u16 fs_selector; + u16 gs_selector; + u16 ldt_selector; + +again: + kvm_try_inject_irq(vcpu); + + clgi(); + + pre_svm_run(vcpu); + + save_host_msrs(vcpu); + fs_selector = read_fs(); + gs_selector = read_gs(); + ldt_selector = read_ldt(); + vcpu->svm->host_cr2 = kvm_read_cr2(); + vcpu->svm->host_dr6 = read_dr6(); + vcpu->svm->host_dr7 = read_dr7(); + vcpu->svm->vmcb->save.cr2 = vcpu->cr2; + + if (vcpu->svm->vmcb->save.dr7 & 0xff) { + write_dr7(0); + save_db_regs(vcpu->svm->host_db_regs); + load_db_regs(vcpu->svm->db_regs); + } + asm volatile ( +#ifdef __x86_64__ + "push %%rbx; push %%rcx; push %%rdx;" + "push %%rsi; push %%rdi; push %%rbp;" + 
"push %%r8; push %%r9; push %%r10; push %%r11;" + "push %%r12; push %%r13; push %%r14; push %%r15;" +#else + "push %%ebx; push %%ecx; push %%edx;" + "push %%esi; push %%edi; push %%ebp;" +#endif + +#ifdef __x86_64__ + "mov %c[rbx](%[vcpu]), %%rbx \n\t" + "mov %c[rcx](%[vcpu]), %%rcx \n\t" + "mov %c[rdx](%[vcpu]), %%rdx \n\t" + "mov %c[rsi](%[vcpu]), %%rsi \n\t" + "mov %c[rdi](%[vcpu]), %%rdi \n\t" + "mov %c[rbp](%[vcpu]), %%rbp \n\t" + "mov %c[r8](%[vcpu]), %%r8 \n\t" + "mov %c[r9](%[vcpu]), %%r9 \n\t" + "mov %c[r10](%[vcpu]), %%r10 \n\t" + "mov %c[r11](%[vcpu]), %%r11 \n\t" + "mov %c[r12](%[vcpu]), %%r12 \n\t" + "mov %c[r13](%[vcpu]), %%r13 \n\t" + "mov %c[r14](%[vcpu]), %%r14 \n\t" + "mov %c[r15](%[vcpu]), %%r15 \n\t" +#else + "mov %c[rbx](%[vcpu]), %%ebx \n\t" + "mov %c[rcx](%[vcpu]), %%ecx \n\t" + "mov %c[rdx](%[vcpu]), %%edx \n\t" + "mov %c[rsi](%[vcpu]), %%esi \n\t" + "mov %c[rdi](%[vcpu]), %%edi \n\t" + "mov %c[rbp](%[vcpu]), %%ebp \n\t" +#endif + +#ifdef __x86_64__ + /* Enter guest mode */ + "push %%rax \n\t" + "mov %c[svm](%[vcpu]), %%rax \n\t" + "mov %c[vmcb](%%rax), %%rax \n\t" + SVM_VMLOAD "\n\t" + SVM_VMRUN "\n\t" + SVM_VMSAVE "\n\t" + "pop %%rax \n\t" +#else + /* Enter guest mode */ + "push %%eax \n\t" + "mov %c[svm](%[vcpu]), %%eax \n\t" + "mov %c[vmcb](%%eax), %%eax \n\t" + SVM_VMLOAD "\n\t" + SVM_VMRUN "\n\t" + SVM_VMSAVE "\n\t" + "pop %%eax \n\t" +#endif + + /* Save guest registers, load host registers */ +#ifdef __x86_64__ + "mov %%rbx, %c[rbx](%[vcpu]) \n\t" + "mov %%rcx, %c[rcx](%[vcpu]) \n\t" + "mov %%rdx, %c[rdx](%[vcpu]) \n\t" + "mov %%rsi, %c[rsi](%[vcpu]) \n\t" + "mov %%rdi, %c[rdi](%[vcpu]) \n\t" + "mov %%rbp, %c[rbp](%[vcpu]) \n\t" + "mov %%r8, %c[r8](%[vcpu]) \n\t" + "mov %%r9, %c[r9](%[vcpu]) \n\t" + "mov %%r10, %c[r10](%[vcpu]) \n\t" + "mov %%r11, %c[r11](%[vcpu]) \n\t" + "mov %%r12, %c[r12](%[vcpu]) \n\t" + "mov %%r13, %c[r13](%[vcpu]) \n\t" + "mov %%r14, %c[r14](%[vcpu]) \n\t" + "mov %%r15, %c[r15](%[vcpu]) \n\t" + + "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" + "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" + "pop %%rbp; pop %%rdi; pop %%rsi;" + "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" +#else + "mov %%ebx, %c[rbx](%[vcpu]) \n\t" + "mov %%ecx, %c[rcx](%[vcpu]) \n\t" + "mov %%edx, %c[rdx](%[vcpu]) \n\t" + "mov %%esi, %c[rsi](%[vcpu]) \n\t" + "mov %%edi, %c[rdi](%[vcpu]) \n\t" + "mov %%ebp, %c[rbp](%[vcpu]) \n\t" + + "pop %%ebp; pop %%edi; pop %%esi;" + "pop %%edx; pop %%ecx; pop %%ebx; \n\t" +#endif + : + : [vcpu]"a"(vcpu), + [svm]"i"(offsetof(struct kvm_vcpu, svm)), + [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), + [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), + [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), + [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), + [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), + [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), + [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) +#ifdef __x86_64__ + ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), + [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), + [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), + [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), + [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), + [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), + [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), + [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) +#endif + : "cc", "memory" ); + + if ((vcpu->svm->vmcb->save.dr7 & 0xff)) + 
load_db_regs(vcpu->svm->host_db_regs); + + vcpu->cr2 = vcpu->svm->vmcb->save.cr2; + + write_dr6(vcpu->svm->host_dr6); + write_dr7(vcpu->svm->host_dr7); + kvm_write_cr2(vcpu->svm->host_cr2); + + load_fs(fs_selector); + load_gs(gs_selector); + load_ldt(ldt_selector); + load_host_msrs(vcpu); + + reload_tss(vcpu); + + stgi(); + + kvm_reput_irq(vcpu); + + vcpu->svm->next_rip = 0; + + if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { + kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; + kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; + return 0; + } + + if (handle_exit(vcpu, kvm_run)) { + if (signal_pending(current)) { + ++kvm_stat.signal_exits; + return -EINTR; + } + kvm_resched(vcpu); + goto again; + } + return 0; +} + +static void svm_flush_tlb(struct kvm_vcpu *vcpu) +{ + force_new_asid(vcpu); +} + +static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) +{ + vcpu->svm->vmcb->save.cr3 = root; + force_new_asid(vcpu); +} + +static void svm_inject_page_fault(struct kvm_vcpu *vcpu, + unsigned long addr, + uint32_t err_code) +{ + uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; + + ++kvm_stat.pf_guest; + + if (is_page_fault(exit_int_info)) { + + vcpu->svm->vmcb->control.event_inj_err = 0; + vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + DF_VECTOR; + return; + } + vcpu->cr2 = addr; + vcpu->svm->vmcb->save.cr2 = addr; + vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + PF_VECTOR; + vcpu->svm->vmcb->control.event_inj_err = err_code; +} + + +static int is_disabled(void) +{ + return 0; +} + +static struct kvm_arch_ops svm_arch_ops = { + .cpu_has_kvm_support = has_svm, + .disabled_by_bios = is_disabled, + .hardware_setup = svm_hardware_setup, + .hardware_unsetup = svm_hardware_unsetup, + .hardware_enable = svm_hardware_enable, + .hardware_disable = svm_hardware_disable, + + .vcpu_create = svm_create_vcpu, + .vcpu_free = svm_free_vcpu, + + .vcpu_load = svm_vcpu_load, + .vcpu_put = svm_vcpu_put, + + .set_guest_debug = svm_guest_debug, + .get_msr = svm_get_msr, + .set_msr = svm_set_msr, + .get_segment_base = svm_get_segment_base, + .get_segment = svm_get_segment, + .set_segment = svm_set_segment, + .is_long_mode = svm_is_long_mode, + .get_cs_db_l_bits = svm_get_cs_db_l_bits, + .set_cr0 = svm_set_cr0, + .set_cr0_no_modeswitch = svm_set_cr0, + .set_cr3 = svm_set_cr3, + .set_cr4 = svm_set_cr4, + .set_efer = svm_set_efer, + .get_idt = svm_get_idt, + .set_idt = svm_set_idt, + .get_gdt = svm_get_gdt, + .set_gdt = svm_set_gdt, + .get_dr = svm_get_dr, + .set_dr = svm_set_dr, + .cache_regs = svm_cache_regs, + .decache_regs = svm_decache_regs, + .get_rflags = svm_get_rflags, + .set_rflags = svm_set_rflags, + + .invlpg = svm_invlpg, + .tlb_flush = svm_flush_tlb, + .inject_page_fault = svm_inject_page_fault, + + .inject_gp = svm_inject_gp, + + .run = svm_vcpu_run, + .skip_emulated_instruction = skip_emulated_instruction, + .vcpu_setup = svm_vcpu_setup, +}; + +static int __init svm_init(void) +{ + kvm_emulator_want_group7_invlpg(); + kvm_init_arch(&svm_arch_ops, THIS_MODULE); + return 0; +} + +static void __exit svm_exit(void) +{ + kvm_exit_arch(); +} + +module_init(svm_init) +module_exit(svm_exit) diff --git a/drivers/kvm/svm.h b/drivers/kvm/svm.h new file mode 100644 index 00000000000..df731c3fb58 --- /dev/null +++ b/drivers/kvm/svm.h @@ -0,0 +1,315 @@ +#ifndef __SVM_H +#define __SVM_H + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + 
INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u16 intercept_cr_read; + u16 intercept_cr_write; + u16 intercept_dr_read; + u16 intercept_dr_write; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[44]; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u8 reserved_4[16]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 lbr_ctl; + u8 reserved_5[832]; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_CPUID_FEATURE_SHIFT 2 +#define SVM_CPUID_FUNC 0x8000000a + +#define MSR_EFER_SVME_MASK (1ULL << 12) +#define MSR_VM_HSAVE_PA 0xc0010117ULL + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 
7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_MASK 1 +#define INTERCEPT_CR3_MASK (1 << 3) +#define INTERCEPT_CR4_MASK (1 << 4) + +#define INTERCEPT_DR0_MASK 1 +#define INTERCEPT_DR1_MASK (1 << 1) +#define INTERCEPT_DR2_MASK (1 << 2) +#define INTERCEPT_DR3_MASK (1 << 3) +#define INTERCEPT_DR4_MASK (1 << 4) +#define INTERCEPT_DR5_MASK (1 << 5) +#define INTERCEPT_DR6_MASK (1 << 6) +#define INTERCEPT_DR7_MASK (1 << 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define 
SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP + +#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" +#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" +#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb" +#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd" +#define SVM_STGI ".byte 0x0f, 0x01, 0xdc" +#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" + +#endif + diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c new file mode 100644 index 00000000000..bda7a7ae216 --- /dev/null +++ b/drivers/kvm/vmx.c @@ -0,0 +1,2002 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "kvm.h" +#include "vmx.h" +#include "kvm_vmx.h" +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <asm/io.h> + +#include "segment_descriptor.h" + +#define MSR_IA32_FEATURE_CONTROL 0x03a + +MODULE_AUTHOR("Qumranet"); +MODULE_LICENSE("GPL"); + +static DEFINE_PER_CPU(struct vmcs *, vmxarea); +static DEFINE_PER_CPU(struct vmcs *, current_vmcs); + +#ifdef __x86_64__ +#define HOST_IS_64 1 +#else +#define HOST_IS_64 0 +#endif + +static struct vmcs_descriptor { + int size; + int order; + u32 revision_id; +} vmcs_descriptor; + +#define VMX_SEGMENT_FIELD(seg) \ + [VCPU_SREG_##seg] = { \ + .selector = GUEST_##seg##_SELECTOR, \ + .base = GUEST_##seg##_BASE, \ + .limit = GUEST_##seg##_LIMIT, \ + .ar_bytes = GUEST_##seg##_AR_BYTES, \ + } + +static struct kvm_vmx_segment_field { + unsigned selector; + unsigned base; + unsigned limit; + unsigned ar_bytes; +} kvm_vmx_segment_fields[] = { + VMX_SEGMENT_FIELD(CS), + VMX_SEGMENT_FIELD(DS), + VMX_SEGMENT_FIELD(ES), + VMX_SEGMENT_FIELD(FS), + VMX_SEGMENT_FIELD(GS), + VMX_SEGMENT_FIELD(SS), + VMX_SEGMENT_FIELD(TR), + VMX_SEGMENT_FIELD(LDTR), +}; + +static const u32 vmx_msr_index[] = { +#ifdef __x86_64__ + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, +#endif + MSR_EFER, MSR_K6_STAR, +}; +#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index)) + +struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr); + +static inline int is_page_fault(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | + INTR_INFO_VALID_MASK)) == + (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); +} + +static inline int is_external_interrupt(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) + == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); +} + +static void vmcs_clear(struct vmcs *vmcs) +{ + u64 phys_addr = __pa(vmcs); + u8 error; + + asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" + : 
"=g"(error) : "a"(&phys_addr), "m"(phys_addr) + : "cc", "memory"); + if (error) + printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", + vmcs, phys_addr); +} + +static void __vcpu_clear(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + int cpu = smp_processor_id(); + + if (vcpu->cpu == cpu) + vmcs_clear(vcpu->vmcs); + if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) + per_cpu(current_vmcs, cpu) = NULL; +} + +static unsigned long vmcs_readl(unsigned long field) +{ + unsigned long value; + + asm volatile (ASM_VMX_VMREAD_RDX_RAX + : "=a"(value) : "d"(field) : "cc"); + return value; +} + +static u16 vmcs_read16(unsigned long field) +{ + return vmcs_readl(field); +} + +static u32 vmcs_read32(unsigned long field) +{ + return vmcs_readl(field); +} + +static u64 vmcs_read64(unsigned long field) +{ +#ifdef __x86_64__ + return vmcs_readl(field); +#else + return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); +#endif +} + +static void vmcs_writel(unsigned long field, unsigned long value) +{ + u8 error; + + asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" + : "=q"(error) : "a"(value), "d"(field) : "cc" ); + if (error) + printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +static void vmcs_write16(unsigned long field, u16 value) +{ + vmcs_writel(field, value); +} + +static void vmcs_write32(unsigned long field, u32 value) +{ + vmcs_writel(field, value); +} + +static void vmcs_write64(unsigned long field, u64 value) +{ +#ifdef __x86_64__ + vmcs_writel(field, value); +#else + vmcs_writel(field, value); + asm volatile (""); + vmcs_writel(field+1, value >> 32); +#endif +} + +/* + * Switches to specified vcpu, until a matching vcpu_put(), but assumes + * vcpu mutex is already taken. + */ +static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu) +{ + u64 phys_addr = __pa(vcpu->vmcs); + int cpu; + + cpu = get_cpu(); + + if (vcpu->cpu != cpu) { + smp_call_function(__vcpu_clear, vcpu, 0, 1); + vcpu->launched = 0; + } + + if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { + u8 error; + + per_cpu(current_vmcs, cpu) = vcpu->vmcs; + asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" + : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) + : "cc"); + if (error) + printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", + vcpu->vmcs, phys_addr); + } + + if (vcpu->cpu != cpu) { + struct descriptor_table dt; + unsigned long sysenter_esp; + + vcpu->cpu = cpu; + /* + * Linux uses per-cpu TSS and GDT, so set these when switching + * processors. + */ + vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ + get_gdt(&dt); + vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ + + rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); + vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + } + return vcpu; +} + +static void vmx_vcpu_put(struct kvm_vcpu *vcpu) +{ + put_cpu(); +} + +static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) +{ + return vmcs_readl(GUEST_RFLAGS); +} + +static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + vmcs_writel(GUEST_RFLAGS, rflags); +} + +static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +{ + unsigned long rip; + u32 interruptibility; + + rip = vmcs_readl(GUEST_RIP); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + vmcs_writel(GUEST_RIP, rip); + + /* + * We emulated an instruction, so temporary interrupt blocking + * should be removed, if set. 
+ */ + interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + if (interruptibility & 3) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + interruptibility & ~3); +} + +static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) +{ + printk(KERN_DEBUG "inject_general_protection: rip 0x%lx\n", + vmcs_readl(GUEST_RIP)); + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + GP_VECTOR | + INTR_TYPE_EXCEPTION | + INTR_INFO_DELIEVER_CODE_MASK | + INTR_INFO_VALID_MASK); +} + +/* + * reads and returns guest's timestamp counter "register" + * guest_tsc = host_tsc + tsc_offset -- 21.3 + */ +static u64 guest_read_tsc(void) +{ + u64 host_tsc, tsc_offset; + + rdtscll(host_tsc); + tsc_offset = vmcs_read64(TSC_OFFSET); + return host_tsc + tsc_offset; +} + +/* + * writes 'guest_tsc' into guest's timestamp counter "register" + * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc + */ +static void guest_write_tsc(u64 guest_tsc) +{ + u64 host_tsc; + + rdtscll(host_tsc); + vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); +} + +static void reload_tss(void) +{ +#ifndef __x86_64__ + + /* + * VT restores TR but not its size. Useless. + */ + struct descriptor_table gdt; + struct segment_descriptor *descs; + + get_gdt(&gdt); + descs = (void *)gdt.base; + descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ + load_TR_desc(); +#endif +} + +/* + * Reads an msr value (of 'msr_index') into 'pdata'. + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. + */ +static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +{ + u64 data; + struct vmx_msr_entry *msr; + + if (!pdata) { + printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); + return -EINVAL; + } + + switch (msr_index) { +#ifdef __x86_64__ + case MSR_FS_BASE: + data = vmcs_readl(GUEST_FS_BASE); + break; + case MSR_GS_BASE: + data = vmcs_readl(GUEST_GS_BASE); + break; + case MSR_EFER: + data = vcpu->shadow_efer; + break; +#endif + case MSR_IA32_TIME_STAMP_COUNTER: + data = guest_read_tsc(); + break; + case MSR_IA32_SYSENTER_CS: + data = vmcs_read32(GUEST_SYSENTER_CS); + break; + case MSR_IA32_SYSENTER_EIP: + data = vmcs_read32(GUEST_SYSENTER_EIP); + break; + case MSR_IA32_SYSENTER_ESP: + data = vmcs_read32(GUEST_SYSENTER_ESP); + break; + case MSR_IA32_MC0_CTL: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MCG_CAP: + case MSR_IA32_MC0_MISC: + case MSR_IA32_MC0_MISC+4: + case MSR_IA32_MC0_MISC+8: + case MSR_IA32_MC0_MISC+12: + case MSR_IA32_MC0_MISC+16: + case MSR_IA32_UCODE_REV: + /* MTRR registers */ + case 0xfe: + case 0x200 ... 0x2ff: + data = 0; + break; + case MSR_IA32_APICBASE: + data = vcpu->apic_base; + break; + default: + msr = find_msr_entry(vcpu, msr_index); + if (!msr) { + printk(KERN_ERR "kvm: unhandled rdmsr: %x\n", msr_index); + return 1; + } + data = msr->data; + break; + } + + *pdata = data; + return 0; +} + +/* + * Writes msr value into into the appropriate "register". + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. 
+ */ +static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +{ + struct vmx_msr_entry *msr; + switch (msr_index) { +#ifdef __x86_64__ + case MSR_FS_BASE: + vmcs_writel(GUEST_FS_BASE, data); + break; + case MSR_GS_BASE: + vmcs_writel(GUEST_GS_BASE, data); + break; +#endif + case MSR_IA32_SYSENTER_CS: + vmcs_write32(GUEST_SYSENTER_CS, data); + break; + case MSR_IA32_SYSENTER_EIP: + vmcs_write32(GUEST_SYSENTER_EIP, data); + break; + case MSR_IA32_SYSENTER_ESP: + vmcs_write32(GUEST_SYSENTER_ESP, data); + break; +#ifdef __x86_64 + case MSR_EFER: + set_efer(vcpu, data); + break; + case MSR_IA32_MC0_STATUS: + printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n" + , __FUNCTION__, data); + break; +#endif + case MSR_IA32_TIME_STAMP_COUNTER: { + guest_write_tsc(data); + break; + } + case MSR_IA32_UCODE_REV: + case MSR_IA32_UCODE_WRITE: + case 0x200 ... 0x2ff: /* MTRRs */ + break; + case MSR_IA32_APICBASE: + vcpu->apic_base = data; + break; + default: + msr = find_msr_entry(vcpu, msr_index); + if (!msr) { + printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr_index); + return 1; + } + msr->data = data; + break; + } + + return 0; +} + +/* + * Sync the rsp and rip registers into the vcpu structure. This allows + * registers to be accessed by indexing vcpu->regs. + */ +static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu) +{ + vcpu->regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); + vcpu->rip = vmcs_readl(GUEST_RIP); +} + +/* + * Syncs rsp and rip back into the vmcs. Should be called after possible + * modification. + */ +static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) +{ + vmcs_writel(GUEST_RSP, vcpu->regs[VCPU_REGS_RSP]); + vmcs_writel(GUEST_RIP, vcpu->rip); +} + +static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +{ + unsigned long dr7 = 0x400; + u32 exception_bitmap; + int old_singlestep; + + exception_bitmap = vmcs_read32(EXCEPTION_BITMAP); + old_singlestep = vcpu->guest_debug.singlestep; + + vcpu->guest_debug.enabled = dbg->enabled; + if (vcpu->guest_debug.enabled) { + int i; + + dr7 |= 0x200; /* exact */ + for (i = 0; i < 4; ++i) { + if (!dbg->breakpoints[i].enabled) + continue; + vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; + dr7 |= 2 << (i*2); /* global enable */ + dr7 |= 0 << (i*4+16); /* execution breakpoint */ + } + + exception_bitmap |= (1u << 1); /* Trap debug exceptions */ + + vcpu->guest_debug.singlestep = dbg->singlestep; + } else { + exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */ + vcpu->guest_debug.singlestep = 0; + } + + if (old_singlestep && !vcpu->guest_debug.singlestep) { + unsigned long flags; + + flags = vmcs_readl(GUEST_RFLAGS); + flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + vmcs_writel(GUEST_RFLAGS, flags); + } + + vmcs_write32(EXCEPTION_BITMAP, exception_bitmap); + vmcs_writel(GUEST_DR7, dr7); + + return 0; +} + +static __init int cpu_has_kvm_support(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + +static __init int vmx_disabled_by_bios(void) +{ + u64 msr; + + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); + return (msr & 5) == 1; /* locked but not enabled */ +} + +static __init void hardware_enable(void *garbage) +{ + int cpu = raw_smp_processor_id(); + u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); + u64 old; + + rdmsrl(MSR_IA32_FEATURE_CONTROL, old); + if ((old & 5) == 0) + /* enable and lock */ + wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5); + write_cr4(read_cr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */ + asm volatile (ASM_VMX_VMXON_RAX : : 
"a"(&phys_addr), "m"(phys_addr) + : "memory", "cc"); +} + +static void hardware_disable(void *garbage) +{ + asm volatile (ASM_VMX_VMXOFF : : : "cc"); +} + +static __init void setup_vmcs_descriptor(void) +{ + u32 vmx_msr_low, vmx_msr_high; + + rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high); + vmcs_descriptor.size = vmx_msr_high & 0x1fff; + vmcs_descriptor.order = get_order(vmcs_descriptor.size); + vmcs_descriptor.revision_id = vmx_msr_low; +}; + +static struct vmcs *alloc_vmcs_cpu(int cpu) +{ + int node = cpu_to_node(cpu); + struct page *pages; + struct vmcs *vmcs; + + pages = alloc_pages_node(node, GFP_KERNEL, vmcs_descriptor.order); + if (!pages) + return NULL; + vmcs = page_address(pages); + memset(vmcs, 0, vmcs_descriptor.size); + vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */ + return vmcs; +} + +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(smp_processor_id()); +} + +static void free_vmcs(struct vmcs *vmcs) +{ + free_pages((unsigned long)vmcs, vmcs_descriptor.order); +} + +static __exit void free_kvm_area(void) +{ + int cpu; + + for_each_online_cpu(cpu) + free_vmcs(per_cpu(vmxarea, cpu)); +} + +extern struct vmcs *alloc_vmcs_cpu(int cpu); + +static __init int alloc_kvm_area(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + struct vmcs *vmcs; + + vmcs = alloc_vmcs_cpu(cpu); + if (!vmcs) { + free_kvm_area(); + return -ENOMEM; + } + + per_cpu(vmxarea, cpu) = vmcs; + } + return 0; +} + +static __init int hardware_setup(void) +{ + setup_vmcs_descriptor(); + return alloc_kvm_area(); +} + +static __exit void hardware_unsetup(void) +{ + free_kvm_area(); +} + +static void update_exception_bitmap(struct kvm_vcpu *vcpu) +{ + if (vcpu->rmode.active) + vmcs_write32(EXCEPTION_BITMAP, ~0); + else + vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); +} + +static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + + if (vmcs_readl(sf->base) == save->base) { + vmcs_write16(sf->selector, save->selector); + vmcs_writel(sf->base, save->base); + vmcs_write32(sf->limit, save->limit); + vmcs_write32(sf->ar_bytes, save->ar); + } else { + u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK) + << AR_DPL_SHIFT; + vmcs_write32(sf->ar_bytes, 0x93 | dpl); + } +} + +static void enter_pmode(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + + vcpu->rmode.active = 0; + + vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base); + vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit); + vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar); + + flags = vmcs_readl(GUEST_RFLAGS); + flags &= ~(IOPL_MASK | X86_EFLAGS_VM); + flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); + vmcs_writel(GUEST_RFLAGS, flags); + + vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) | + (vmcs_readl(CR4_READ_SHADOW) & CR4_VME_MASK)); + + update_exception_bitmap(vcpu); + + fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es); + fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds); + fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs); + fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs); + + vmcs_write16(GUEST_SS_SELECTOR, 0); + vmcs_write32(GUEST_SS_AR_BYTES, 0x93); + + vmcs_write16(GUEST_CS_SELECTOR, + vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK); + vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); +} + +static int rmode_tss_base(struct kvm* kvm) +{ + gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3; + return base_gfn << PAGE_SHIFT; +} + +static void fix_rmode_seg(int seg, struct kvm_save_segment 
*save) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + + save->selector = vmcs_read16(sf->selector); + save->base = vmcs_readl(sf->base); + save->limit = vmcs_read32(sf->limit); + save->ar = vmcs_read32(sf->ar_bytes); + vmcs_write16(sf->selector, vmcs_readl(sf->base) >> 4); + vmcs_write32(sf->limit, 0xffff); + vmcs_write32(sf->ar_bytes, 0xf3); +} + +static void enter_rmode(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + + vcpu->rmode.active = 1; + + vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); + vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); + + vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); + vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); + + vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); + vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); + + flags = vmcs_readl(GUEST_RFLAGS); + vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT; + + flags |= IOPL_MASK | X86_EFLAGS_VM; + + vmcs_writel(GUEST_RFLAGS, flags); + vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK); + update_exception_bitmap(vcpu); + + vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); + vmcs_write32(GUEST_SS_LIMIT, 0xffff); + vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); + + vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); + vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); + + fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); + fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); + fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); + fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); +} + +#ifdef __x86_64__ + +static void enter_lmode(struct kvm_vcpu *vcpu) +{ + u32 guest_tr_ar; + + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); + if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { + printk(KERN_DEBUG "%s: tss fixup for long mode. \n", + __FUNCTION__); + vmcs_write32(GUEST_TR_AR_BYTES, + (guest_tr_ar & ~AR_TYPE_MASK) + | AR_TYPE_BUSY_64_TSS); + } + + vcpu->shadow_efer |= EFER_LMA; + + find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME; + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) + | VM_ENTRY_CONTROLS_IA32E_MASK); +} + +static void exit_lmode(struct kvm_vcpu *vcpu) +{ + vcpu->shadow_efer &= ~EFER_LMA; + + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) + & ~VM_ENTRY_CONTROLS_IA32E_MASK); +} + +#endif + +static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +{ + if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) + enter_pmode(vcpu); + + if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) + enter_rmode(vcpu); + +#ifdef __x86_64__ + if (vcpu->shadow_efer & EFER_LME) { + if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) + enter_lmode(vcpu); + if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK)) + exit_lmode(vcpu); + } +#endif + + vmcs_writel(CR0_READ_SHADOW, cr0); + vmcs_writel(GUEST_CR0, + (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); + vcpu->cr0 = cr0; +} + +/* + * Used when restoring the VM to avoid corrupting segment registers + */ +static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) +{ + vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0); + update_exception_bitmap(vcpu); + vmcs_writel(CR0_READ_SHADOW, cr0); + vmcs_writel(GUEST_CR0, + (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); + vcpu->cr0 = cr0; +} + +static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +{ + vmcs_writel(GUEST_CR3, cr3); +} + +static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + vmcs_writel(CR4_READ_SHADOW, cr4); + vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ? 
+ KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); + vcpu->cr4 = cr4; +} + +#ifdef __x86_64__ + +static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); + + vcpu->shadow_efer = efer; + if (efer & EFER_LMA) { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) | + VM_ENTRY_CONTROLS_IA32E_MASK); + msr->data = efer; + + } else { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) & + ~VM_ENTRY_CONTROLS_IA32E_MASK); + + msr->data = efer & ~EFER_LME; + } +} + +#endif + +static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + + return vmcs_readl(sf->base); +} + +static void vmx_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + u32 ar; + + var->base = vmcs_readl(sf->base); + var->limit = vmcs_read32(sf->limit); + var->selector = vmcs_read16(sf->selector); + ar = vmcs_read32(sf->ar_bytes); + if (ar & AR_UNUSABLE_MASK) + ar = 0; + var->type = ar & 15; + var->s = (ar >> 4) & 1; + var->dpl = (ar >> 5) & 3; + var->present = (ar >> 7) & 1; + var->avl = (ar >> 12) & 1; + var->l = (ar >> 13) & 1; + var->db = (ar >> 14) & 1; + var->g = (ar >> 15) & 1; + var->unusable = (ar >> 16) & 1; +} + +static void vmx_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + u32 ar; + + vmcs_writel(sf->base, var->base); + vmcs_write32(sf->limit, var->limit); + vmcs_write16(sf->selector, var->selector); + if (var->unusable) + ar = 1 << 16; + else { + ar = var->type & 15; + ar |= (var->s & 1) << 4; + ar |= (var->dpl & 3) << 5; + ar |= (var->present & 1) << 7; + ar |= (var->avl & 1) << 12; + ar |= (var->l & 1) << 13; + ar |= (var->db & 1) << 14; + ar |= (var->g & 1) << 15; + } + vmcs_write32(sf->ar_bytes, ar); +} + +static int vmx_is_long_mode(struct kvm_vcpu *vcpu) +{ + return vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_CONTROLS_IA32E_MASK; +} + +static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); + + *db = (ar >> 14) & 1; + *l = (ar >> 13) & 1; +} + +static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); + dt->base = vmcs_readl(GUEST_IDTR_BASE); +} + +static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); + vmcs_writel(GUEST_IDTR_BASE, dt->base); +} + +static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); + dt->base = vmcs_readl(GUEST_GDTR_BASE); +} + +static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +{ + vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); + vmcs_writel(GUEST_GDTR_BASE, dt->base); +} + +static int init_rmode_tss(struct kvm* kvm) +{ + struct page *p1, *p2, *p3; + gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; + char *page; + + p1 = _gfn_to_page(kvm, fn++); + p2 = _gfn_to_page(kvm, fn++); + p3 = _gfn_to_page(kvm, fn); + + if (!p1 || !p2 || !p3) { + kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__); + return 0; + } + + page = kmap_atomic(p1, KM_USER0); + memset(page, 0, PAGE_SIZE); + *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; + kunmap_atomic(page, KM_USER0); + + page = kmap_atomic(p2, KM_USER0); + memset(page, 0, PAGE_SIZE); + 
kunmap_atomic(page, KM_USER0); + + page = kmap_atomic(p3, KM_USER0); + memset(page, 0, PAGE_SIZE); + *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0; + kunmap_atomic(page, KM_USER0); + + return 1; +} + +static void vmcs_write32_fixedbits(u32 msr, u32 vmcs_field, u32 val) +{ + u32 msr_high, msr_low; + + rdmsr(msr, msr_low, msr_high); + + val &= msr_high; + val |= msr_low; + vmcs_write32(vmcs_field, val); +} + +static void seg_setup(int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + + vmcs_write16(sf->selector, 0); + vmcs_writel(sf->base, 0); + vmcs_write32(sf->limit, 0xffff); + vmcs_write32(sf->ar_bytes, 0x93); +} + +/* + * Sets up the vmcs for emulated real mode. + */ +static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) +{ + u32 host_sysenter_cs; + u32 junk; + unsigned long a; + struct descriptor_table dt; + int i; + int ret = 0; + int nr_good_msrs; + extern asmlinkage void kvm_vmx_return(void); + + if (!init_rmode_tss(vcpu->kvm)) { + ret = -ENOMEM; + goto out; + } + + memset(vcpu->regs, 0, sizeof(vcpu->regs)); + vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); + vcpu->cr8 = 0; + vcpu->apic_base = 0xfee00000 | + /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | + MSR_IA32_APICBASE_ENABLE; + + fx_init(vcpu); + + /* + * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode + * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. + */ + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_writel(GUEST_CS_BASE, 0x000f0000); + vmcs_write32(GUEST_CS_LIMIT, 0xffff); + vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); + + seg_setup(VCPU_SREG_DS); + seg_setup(VCPU_SREG_ES); + seg_setup(VCPU_SREG_FS); + seg_setup(VCPU_SREG_GS); + seg_setup(VCPU_SREG_SS); + + vmcs_write16(GUEST_TR_SELECTOR, 0); + vmcs_writel(GUEST_TR_BASE, 0); + vmcs_write32(GUEST_TR_LIMIT, 0xffff); + vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); + + vmcs_write16(GUEST_LDTR_SELECTOR, 0); + vmcs_writel(GUEST_LDTR_BASE, 0); + vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); + vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); + + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + + vmcs_writel(GUEST_RFLAGS, 0x02); + vmcs_writel(GUEST_RIP, 0xfff0); + vmcs_writel(GUEST_RSP, 0); + + vmcs_writel(GUEST_CR3, 0); + + //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 + vmcs_writel(GUEST_DR7, 0x400); + + vmcs_writel(GUEST_GDTR_BASE, 0); + vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); + + vmcs_writel(GUEST_IDTR_BASE, 0); + vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); + + vmcs_write32(GUEST_ACTIVITY_STATE, 0); + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); + vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); + + /* I/O */ + vmcs_write64(IO_BITMAP_A, 0); + vmcs_write64(IO_BITMAP_B, 0); + + guest_write_tsc(0); + + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ + + /* Special registers */ + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + /* Control */ + vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS_MSR, + PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_EXT_INTR_MASK /* 20.6.1 */ + | PIN_BASED_NMI_EXITING /* 20.6.1 */ + ); + vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS_MSR, + CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_HLT_EXITING /* 20.6.2 */ + | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ + | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ + | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ + | CPU_BASED_INVDPG_EXITING + | CPU_BASED_MOV_DR_EXITING + | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ + ); + + vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); + 
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); + vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ + + vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */ + vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ + vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ + + vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ + vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ + vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ + vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ + vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ + vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ +#ifdef __x86_64__ + rdmsrl(MSR_FS_BASE, a); + vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ + rdmsrl(MSR_GS_BASE, a); + vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */ +#else + vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ + vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ +#endif + + vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ + + get_idt(&dt); + vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ + + + vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */ + + rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); + vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); + rdmsrl(MSR_IA32_SYSENTER_ESP, a); + vmcs_writel(HOST_IA32_SYSENTER_ESP, a); /* 22.2.3 */ + rdmsrl(MSR_IA32_SYSENTER_EIP, a); + vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ + + ret = -ENOMEM; + vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->guest_msrs) + goto out; + vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->host_msrs) + goto out_free_guest_msrs; + + for (i = 0; i < NR_VMX_MSR; ++i) { + u32 index = vmx_msr_index[i]; + u32 data_low, data_high; + u64 data; + int j = vcpu->nmsrs; + + if (rdmsr_safe(index, &data_low, &data_high) < 0) + continue; + data = data_low | ((u64)data_high << 32); + vcpu->host_msrs[j].index = index; + vcpu->host_msrs[j].reserved = 0; + vcpu->host_msrs[j].data = data; + vcpu->guest_msrs[j] = vcpu->host_msrs[j]; + ++vcpu->nmsrs; + } + printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs); + + nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS; + vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, + virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); + vmcs_writel(VM_EXIT_MSR_STORE_ADDR, + virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); + vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, + virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS)); + vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CONTROLS, + (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ + + + /* 22.2.1, 20.8.1 */ + vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS_MSR, + VM_ENTRY_CONTROLS, 0); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ + + vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0); + vmcs_writel(TPR_THRESHOLD, 0); + + vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK); + vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); + + vcpu->cr0 = 0x60000010; + vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode + vmx_set_cr4(vcpu, 0); +#ifdef __x86_64__ + vmx_set_efer(vcpu, 0); +#endif + + return 0; + +out_free_guest_msrs: + kfree(vcpu->guest_msrs); +out: + return ret; +} + +static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) +{ + u16 ent[2]; + u16 cs; + u16 ip; + unsigned long flags; + unsigned long ss_base = vmcs_readl(GUEST_SS_BASE); + u16 sp = vmcs_readl(GUEST_RSP); + u32 ss_limit = 
vmcs_read32(GUEST_SS_LIMIT); + + if (sp > ss_limit || sp - 6 > sp) { + vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n", + __FUNCTION__, + vmcs_readl(GUEST_RSP), + vmcs_readl(GUEST_SS_BASE), + vmcs_read32(GUEST_SS_LIMIT)); + return; + } + + if (kvm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) != + sizeof(ent)) { + vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__); + return; + } + + flags = vmcs_readl(GUEST_RFLAGS); + cs = vmcs_readl(GUEST_CS_BASE) >> 4; + ip = vmcs_readl(GUEST_RIP); + + + if (kvm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 || + kvm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 || + kvm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) { + vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__); + return; + } + + vmcs_writel(GUEST_RFLAGS, flags & + ~( X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF)); + vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ; + vmcs_writel(GUEST_CS_BASE, ent[1] << 4); + vmcs_writel(GUEST_RIP, ent[0]); + vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); +} + +static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) +{ + int word_index = __ffs(vcpu->irq_summary); + int bit_index = __ffs(vcpu->irq_pending[word_index]); + int irq = word_index * BITS_PER_LONG + bit_index; + + clear_bit(bit_index, &vcpu->irq_pending[word_index]); + if (!vcpu->irq_pending[word_index]) + clear_bit(word_index, &vcpu->irq_summary); + + if (vcpu->rmode.active) { + inject_rmode_irq(vcpu, irq); + return; + } + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); +} + +static void kvm_try_inject_irq(struct kvm_vcpu *vcpu) +{ + if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) + && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0) + /* + * Interrupts enabled, and not blocked by sti or mov ss. Good. + */ + kvm_do_inject_irq(vcpu); + else + /* + * Interrupts blocked. Wait for unblock. 
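+ * Setting CPU_BASED_VIRTUAL_INTR_PENDING (interrupt-window
+ * exiting) makes the guest exit as soon as it is able to accept
+ * an interrupt; handle_interrupt_window() clears it again.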
+ */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) + | CPU_BASED_VIRTUAL_INTR_PENDING); +} + +static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + + set_debugreg(dbg->bp[0], 0); + set_debugreg(dbg->bp[1], 1); + set_debugreg(dbg->bp[2], 2); + set_debugreg(dbg->bp[3], 3); + + if (dbg->singlestep) { + unsigned long flags; + + flags = vmcs_readl(GUEST_RFLAGS); + flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + vmcs_writel(GUEST_RFLAGS, flags); + } +} + +static int handle_rmode_exception(struct kvm_vcpu *vcpu, + int vec, u32 err_code) +{ + if (!vcpu->rmode.active) + return 0; + + if (vec == GP_VECTOR && err_code == 0) + if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE) + return 1; + return 0; +} + +static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 intr_info, error_code; + unsigned long cr2, rip; + u32 vect_info; + enum emulation_result er; + + vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + if ((vect_info & VECTORING_INFO_VALID_MASK) && + !is_page_fault(intr_info)) { + printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " + "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); + } + + if (is_external_interrupt(vect_info)) { + int irq = vect_info & VECTORING_INFO_VECTOR_MASK; + set_bit(irq, vcpu->irq_pending); + set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); + } + + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + asm ("int $2"); + return 1; + } + error_code = 0; + rip = vmcs_readl(GUEST_RIP); + if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) + error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + if (is_page_fault(intr_info)) { + cr2 = vmcs_readl(EXIT_QUALIFICATION); + + spin_lock(&vcpu->kvm->lock); + if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) { + spin_unlock(&vcpu->kvm->lock); + return 1; + } + + er = emulate_instruction(vcpu, kvm_run, cr2, error_code); + spin_unlock(&vcpu->kvm->lock); + + switch (er) { + case EMULATE_DONE: + return 1; + case EMULATE_DO_MMIO: + ++kvm_stat.mmio_exits; + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; + case EMULATE_FAIL: + vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); + break; + default: + BUG(); + } + } + + if (vcpu->rmode.active && + handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, + error_code)) + return 1; + + if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; + kvm_run->ex.error_code = error_code; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + ++kvm_stat.irq_exits; + return 1; +} + + +static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) +{ + u64 inst; + gva_t rip; + int countr_size; + int i, n; + + if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { + countr_size = 2; + } else { + u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); + + countr_size = (cs_ar & AR_L_MASK) ? 8: + (cs_ar & AR_DB_MASK) ? 
4: 2; + } + + rip = vmcs_readl(GUEST_RIP); + if (countr_size != 8) + rip += vmcs_readl(GUEST_CS_BASE); + + n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst); + + for (i = 0; i < n; i++) { + switch (((u8*)&inst)[i]) { + case 0xf0: + case 0xf2: + case 0xf3: + case 0x2e: + case 0x36: + case 0x3e: + case 0x26: + case 0x64: + case 0x65: + case 0x66: + break; + case 0x67: + countr_size = (countr_size == 2) ? 4: (countr_size >> 1); + default: + goto done; + } + } + return 0; +done: + countr_size *= 8; + *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); + return 1; +} + +static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + + ++kvm_stat.io_exits; + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + kvm_run->exit_reason = KVM_EXIT_IO; + if (exit_qualification & 8) + kvm_run->io.direction = KVM_EXIT_IO_IN; + else + kvm_run->io.direction = KVM_EXIT_IO_OUT; + kvm_run->io.size = (exit_qualification & 7) + 1; + kvm_run->io.string = (exit_qualification & 16) != 0; + kvm_run->io.string_down + = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; + kvm_run->io.rep = (exit_qualification & 32) != 0; + kvm_run->io.port = exit_qualification >> 16; + if (kvm_run->io.string) { + if (!get_io_count(vcpu, &kvm_run->io.count)) + return 1; + kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); + } else + kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ + return 0; +} + +static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 address = vmcs_read64(EXIT_QUALIFICATION); + int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + spin_lock(&vcpu->kvm->lock); + vcpu->mmu.inval_page(vcpu, address); + spin_unlock(&vcpu->kvm->lock); + vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length); + return 1; +} + +static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + int cr; + int reg; + + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + cr = exit_qualification & 15; + reg = (exit_qualification >> 8) & 15; + switch ((exit_qualification >> 4) & 3) { + case 0: /* mov to cr */ + switch (cr) { + case 0: + vcpu_load_rsp_rip(vcpu); + set_cr0(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 3: + vcpu_load_rsp_rip(vcpu); + set_cr3(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 4: + vcpu_load_rsp_rip(vcpu); + set_cr4(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 8: + vcpu_load_rsp_rip(vcpu); + set_cr8(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + }; + break; + case 1: /*mov from cr*/ + switch (cr) { + case 3: + vcpu_load_rsp_rip(vcpu); + vcpu->regs[reg] = vcpu->cr3; + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; + case 8: + printk(KERN_DEBUG "handle_cr: read CR8 " + "cpu erratum AA15\n"); + vcpu_load_rsp_rip(vcpu); + vcpu->regs[reg] = vcpu->cr8; + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; + } + break; + case 3: /* lmsw */ + lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); + + skip_emulated_instruction(vcpu); + return 1; + default: + break; + } + kvm_run->exit_reason = 0; + printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", + (int)(exit_qualification >> 4) & 3, cr); + return 0; +} + +static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + unsigned long val; + int dr, reg; + + /* + * FIXME: this code assumes the host is 
debugging the guest. + * need to deal with guest debugging itself too. + */ + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + dr = exit_qualification & 7; + reg = (exit_qualification >> 8) & 15; + vcpu_load_rsp_rip(vcpu); + if (exit_qualification & 16) { + /* mov from dr */ + switch (dr) { + case 6: + val = 0xffff0ff0; + break; + case 7: + val = 0x400; + break; + default: + val = 0; + } + vcpu->regs[reg] = val; + } else { + /* mov to dr */ + } + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_CPUID; + return 0; +} + +static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data; + + if (vmx_get_msr(vcpu, ecx, &data)) { + vmx_inject_gp(vcpu, 0); + return 1; + } + + /* FIXME: handling of bits 32:63 of rax, rdx */ + vcpu->regs[VCPU_REGS_RAX] = data & -1u; + vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u; + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u) + | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); + + if (vmx_set_msr(vcpu, ecx, data) != 0) { + vmx_inject_gp(vcpu, 0); + return 1; + } + + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_interrupt_window(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + /* Turn off interrupt window reporting. */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) + & ~CPU_BASED_VIRTUAL_INTR_PENDING); + return 1; +} + +static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + skip_emulated_instruction(vcpu); + if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) + return 1; + + kvm_run->exit_reason = KVM_EXIT_HLT; + return 0; +} + +/* + * The exit handlers return 1 if the exit was handled fully and guest execution + * may resume. Otherwise they set the kvm_run parameter to indicate what needs + * to be done to userspace and return 0. + */ +static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_EXCEPTION_NMI] = handle_exception, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IO_INSTRUCTION] = handle_io, + [EXIT_REASON_INVLPG] = handle_invlpg, + [EXIT_REASON_CR_ACCESS] = handle_cr, + [EXIT_REASON_DR_ACCESS] = handle_dr, + [EXIT_REASON_CPUID] = handle_cpuid, + [EXIT_REASON_MSR_READ] = handle_rdmsr, + [EXIT_REASON_MSR_WRITE] = handle_wrmsr, + [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, + [EXIT_REASON_HLT] = handle_halt, +}; + +static const int kvm_vmx_max_exit_handlers = + sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. 
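+ * The exit reason read from VM_EXIT_REASON indexes the
+ * kvm_vmx_exit_handlers table below; exits without a handler are
+ * reported to userspace as KVM_EXIT_UNKNOWN.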
+ */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + u32 exit_reason = vmcs_read32(VM_EXIT_REASON); + + if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && + exit_reason != EXIT_REASON_EXCEPTION_NMI ) + printk(KERN_WARNING "%s: unexpected, valid vectoring info and " + "exit reason is 0x%x\n", __FUNCTION__, exit_reason); + kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + if (exit_reason < kvm_vmx_max_exit_handlers + && kvm_vmx_exit_handlers[exit_reason]) + return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u8 fail; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + +again: + /* + * Set host fs and gs selectors. Unfortunately, 22.2.3 does not + * allow segment selectors with cpl > 0 or ti == 1. + */ + fs_sel = read_fs(); + gs_sel = read_gs(); + ldt_sel = read_ldt(); + fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel; + if (!fs_gs_ldt_reload_needed) { + vmcs_write16(HOST_FS_SELECTOR, fs_sel); + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + } else { + vmcs_write16(HOST_FS_SELECTOR, 0); + vmcs_write16(HOST_GS_SELECTOR, 0); + } + +#ifdef __x86_64__ + vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); + vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); +#else + vmcs_writel(HOST_FS_BASE, segment_base(fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); +#endif + + if (vcpu->irq_summary && + !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) + kvm_try_inject_irq(vcpu); + + if (vcpu->guest_debug.enabled) + kvm_guest_debug_pre(vcpu); + + fx_save(vcpu->host_fx_image); + fx_restore(vcpu->guest_fx_image); + + save_msrs(vcpu->host_msrs, vcpu->nmsrs); + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + + asm ( + /* Store host registers */ + "pushf \n\t" +#ifdef __x86_64__ + "push %%rax; push %%rbx; push %%rdx;" + "push %%rsi; push %%rdi; push %%rbp;" + "push %%r8; push %%r9; push %%r10; push %%r11;" + "push %%r12; push %%r13; push %%r14; push %%r15;" + "push %%rcx \n\t" + ASM_VMX_VMWRITE_RSP_RDX "\n\t" +#else + "pusha; push %%ecx \n\t" + ASM_VMX_VMWRITE_RSP_RDX "\n\t" +#endif + /* Check if vmlaunch of vmresume is needed */ + "cmp $0, %1 \n\t" + /* Load guest registers. Don't clobber flags. 
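+ * (The cmp above sets ZF from vcpu->launched; the moves below must
+ * leave the flags intact so the later jne can choose between
+ * vmlaunch and vmresume.)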
*/ +#ifdef __x86_64__ + "mov %c[cr2](%3), %%rax \n\t" + "mov %%rax, %%cr2 \n\t" + "mov %c[rax](%3), %%rax \n\t" + "mov %c[rbx](%3), %%rbx \n\t" + "mov %c[rdx](%3), %%rdx \n\t" + "mov %c[rsi](%3), %%rsi \n\t" + "mov %c[rdi](%3), %%rdi \n\t" + "mov %c[rbp](%3), %%rbp \n\t" + "mov %c[r8](%3), %%r8 \n\t" + "mov %c[r9](%3), %%r9 \n\t" + "mov %c[r10](%3), %%r10 \n\t" + "mov %c[r11](%3), %%r11 \n\t" + "mov %c[r12](%3), %%r12 \n\t" + "mov %c[r13](%3), %%r13 \n\t" + "mov %c[r14](%3), %%r14 \n\t" + "mov %c[r15](%3), %%r15 \n\t" + "mov %c[rcx](%3), %%rcx \n\t" /* kills %3 (rcx) */ +#else + "mov %c[cr2](%3), %%eax \n\t" + "mov %%eax, %%cr2 \n\t" + "mov %c[rax](%3), %%eax \n\t" + "mov %c[rbx](%3), %%ebx \n\t" + "mov %c[rdx](%3), %%edx \n\t" + "mov %c[rsi](%3), %%esi \n\t" + "mov %c[rdi](%3), %%edi \n\t" + "mov %c[rbp](%3), %%ebp \n\t" + "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */ +#endif + /* Enter guest mode */ + "jne launched \n\t" + ASM_VMX_VMLAUNCH "\n\t" + "jmp kvm_vmx_return \n\t" + "launched: " ASM_VMX_VMRESUME "\n\t" + ".globl kvm_vmx_return \n\t" + "kvm_vmx_return: " + /* Save guest registers, load host registers, keep flags */ +#ifdef __x86_64__ + "xchg %3, 0(%%rsp) \n\t" + "mov %%rax, %c[rax](%3) \n\t" + "mov %%rbx, %c[rbx](%3) \n\t" + "pushq 0(%%rsp); popq %c[rcx](%3) \n\t" + "mov %%rdx, %c[rdx](%3) \n\t" + "mov %%rsi, %c[rsi](%3) \n\t" + "mov %%rdi, %c[rdi](%3) \n\t" + "mov %%rbp, %c[rbp](%3) \n\t" + "mov %%r8, %c[r8](%3) \n\t" + "mov %%r9, %c[r9](%3) \n\t" + "mov %%r10, %c[r10](%3) \n\t" + "mov %%r11, %c[r11](%3) \n\t" + "mov %%r12, %c[r12](%3) \n\t" + "mov %%r13, %c[r13](%3) \n\t" + "mov %%r14, %c[r14](%3) \n\t" + "mov %%r15, %c[r15](%3) \n\t" + "mov %%cr2, %%rax \n\t" + "mov %%rax, %c[cr2](%3) \n\t" + "mov 0(%%rsp), %3 \n\t" + + "pop %%rcx; pop %%r15; pop %%r14; pop %%r13; pop %%r12;" + "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" + "pop %%rbp; pop %%rdi; pop %%rsi;" + "pop %%rdx; pop %%rbx; pop %%rax \n\t" +#else + "xchg %3, 0(%%esp) \n\t" + "mov %%eax, %c[rax](%3) \n\t" + "mov %%ebx, %c[rbx](%3) \n\t" + "pushl 0(%%esp); popl %c[rcx](%3) \n\t" + "mov %%edx, %c[rdx](%3) \n\t" + "mov %%esi, %c[rsi](%3) \n\t" + "mov %%edi, %c[rdi](%3) \n\t" + "mov %%ebp, %c[rbp](%3) \n\t" + "mov %%cr2, %%eax \n\t" + "mov %%eax, %c[cr2](%3) \n\t" + "mov 0(%%esp), %3 \n\t" + + "pop %%ecx; popa \n\t" +#endif + "setbe %0 \n\t" + "popf \n\t" + : "=g" (fail) + : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), + "c"(vcpu), + [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), + [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), + [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), + [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), + [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), + [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), + [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])), +#ifdef __x86_64__ + [r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), + [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), + [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), + [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), + [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), + [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), + [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), + [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])), +#endif + [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) + : "cc", "memory" ); + + ++kvm_stat.exits; + + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + load_msrs(vcpu->host_msrs, 
NR_BAD_MSRS); + + fx_save(vcpu->guest_fx_image); + fx_restore(vcpu->host_fx_image); + +#ifndef __x86_64__ + asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); +#endif + + kvm_run->exit_type = 0; + if (fail) { + kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; + kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + } else { + if (fs_gs_ldt_reload_needed) { + load_ldt(ldt_sel); + load_fs(fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(gs_sel); +#ifdef __x86_64__ + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } + vcpu->launched = 1; + kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; + if (kvm_handle_exit(kvm_run, vcpu)) { + /* Give scheduler a change to reschedule. */ + if (signal_pending(current)) { + ++kvm_stat.signal_exits; + return -EINTR; + } + kvm_resched(vcpu); + goto again; + } + } + return 0; +} + +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); +} + +static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, + unsigned long addr, + u32 err_code) +{ + u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + + ++kvm_stat.pf_guest; + + if (is_page_fault(vect_info)) { + printk(KERN_DEBUG "inject_page_fault: " + "double fault 0x%lx @ 0x%lx\n", + addr, vmcs_readl(GUEST_RIP)); + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 0); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + DF_VECTOR | + INTR_TYPE_EXCEPTION | + INTR_INFO_DELIEVER_CODE_MASK | + INTR_INFO_VALID_MASK); + return; + } + vcpu->cr2 = addr; + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, err_code); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + PF_VECTOR | + INTR_TYPE_EXCEPTION | + INTR_INFO_DELIEVER_CODE_MASK | + INTR_INFO_VALID_MASK); + +} + +static void vmx_free_vmcs(struct kvm_vcpu *vcpu) +{ + if (vcpu->vmcs) { + on_each_cpu(__vcpu_clear, vcpu, 0, 1); + free_vmcs(vcpu->vmcs); + vcpu->vmcs = NULL; + } +} + +static void vmx_free_vcpu(struct kvm_vcpu *vcpu) +{ + vmx_free_vmcs(vcpu); +} + +static int vmx_create_vcpu(struct kvm_vcpu *vcpu) +{ + struct vmcs *vmcs; + + vmcs = alloc_vmcs(); + if (!vmcs) + return -ENOMEM; + vmcs_clear(vmcs); + vcpu->vmcs = vmcs; + vcpu->launched = 0; + return 0; +} + +static struct kvm_arch_ops vmx_arch_ops = { + .cpu_has_kvm_support = cpu_has_kvm_support, + .disabled_by_bios = vmx_disabled_by_bios, + .hardware_setup = hardware_setup, + .hardware_unsetup = hardware_unsetup, + .hardware_enable = hardware_enable, + .hardware_disable = hardware_disable, + + .vcpu_create = vmx_create_vcpu, + .vcpu_free = vmx_free_vcpu, + + .vcpu_load = vmx_vcpu_load, + .vcpu_put = vmx_vcpu_put, + + .set_guest_debug = set_guest_debug, + .get_msr = vmx_get_msr, + .set_msr = vmx_set_msr, + .get_segment_base = vmx_get_segment_base, + .get_segment = vmx_get_segment, + .set_segment = vmx_set_segment, + .is_long_mode = vmx_is_long_mode, + .get_cs_db_l_bits = vmx_get_cs_db_l_bits, + .set_cr0 = vmx_set_cr0, + .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, + .set_cr3 = vmx_set_cr3, + .set_cr4 = vmx_set_cr4, +#ifdef __x86_64__ + .set_efer = vmx_set_efer, +#endif + .get_idt = vmx_get_idt, + .set_idt = vmx_set_idt, + .get_gdt = vmx_get_gdt, + .set_gdt = vmx_set_gdt, + .cache_regs = vcpu_load_rsp_rip, + .decache_regs = vcpu_put_rsp_rip, + .get_rflags = vmx_get_rflags, + .set_rflags = vmx_set_rflags, + + .tlb_flush = vmx_flush_tlb, + .inject_page_fault = vmx_inject_page_fault, + + .inject_gp = vmx_inject_gp, + + .run = vmx_vcpu_run, + .skip_emulated_instruction 
= skip_emulated_instruction, + .vcpu_setup = vmx_vcpu_setup, +}; + +static int __init vmx_init(void) +{ + kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + return 0; +} + +static void __exit vmx_exit(void) +{ + kvm_exit_arch(); +} + +module_init(vmx_init) +module_exit(vmx_exit) diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h new file mode 100644 index 00000000000..79727834158 --- /dev/null +++ b/drivers/kvm/vmx.h @@ -0,0 +1,296 @@ +#ifndef VMX_H +#define VMX_H + +/* + * vmx.h: VMX Architecture related definitions + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * A few random additions are: + * Copyright (C) 2006 Qumranet + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + */ + +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVDPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000 +#define CPU_BASED_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 + +#define PIN_BASED_EXT_INTR_MASK 0x1 +#define PIN_BASED_NMI_EXITING 0x8 + +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200 + + +/* VMCS Encodings */ +enum vmcs_field { + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + 
PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 + +#define EXIT_REASON_PENDING_INTERRUPT 7 + +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define 
EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 + +/* + * Interruption-information format + */ +#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ +#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ +#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ + +#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK +#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK +#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK +#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK + +#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ + +/* + * Exit Qualifications for MOV for Control Register Access + */ +#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control register */ +#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ +#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */ +#define LMSW_SOURCE_DATA_SHIFT 16 +#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ +#define REG_EAX (0 << 8) +#define REG_ECX (1 << 8) +#define REG_EDX (2 << 8) +#define REG_EBX (3 << 8) +#define REG_ESP (4 << 8) +#define REG_EBP (5 << 8) +#define REG_ESI (6 << 8) +#define REG_EDI (7 << 8) +#define REG_R8 (8 << 8) +#define REG_R9 (9 << 8) +#define REG_R10 (10 << 8) +#define REG_R11 (11 << 8) +#define REG_R12 (12 << 8) +#define REG_R13 (13 << 8) +#define REG_R14 (14 << 8) +#define REG_R15 (15 << 8) + +/* + * Exit Qualifications for MOV for Debug Register Access + */ +#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */ +#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ +#define TYPE_MOV_TO_DR (0 << 4) +#define TYPE_MOV_FROM_DR (1 << 4) +#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */ + + +/* segment AR */ +#define SEGMENT_AR_L_MASK (1 << 13) + +/* entry controls */ +#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9) + +#define AR_TYPE_ACCESSES_MASK 1 +#define AR_TYPE_READABLE_MASK (1 << 1) +#define AR_TYPE_WRITEABLE_MASK (1 << 2) +#define AR_TYPE_CODE_MASK (1 << 3) +#define AR_TYPE_MASK 0x0f +#define AR_TYPE_BUSY_64_TSS 11 +#define AR_TYPE_BUSY_32_TSS 11 +#define AR_TYPE_BUSY_16_TSS 3 +#define AR_TYPE_LDT 2 + +#define AR_UNUSABLE_MASK (1 << 16) +#define AR_S_MASK (1 << 4) +#define AR_P_MASK (1 << 7) +#define AR_L_MASK (1 << 13) +#define AR_DB_MASK (1 << 14) +#define AR_G_MASK (1 << 15) +#define AR_DPL_SHIFT 5 +#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) + +#define AR_RESERVD_MASK 0xfffe0f00 + +#define CR4_VMXE 0x2000 + +#define MSR_IA32_VMX_BASIC_MSR 0x480 +#define MSR_IA32_FEATURE_CONTROL 0x03a +#define MSR_IA32_VMX_PINBASED_CTLS_MSR 0x481 +#define MSR_IA32_VMX_PROCBASED_CTLS_MSR 0x482 +#define MSR_IA32_VMX_EXIT_CTLS_MSR 0x483 +#define MSR_IA32_VMX_ENTRY_CTLS_MSR 0x484 + +#endif diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c new file mode 100644 index 00000000000..7e838bf0592 --- /dev/null +++ b/drivers/kvm/x86_emulate.c @@ -0,0 +1,1409 @@ +/****************************************************************************** + * x86_emulate.c + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. 
+ * + * Copyright (c) 2005 Keir Fraser + * + * Linux coding style, mod r/m decoder, segment base fixes, real-mode + * privileged instructions: + * + * Copyright (C) 2006 Qumranet + * + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 + */ + +#ifndef __KERNEL__ +#include <stdio.h> +#include <stdint.h> +#include <public/xen.h> +#define DPRINTF(_f, _a ...) printf( _f , ## _a ) +#else +#include "kvm.h" +#define DPRINTF(x...) do {} while (0) +#endif +#include "x86_emulate.h" +#include <linux/module.h> + +/* + * Opcode effective-address decode tables. + * Note that we only emulate instructions that have at least one memory + * operand (excluding implicit stack references). We assume that stack + * references and instruction fetches will never occur in special memory + * areas that require emulation. So, for example, 'mov <imm>,<reg>' need + * not be handled. + */ + +/* Operand sizes: 8-bit operands or specified/overridden size. */ +#define ByteOp (1<<0) /* 8-bit operands. */ +/* Destination operand type. */ +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ +#define DstReg (2<<1) /* Register operand. */ +#define DstMem (3<<1) /* Memory operand. */ +#define DstMask (3<<1) +/* Source operand type. */ +#define SrcNone (0<<3) /* No source operand. */ +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ +#define SrcReg (1<<3) /* Register operand. */ +#define SrcMem (2<<3) /* Memory operand. */ +#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ +#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ +#define SrcImm (5<<3) /* Immediate operand. */ +#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ +#define SrcMask (7<<3) +/* Generic ModRM decode. */ +#define ModRM (1<<6) +/* Destination is only written; never read.
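As an aside to the flag scheme above (purely illustrative, not part of the patch): each opcode_table entry is simply an OR of these bits, and the emulator later picks an entry apart with DstMask, SrcMask and friends. The hypothetical helper below shows the unpacking for a descriptor such as ByteOp | DstMem | SrcReg | ModRM, which is what opcode 0x00 (add r/m8, r8) uses; it assumes a kernel context for printk().

        /* Illustrative sketch only; show_descriptor() is hypothetical. */
        static void show_descriptor(u8 d)
        {
                if (d & ByteOp)
                        printk("8-bit operands\n");
                if ((d & DstMask) == DstReg)
                        printk("destination: register\n");
                else if ((d & DstMask) == DstMem)
                        printk("destination: memory\n");
                if ((d & SrcMask) == SrcReg)
                        printk("source: register\n");
                else if ((d & SrcMask) == SrcMem)
                        printk("source: memory\n");
                else if ((d & SrcMask) == SrcImm)
                        printk("source: immediate\n");
                if (d & ModRM)
                        printk("a ModRM byte follows and must be decoded\n");
        }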
*/ +#define Mov (1<<7) + +static u8 opcode_table[256] = { + /* 0x00 - 0x07 */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x08 - 0x0F */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x10 - 0x17 */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x18 - 0x1F */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x20 - 0x27 */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x28 - 0x2F */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x30 - 0x37 */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x38 - 0x3F */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, + 0, 0, 0, 0, + /* 0x40 - 0x4F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x6F */ + 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x87 */ + ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, + ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, + /* 0x88 - 0x8F */ + ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, + ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + 0, 0, 0, DstMem | SrcNone | ModRM | Mov, + /* 0x90 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xA0 - 0xA7 */ + ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, + ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, + ByteOp | ImplicitOps | Mov, ImplicitOps | Mov, + ByteOp | ImplicitOps, ImplicitOps, + /* 0xA8 - 0xAF */ + 0, 0, ByteOp | ImplicitOps | Mov, ImplicitOps | Mov, + ByteOp | ImplicitOps | Mov, ImplicitOps | Mov, + ByteOp | ImplicitOps, ImplicitOps, + /* 0xB0 - 0xBF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xC0 - 0xC7 */ + ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, 0, + 0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, + DstMem | SrcImm | ModRM | Mov, + /* 0xC8 - 0xCF */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xD0 - 0xD7 */ + ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, + ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, + 0, 0, 0, 0, + /* 0xD8 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 - 0xF7 */ + 0, 0, 0, 0, + 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, + /* 0xF8 - 0xFF */ + 0, 0, 0, 0, + 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM +}; + +static u8 twobyte_table[256] = { + /* 0x00 - 0x0F */ + 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, + 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, + /* 0x10 - 0x1F */ + 0, 0, 0, 0, 0, 
0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, + /* 0x20 - 0x2F */ + ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x30 - 0x3F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x40 - 0x47 */ + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + /* 0x48 - 0x4F */ + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, + /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x6F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x8F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xA0 - 0xA7 */ + 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0, + /* 0xA8 - 0xAF */ + 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0, + /* 0xB0 - 0xB7 */ + ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, + DstMem | SrcReg | ModRM, + 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem16 | ModRM | Mov, + /* 0xB8 - 0xBF */ + 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM, + 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, + DstReg | SrcMem16 | ModRM | Mov, + /* 0xC0 - 0xCF */ + 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xD0 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 - 0xFF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* + * Tell the emulator that of the Group 7 instructions (sgdt, lidt, etc.) we + * are interested only in invlpg and not in any of the rest. + * + * invlpg is a special instruction in that the data it references may not + * be mapped. + */ +void kvm_emulator_want_group7_invlpg(void) +{ + twobyte_table[1] &= ~SrcMem; +} +EXPORT_SYMBOL_GPL(kvm_emulator_want_group7_invlpg); + +/* Type, address-of, and value of an instruction's operand. */ +struct operand { + enum { OP_REG, OP_MEM, OP_IMM } type; + unsigned int bytes; + unsigned long val, orig_val, *ptr; +}; + +/* EFLAGS bit definitions. */ +#define EFLG_OF (1<<11) +#define EFLG_DF (1<<10) +#define EFLG_SF (1<<7) +#define EFLG_ZF (1<<6) +#define EFLG_AF (1<<4) +#define EFLG_PF (1<<2) +#define EFLG_CF (1<<0) + +/* + * Instruction emulation: + * Most instructions are emulated directly via a fragment of inline assembly + * code. This allows us to save/restore EFLAGS and thus very easily pick up + * any modified flags. + */ + +#if defined(__x86_64__) +#define _LO32 "k" /* force 32-bit operand */ +#define _STK "%%rsp" /* stack pointer */ +#elif defined(__i386__) +#define _LO32 "" /* force 32-bit operand */ +#define _STK "%%esp" /* stack pointer */ +#endif + +/* + * These EFLAGS bits are restored from saved value during emulation, and + * any changes are written back to the saved value after emulation. + */ +#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) + +/* Before executing instruction: restore necessary bits in EFLAGS. 
*/ +#define _PRE_EFLAGS(_sav, _msk, _tmp) \ + /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */ \ + "push %"_sav"; " \ + "movl %"_msk",%"_LO32 _tmp"; " \ + "andl %"_LO32 _tmp",("_STK"); " \ + "pushf; " \ + "notl %"_LO32 _tmp"; " \ + "andl %"_LO32 _tmp",("_STK"); " \ + "pop %"_tmp"; " \ + "orl %"_LO32 _tmp",("_STK"); " \ + "popf; " \ + /* _sav &= ~msk; */ \ + "movl %"_msk",%"_LO32 _tmp"; " \ + "notl %"_LO32 _tmp"; " \ + "andl %"_LO32 _tmp",%"_sav"; " + +/* After executing instruction: write-back necessary bits in EFLAGS. */ +#define _POST_EFLAGS(_sav, _msk, _tmp) \ + /* _sav |= EFLAGS & _msk; */ \ + "pushf; " \ + "pop %"_tmp"; " \ + "andl %"_msk",%"_LO32 _tmp"; " \ + "orl %"_LO32 _tmp",%"_sav"; " + +/* Raw emulation: instruction has two explicit operands. */ +#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ + do { \ + unsigned long _tmp; \ + \ + switch ((_dst).bytes) { \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","4","2") \ + _op"w %"_wx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _wy ((_src).val), "i" (EFLAGS_MASK) ); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","4","2") \ + _op"l %"_lx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _ly ((_src).val), "i" (EFLAGS_MASK) ); \ + break; \ + case 8: \ + __emulate_2op_8byte(_op, _src, _dst, \ + _eflags, _qx, _qy); \ + break; \ + } \ + } while (0) + +#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ + do { \ + unsigned long _tmp; \ + switch ( (_dst).bytes ) \ + { \ + case 1: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","4","2") \ + _op"b %"_bx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _by ((_src).val), "i" (EFLAGS_MASK) ); \ + break; \ + default: \ + __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ + _wx, _wy, _lx, _ly, _qx, _qy); \ + break; \ + } \ + } while (0) + +/* Source operand is byte-sized and may be restricted to just %cl. */ +#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \ + __emulate_2op(_op, _src, _dst, _eflags, \ + "b", "c", "b", "c", "b", "c", "b", "c") + +/* Source operand is byte, word, long or quad sized. */ +#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ + __emulate_2op(_op, _src, _dst, _eflags, \ + "b", "q", "w", "r", _LO32, "r", "", "r") + +/* Source operand is word, long or quad sized. */ +#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ + __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ + "w", "r", _LO32, "r", "", "r") + +/* Instruction has only one explicit operand (no source operand). 
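Stated in plain C (an illustrative model with no inline assembly, not part of the patch): around each such fragment the guest's saved status flags are first loaded into the host EFLAGS, so instructions like adc and rcl see the right carry; the fragment runs; and afterwards only the bits in EFLAGS_MASK are copied back into the saved guest flags, leaving every other guest bit untouched. The write-back step is equivalent to:

        /* Hypothetical helper modelling the _POST_EFLAGS write-back. */
        static unsigned long merge_eflags(unsigned long guest_eflags,
                                          unsigned long host_eflags_after_op)
        {
                return (guest_eflags & ~EFLAGS_MASK) |
                       (host_eflags_after_op & EFLAGS_MASK);
        }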
*/ +#define emulate_1op(_op, _dst, _eflags) \ + do { \ + unsigned long _tmp; \ + \ + switch ( (_dst).bytes ) \ + { \ + case 1: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","3","2") \ + _op"b %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK) ); \ + break; \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","3","2") \ + _op"w %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK) ); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","3","2") \ + _op"l %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK) ); \ + break; \ + case 8: \ + __emulate_1op_8byte(_op, _dst, _eflags); \ + break; \ + } \ + } while (0) + +/* Emulate an instruction with quadword operands (x86/64 only). */ +#if defined(__x86_64__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","4","2") \ + _op"q %"_qx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _qy ((_src).val), "i" (EFLAGS_MASK) ); \ + } while (0) + +#define __emulate_1op_8byte(_op, _dst, _eflags) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0","3","2") \ + _op"q %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK) ); \ + } while (0) + +#elif defined(__i386__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) +#define __emulate_1op_8byte(_op, _dst, _eflags) +#endif /* __i386__ */ + +/* Fetch next part of the instruction being emulated. */ +#define insn_fetch(_type, _size, _eip) \ +({ unsigned long _x; \ + rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \ + (_size), ctxt); \ + if ( rc != 0 ) \ + goto done; \ + (_eip) += (_size); \ + (_type)_x; \ +}) + +/* Access/update address held in a register, based on addressing mode. */ +#define register_address(base, reg) \ + ((base) + ((ad_bytes == sizeof(unsigned long)) ? 
(reg) : \ + ((reg) & ((1UL << (ad_bytes << 3)) - 1)))) + +#define register_address_increment(reg, inc) \ + do { \ + /* signed type ensures sign extension to long */ \ + int _inc = (inc); \ + if ( ad_bytes == sizeof(unsigned long) ) \ + (reg) += _inc; \ + else \ + (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \ + (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ + } while (0) + +void *decode_register(u8 modrm_reg, unsigned long *regs, + int highbyte_regs) +{ + void *p; + + p = ®s[modrm_reg]; + if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) + p = (unsigned char *)®s[modrm_reg & 3] + 1; + return p; +} + +static int read_descriptor(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + void *ptr, + u16 *size, unsigned long *address, int op_bytes) +{ + int rc; + + if (op_bytes == 2) + op_bytes = 3; + *address = 0; + rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt); + if (rc) + return rc; + rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt); + return rc; +} + +int +x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) +{ + u8 b, d, sib, twobyte = 0, rex_prefix = 0; + u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + unsigned long *override_base = NULL; + unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; + int rc = 0; + struct operand src, dst; + unsigned long cr2 = ctxt->cr2; + int mode = ctxt->mode; + unsigned long modrm_ea; + int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0; + + /* Shadow copy of register state. Committed on successful emulation. */ + unsigned long _regs[NR_VCPU_REGS]; + unsigned long _eip = ctxt->vcpu->rip, _eflags = ctxt->eflags; + unsigned long modrm_val = 0; + + memcpy(_regs, ctxt->vcpu->regs, sizeof _regs); + + switch (mode) { + case X86EMUL_MODE_REAL: + case X86EMUL_MODE_PROT16: + op_bytes = ad_bytes = 2; + break; + case X86EMUL_MODE_PROT32: + op_bytes = ad_bytes = 4; + break; +#ifdef __x86_64__ + case X86EMUL_MODE_PROT64: + op_bytes = 4; + ad_bytes = 8; + break; +#endif + default: + return -1; + } + + /* Legacy prefixes. */ + for (i = 0; i < 8; i++) { + switch (b = insn_fetch(u8, 1, _eip)) { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + if (mode == X86EMUL_MODE_PROT64) + ad_bytes ^= 12; /* switch between 4/8 bytes */ + else + ad_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x2e: /* CS override */ + override_base = &ctxt->cs_base; + break; + case 0x3e: /* DS override */ + override_base = &ctxt->ds_base; + break; + case 0x26: /* ES override */ + override_base = &ctxt->es_base; + break; + case 0x64: /* FS override */ + override_base = &ctxt->fs_base; + break; + case 0x65: /* GS override */ + override_base = &ctxt->gs_base; + break; + case 0x36: /* SS override */ + override_base = &ctxt->ss_base; + break; + case 0xf0: /* LOCK */ + lock_prefix = 1; + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + case 0xf2: /* REPNE/REPNZ */ + break; + default: + goto done_prefixes; + } + } + +done_prefixes: + + /* REX prefix. */ + if ((mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40)) { + rex_prefix = b; + if (b & 8) + op_bytes = 8; /* REX.W */ + modrm_reg = (b & 4) << 1; /* REX.R */ + index_reg = (b & 2) << 2; /* REX.X */ + modrm_rm = base_reg = (b & 1) << 3; /* REG.B */ + b = insn_fetch(u8, 1, _eip); + } + + /* Opcode byte(s). */ + d = opcode_table[b]; + if (d == 0) { + /* Two-byte opcode? 
*/ + if (b == 0x0f) { + twobyte = 1; + b = insn_fetch(u8, 1, _eip); + d = twobyte_table[b]; + } + + /* Unrecognised? */ + if (d == 0) + goto cannot_emulate; + } + + /* ModRM and SIB bytes. */ + if (d & ModRM) { + modrm = insn_fetch(u8, 1, _eip); + modrm_mod |= (modrm & 0xc0) >> 6; + modrm_reg |= (modrm & 0x38) >> 3; + modrm_rm |= (modrm & 0x07); + modrm_ea = 0; + use_modrm_ea = 1; + + if (modrm_mod == 3) { + modrm_val = *(unsigned long *) + decode_register(modrm_rm, _regs, d & ByteOp); + goto modrm_done; + } + + if (ad_bytes == 2) { + unsigned bx = _regs[VCPU_REGS_RBX]; + unsigned bp = _regs[VCPU_REGS_RBP]; + unsigned si = _regs[VCPU_REGS_RSI]; + unsigned di = _regs[VCPU_REGS_RDI]; + + /* 16-bit ModR/M decode. */ + switch (modrm_mod) { + case 0: + if (modrm_rm == 6) + modrm_ea += insn_fetch(u16, 2, _eip); + break; + case 1: + modrm_ea += insn_fetch(s8, 1, _eip); + break; + case 2: + modrm_ea += insn_fetch(u16, 2, _eip); + break; + } + switch (modrm_rm) { + case 0: + modrm_ea += bx + si; + break; + case 1: + modrm_ea += bx + di; + break; + case 2: + modrm_ea += bp + si; + break; + case 3: + modrm_ea += bp + di; + break; + case 4: + modrm_ea += si; + break; + case 5: + modrm_ea += di; + break; + case 6: + if (modrm_mod != 0) + modrm_ea += bp; + break; + case 7: + modrm_ea += bx; + break; + } + if (modrm_rm == 2 || modrm_rm == 3 || + (modrm_rm == 6 && modrm_mod != 0)) + if (!override_base) + override_base = &ctxt->ss_base; + modrm_ea = (u16)modrm_ea; + } else { + /* 32/64-bit ModR/M decode. */ + switch (modrm_rm) { + case 4: + case 12: + sib = insn_fetch(u8, 1, _eip); + index_reg |= (sib >> 3) & 7; + base_reg |= sib & 7; + scale = sib >> 6; + + switch (base_reg) { + case 5: + if (modrm_mod != 0) + modrm_ea += _regs[base_reg]; + else + modrm_ea += insn_fetch(s32, 4, _eip); + break; + default: + modrm_ea += _regs[base_reg]; + } + switch (index_reg) { + case 4: + break; + default: + modrm_ea += _regs[index_reg] << scale; + + } + break; + case 5: + if (modrm_mod != 0) + modrm_ea += _regs[modrm_rm]; + else if (mode == X86EMUL_MODE_PROT64) + rip_relative = 1; + break; + default: + modrm_ea += _regs[modrm_rm]; + break; + } + switch (modrm_mod) { + case 0: + if (modrm_rm == 5) + modrm_ea += insn_fetch(s32, 4, _eip); + break; + case 1: + modrm_ea += insn_fetch(s8, 1, _eip); + break; + case 2: + modrm_ea += insn_fetch(s32, 4, _eip); + break; + } + } + if (!override_base) + override_base = &ctxt->ds_base; + if (mode == X86EMUL_MODE_PROT64 && + override_base != &ctxt->fs_base && + override_base != &ctxt->gs_base) + override_base = NULL; + + if (override_base) + modrm_ea += *override_base; + + if (rip_relative) { + modrm_ea += _eip; + switch (d & SrcMask) { + case SrcImmByte: + modrm_ea += 1; + break; + case SrcImm: + if (d & ByteOp) + modrm_ea += 1; + else + if (op_bytes == 8) + modrm_ea += 4; + else + modrm_ea += op_bytes; + } + } + if (ad_bytes != 8) + modrm_ea = (u32)modrm_ea; + cr2 = modrm_ea; + modrm_done: + ; + } + + /* Decode and fetch the destination operand: register or memory. */ + switch (d & DstMask) { + case ImplicitOps: + /* Special instructions do their own operand decoding. 
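To make the 32/64-bit ModR/M decode above concrete, here is a worked example with invented values (not taken from the patch): the bytes 0x44 0x98 0x10 split into mod=1, reg=0, rm=4, so a SIB byte follows; sib=0x98 gives scale=2, index=3 (EBX) and base=0 (EAX), and with the disp8 of 0x10 the effective address is EAX + (EBX << 2) + 0x10. A hypothetical helper covering just the mod=1 SIB case:

        /* Illustrative only: mirrors the shifts and masks used above for the
         * mod=1 (disp8) form; regs[] follows the general-purpose numbering. */
        static u32 sib_ea_disp8(u8 sib, s8 disp8, const u32 *regs)
        {
                u8 scale = sib >> 6;
                u8 index = (sib >> 3) & 7;
                u8 base = sib & 7;
                u32 ea = regs[base] + disp8;

                if (index != 4)         /* index 4 means "no index register" */
                        ea += regs[index] << scale;
                return ea;
        }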
*/ + goto special_insn; + case DstReg: + dst.type = OP_REG; + if ((d & ByteOp) + && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { + dst.ptr = decode_register(modrm_reg, _regs, + (rex_prefix == 0)); + dst.val = *(u8 *) dst.ptr; + dst.bytes = 1; + } else { + dst.ptr = decode_register(modrm_reg, _regs, 0); + switch ((dst.bytes = op_bytes)) { + case 2: + dst.val = *(u16 *)dst.ptr; + break; + case 4: + dst.val = *(u32 *)dst.ptr; + break; + case 8: + dst.val = *(u64 *)dst.ptr; + break; + } + } + break; + case DstMem: + dst.type = OP_MEM; + dst.ptr = (unsigned long *)cr2; + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + if (!(d & Mov) && /* optimisation - avoid slow emulated read */ + ((rc = ops->read_emulated((unsigned long)dst.ptr, + &dst.val, dst.bytes, ctxt)) != 0)) + goto done; + break; + } + dst.orig_val = dst.val; + + /* + * Decode and fetch the source operand: register, memory + * or immediate. + */ + switch (d & SrcMask) { + case SrcNone: + break; + case SrcReg: + src.type = OP_REG; + if (d & ByteOp) { + src.ptr = decode_register(modrm_reg, _regs, + (rex_prefix == 0)); + src.val = src.orig_val = *(u8 *) src.ptr; + src.bytes = 1; + } else { + src.ptr = decode_register(modrm_reg, _regs, 0); + switch ((src.bytes = op_bytes)) { + case 2: + src.val = src.orig_val = *(u16 *) src.ptr; + break; + case 4: + src.val = src.orig_val = *(u32 *) src.ptr; + break; + case 8: + src.val = src.orig_val = *(u64 *) src.ptr; + break; + } + } + break; + case SrcMem16: + src.bytes = 2; + goto srcmem_common; + case SrcMem32: + src.bytes = 4; + goto srcmem_common; + case SrcMem: + src.bytes = (d & ByteOp) ? 1 : op_bytes; + srcmem_common: + src.type = OP_MEM; + src.ptr = (unsigned long *)cr2; + if ((rc = ops->read_emulated((unsigned long)src.ptr, + &src.val, src.bytes, ctxt)) != 0) + goto done; + src.orig_val = src.val; + break; + case SrcImm: + src.type = OP_IMM; + src.ptr = (unsigned long *)_eip; + src.bytes = (d & ByteOp) ? 1 : op_bytes; + if (src.bytes == 8) + src.bytes = 4; + /* NB. Immediates are sign-extended as necessary. */ + switch (src.bytes) { + case 1: + src.val = insn_fetch(s8, 1, _eip); + break; + case 2: + src.val = insn_fetch(s16, 2, _eip); + break; + case 4: + src.val = insn_fetch(s32, 4, _eip); + break; + } + break; + case SrcImmByte: + src.type = OP_IMM; + src.ptr = (unsigned long *)_eip; + src.bytes = 1; + src.val = insn_fetch(s8, 1, _eip); + break; + } + + if (twobyte) + goto twobyte_insn; + + switch (b) { + case 0x00 ... 0x05: + add: /* add */ + emulate_2op_SrcV("add", src, dst, _eflags); + break; + case 0x08 ... 0x0d: + or: /* or */ + emulate_2op_SrcV("or", src, dst, _eflags); + break; + case 0x10 ... 0x15: + adc: /* adc */ + emulate_2op_SrcV("adc", src, dst, _eflags); + break; + case 0x18 ... 0x1d: + sbb: /* sbb */ + emulate_2op_SrcV("sbb", src, dst, _eflags); + break; + case 0x20 ... 0x25: + and: /* and */ + emulate_2op_SrcV("and", src, dst, _eflags); + break; + case 0x28 ... 0x2d: + sub: /* sub */ + emulate_2op_SrcV("sub", src, dst, _eflags); + break; + case 0x30 ... 0x35: + xor: /* xor */ + emulate_2op_SrcV("xor", src, dst, _eflags); + break; + case 0x38 ... 0x3d: + cmp: /* cmp */ + emulate_2op_SrcV("cmp", src, dst, _eflags); + break; + case 0x63: /* movsxd */ + if (mode != X86EMUL_MODE_PROT64) + goto cannot_emulate; + dst.val = (s32) src.val; + break; + case 0x80 ... 
0x83: /* Grp1 */ + switch (modrm_reg) { + case 0: + goto add; + case 1: + goto or; + case 2: + goto adc; + case 3: + goto sbb; + case 4: + goto and; + case 5: + goto sub; + case 6: + goto xor; + case 7: + goto cmp; + } + break; + case 0x84 ... 0x85: + test: /* test */ + emulate_2op_SrcV("test", src, dst, _eflags); + break; + case 0x86 ... 0x87: /* xchg */ + /* Write back the register source. */ + switch (dst.bytes) { + case 1: + *(u8 *) src.ptr = (u8) dst.val; + break; + case 2: + *(u16 *) src.ptr = (u16) dst.val; + break; + case 4: + *src.ptr = (u32) dst.val; + break; /* 64b reg: zero-extend */ + case 8: + *src.ptr = dst.val; + break; + } + /* + * Write back the memory destination with implicit LOCK + * prefix. + */ + dst.val = src.val; + lock_prefix = 1; + break; + case 0xa0 ... 0xa1: /* mov */ + dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; + dst.val = src.val; + _eip += ad_bytes; /* skip src displacement */ + break; + case 0xa2 ... 0xa3: /* mov */ + dst.val = (unsigned long)_regs[VCPU_REGS_RAX]; + _eip += ad_bytes; /* skip dst displacement */ + break; + case 0x88 ... 0x8b: /* mov */ + case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ + dst.val = src.val; + break; + case 0x8f: /* pop (sole member of Grp1a) */ + /* 64-bit mode: POP always pops a 64-bit operand. */ + if (mode == X86EMUL_MODE_PROT64) + dst.bytes = 8; + if ((rc = ops->read_std(register_address(ctxt->ss_base, + _regs[VCPU_REGS_RSP]), + &dst.val, dst.bytes, ctxt)) != 0) + goto done; + register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); + break; + case 0xc0 ... 0xc1: + grp2: /* Grp2 */ + switch (modrm_reg) { + case 0: /* rol */ + emulate_2op_SrcB("rol", src, dst, _eflags); + break; + case 1: /* ror */ + emulate_2op_SrcB("ror", src, dst, _eflags); + break; + case 2: /* rcl */ + emulate_2op_SrcB("rcl", src, dst, _eflags); + break; + case 3: /* rcr */ + emulate_2op_SrcB("rcr", src, dst, _eflags); + break; + case 4: /* sal/shl */ + case 6: /* sal/shl */ + emulate_2op_SrcB("sal", src, dst, _eflags); + break; + case 5: /* shr */ + emulate_2op_SrcB("shr", src, dst, _eflags); + break; + case 7: /* sar */ + emulate_2op_SrcB("sar", src, dst, _eflags); + break; + } + break; + case 0xd0 ... 0xd1: /* Grp2 */ + src.val = 1; + goto grp2; + case 0xd2 ... 0xd3: /* Grp2 */ + src.val = _regs[VCPU_REGS_RCX]; + goto grp2; + case 0xf6 ... 0xf7: /* Grp3 */ + switch (modrm_reg) { + case 0 ... 1: /* test */ + /* + * Special case in Grp3: test has an immediate + * source operand. + */ + src.type = OP_IMM; + src.ptr = (unsigned long *)_eip; + src.bytes = (d & ByteOp) ? 1 : op_bytes; + if (src.bytes == 8) + src.bytes = 4; + switch (src.bytes) { + case 1: + src.val = insn_fetch(s8, 1, _eip); + break; + case 2: + src.val = insn_fetch(s16, 2, _eip); + break; + case 4: + src.val = insn_fetch(s32, 4, _eip); + break; + } + goto test; + case 2: /* not */ + dst.val = ~dst.val; + break; + case 3: /* neg */ + emulate_1op("neg", dst, _eflags); + break; + default: + goto cannot_emulate; + } + break; + case 0xfe ... 0xff: /* Grp4/Grp5 */ + switch (modrm_reg) { + case 0: /* inc */ + emulate_1op("inc", dst, _eflags); + break; + case 1: /* dec */ + emulate_1op("dec", dst, _eflags); + break; + case 6: /* push */ + /* 64-bit mode: PUSH always pushes a 64-bit operand. 
*/ + if (mode == X86EMUL_MODE_PROT64) { + dst.bytes = 8; + if ((rc = ops->read_std((unsigned long)dst.ptr, + &dst.val, 8, + ctxt)) != 0) + goto done; + } + register_address_increment(_regs[VCPU_REGS_RSP], + -dst.bytes); + if ((rc = ops->write_std( + register_address(ctxt->ss_base, + _regs[VCPU_REGS_RSP]), + dst.val, dst.bytes, ctxt)) != 0) + goto done; + dst.val = dst.orig_val; /* skanky: disable writeback */ + break; + default: + goto cannot_emulate; + } + break; + } + +writeback: + if ((d & Mov) || (dst.orig_val != dst.val)) { + switch (dst.type) { + case OP_REG: + /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ + switch (dst.bytes) { + case 1: + *(u8 *)dst.ptr = (u8)dst.val; + break; + case 2: + *(u16 *)dst.ptr = (u16)dst.val; + break; + case 4: + *dst.ptr = (u32)dst.val; + break; /* 64b: zero-ext */ + case 8: + *dst.ptr = dst.val; + break; + } + break; + case OP_MEM: + if (lock_prefix) + rc = ops->cmpxchg_emulated((unsigned long)dst. + ptr, dst.orig_val, + dst.val, dst.bytes, + ctxt); + else + rc = ops->write_emulated((unsigned long)dst.ptr, + dst.val, dst.bytes, + ctxt); + if (rc != 0) + goto done; + default: + break; + } + } + + /* Commit shadow register state. */ + memcpy(ctxt->vcpu->regs, _regs, sizeof _regs); + ctxt->eflags = _eflags; + ctxt->vcpu->rip = _eip; + +done: + return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + +special_insn: + if (twobyte) + goto twobyte_special_insn; + if (rep_prefix) { + if (_regs[VCPU_REGS_RCX] == 0) { + ctxt->vcpu->rip = _eip; + goto done; + } + _regs[VCPU_REGS_RCX]--; + _eip = ctxt->vcpu->rip; + } + switch (b) { + case 0xa4 ... 0xa5: /* movs */ + dst.type = OP_MEM; + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + dst.ptr = (unsigned long *)register_address(ctxt->es_base, + _regs[VCPU_REGS_RDI]); + if ((rc = ops->read_emulated(register_address( + override_base ? *override_base : ctxt->ds_base, + _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt)) != 0) + goto done; + register_address_increment(_regs[VCPU_REGS_RSI], + (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + register_address_increment(_regs[VCPU_REGS_RDI], + (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + break; + case 0xa6 ... 0xa7: /* cmps */ + DPRINTF("Urk! I don't handle CMPS.\n"); + goto cannot_emulate; + case 0xaa ... 0xab: /* stos */ + dst.type = OP_MEM; + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + dst.ptr = (unsigned long *)cr2; + dst.val = _regs[VCPU_REGS_RAX]; + register_address_increment(_regs[VCPU_REGS_RDI], + (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + break; + case 0xac ... 0xad: /* lods */ + dst.type = OP_REG; + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; + if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0) + goto done; + register_address_increment(_regs[VCPU_REGS_RSI], + (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + break; + case 0xae ... 0xaf: /* scas */ + DPRINTF("Urk! 
I don't handle SCAS.\n"); + goto cannot_emulate; + } + goto writeback; + +twobyte_insn: + switch (b) { + case 0x01: /* lgdt, lidt, lmsw */ + switch (modrm_reg) { + u16 size; + unsigned long address; + + case 2: /* lgdt */ + rc = read_descriptor(ctxt, ops, src.ptr, + &size, &address, op_bytes); + if (rc) + goto done; + realmode_lgdt(ctxt->vcpu, size, address); + break; + case 3: /* lidt */ + rc = read_descriptor(ctxt, ops, src.ptr, + &size, &address, op_bytes); + if (rc) + goto done; + realmode_lidt(ctxt->vcpu, size, address); + break; + case 4: /* smsw */ + if (modrm_mod != 3) + goto cannot_emulate; + *(u16 *)&_regs[modrm_rm] + = realmode_get_cr(ctxt->vcpu, 0); + break; + case 6: /* lmsw */ + if (modrm_mod != 3) + goto cannot_emulate; + realmode_lmsw(ctxt->vcpu, (u16)modrm_val, &_eflags); + break; + case 7: /* invlpg*/ + emulate_invlpg(ctxt->vcpu, cr2); + break; + default: + goto cannot_emulate; + } + break; + case 0x21: /* mov from dr to reg */ + if (modrm_mod != 3) + goto cannot_emulate; + rc = emulator_get_dr(ctxt, modrm_reg, &_regs[modrm_rm]); + break; + case 0x23: /* mov from reg to dr */ + if (modrm_mod != 3) + goto cannot_emulate; + rc = emulator_set_dr(ctxt, modrm_reg, _regs[modrm_rm]); + break; + case 0x40 ... 0x4f: /* cmov */ + dst.val = dst.orig_val = src.val; + d &= ~Mov; /* default to no move */ + /* + * First, assume we're decoding an even cmov opcode + * (lsb == 0). + */ + switch ((b & 15) >> 1) { + case 0: /* cmovo */ + d |= (_eflags & EFLG_OF) ? Mov : 0; + break; + case 1: /* cmovb/cmovc/cmovnae */ + d |= (_eflags & EFLG_CF) ? Mov : 0; + break; + case 2: /* cmovz/cmove */ + d |= (_eflags & EFLG_ZF) ? Mov : 0; + break; + case 3: /* cmovbe/cmovna */ + d |= (_eflags & (EFLG_CF | EFLG_ZF)) ? Mov : 0; + break; + case 4: /* cmovs */ + d |= (_eflags & EFLG_SF) ? Mov : 0; + break; + case 5: /* cmovp/cmovpe */ + d |= (_eflags & EFLG_PF) ? Mov : 0; + break; + case 7: /* cmovle/cmovng */ + d |= (_eflags & EFLG_ZF) ? Mov : 0; + /* fall through */ + case 6: /* cmovl/cmovnge */ + d |= (!(_eflags & EFLG_SF) != + !(_eflags & EFLG_OF)) ? Mov : 0; + break; + } + /* Odd cmov opcodes (lsb == 1) have inverted sense. */ + d ^= (b & 1) ? Mov : 0; + break; + case 0xb0 ... 0xb1: /* cmpxchg */ + /* + * Save real source value, then compare EAX against + * destination. + */ + src.orig_val = src.val; + src.val = _regs[VCPU_REGS_RAX]; + emulate_2op_SrcV("cmp", src, dst, _eflags); + /* Always write back. The question is: where to? */ + d |= Mov; + if (_eflags & EFLG_ZF) { + /* Success: write back to memory. */ + dst.val = src.orig_val; + } else { + /* Failure: write the value we saw to EAX. */ + dst.type = OP_REG; + dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; + } + break; + case 0xa3: + bt: /* bt */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("bt", src, dst, _eflags); + break; + case 0xb3: + btr: /* btr */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("btr", src, dst, _eflags); + break; + case 0xab: + bts: /* bts */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("bts", src, dst, _eflags); + break; + case 0xb6 ... 0xb7: /* movzx */ + dst.bytes = op_bytes; + dst.val = (d & ByteOp) ? 
(u8) src.val : (u16) src.val; + break; + case 0xbb: + btc: /* btc */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("btc", src, dst, _eflags); + break; + case 0xba: /* Grp8 */ + switch (modrm_reg & 3) { + case 0: + goto bt; + case 1: + goto bts; + case 2: + goto btr; + case 3: + goto btc; + } + break; + case 0xbe ... 0xbf: /* movsx */ + dst.bytes = op_bytes; + dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; + break; + } + goto writeback; + +twobyte_special_insn: + /* Disable writeback. */ + dst.orig_val = dst.val; + switch (b) { + case 0x0d: /* GrpP (prefetch) */ + case 0x18: /* Grp16 (prefetch/nop) */ + break; + case 0x06: + emulate_clts(ctxt->vcpu); + break; + case 0x20: /* mov cr, reg */ + if (modrm_mod != 3) + goto cannot_emulate; + _regs[modrm_rm] = realmode_get_cr(ctxt->vcpu, modrm_reg); + break; + case 0x22: /* mov reg, cr */ + if (modrm_mod != 3) + goto cannot_emulate; + realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); + break; + case 0xc7: /* Grp9 (cmpxchg8b) */ +#if defined(__i386__) + { + unsigned long old_lo, old_hi; + if (((rc = ops->read_emulated(cr2 + 0, &old_lo, 4, + ctxt)) != 0) + || ((rc = ops->read_emulated(cr2 + 4, &old_hi, 4, + ctxt)) != 0)) + goto done; + if ((old_lo != _regs[VCPU_REGS_RAX]) + || (old_hi != _regs[VCPU_REGS_RDI])) { + _regs[VCPU_REGS_RAX] = old_lo; + _regs[VCPU_REGS_RDX] = old_hi; + _eflags &= ~EFLG_ZF; + } else if (ops->cmpxchg8b_emulated == NULL) { + rc = X86EMUL_UNHANDLEABLE; + goto done; + } else { + if ((rc = ops->cmpxchg8b_emulated(cr2, old_lo, + old_hi, + _regs[VCPU_REGS_RBX], + _regs[VCPU_REGS_RCX], + ctxt)) != 0) + goto done; + _eflags |= EFLG_ZF; + } + break; + } +#elif defined(__x86_64__) + { + unsigned long old, new; + if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) + goto done; + if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || + ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) { + _regs[VCPU_REGS_RAX] = (u32) (old >> 0); + _regs[VCPU_REGS_RDX] = (u32) (old >> 32); + _eflags &= ~EFLG_ZF; + } else { + new = (_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX]; + if ((rc = ops->cmpxchg_emulated(cr2, old, + new, 8, ctxt)) != 0) + goto done; + _eflags |= EFLG_ZF; + } + break; + } +#endif + } + goto writeback; + +cannot_emulate: + DPRINTF("Cannot emulate %02x\n", b); + return -1; +} + +#ifdef __XEN__ + +#include <asm/mm.h> +#include <asm/uaccess.h> + +int +x86_emulate_read_std(unsigned long addr, + unsigned long *val, + unsigned int bytes, struct x86_emulate_ctxt *ctxt) +{ + unsigned int rc; + + *val = 0; + + if ((rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0) { + propagate_page_fault(addr + bytes - rc, 0); /* read fault */ + return X86EMUL_PROPAGATE_FAULT; + } + + return X86EMUL_CONTINUE; +} + +int +x86_emulate_write_std(unsigned long addr, + unsigned long val, + unsigned int bytes, struct x86_emulate_ctxt *ctxt) +{ + unsigned int rc; + + if ((rc = copy_to_user((void *)addr, (void *)&val, bytes)) != 0) { + propagate_page_fault(addr + bytes - rc, PGERR_write_access); + return X86EMUL_PROPAGATE_FAULT; + } + + return X86EMUL_CONTINUE; +} + +#endif diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h new file mode 100644 index 00000000000..658b58de30f --- /dev/null +++ b/drivers/kvm/x86_emulate.h @@ -0,0 +1,185 @@ +/****************************************************************************** + * x86_emulate.h + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. 
+ * + * Copyright (c) 2005 Keir Fraser + * + * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 + */ + +#ifndef __X86_EMULATE_H__ +#define __X86_EMULATE_H__ + +struct x86_emulate_ctxt; + +/* + * x86_emulate_ops: + * + * These operations represent the instruction emulator's interface to memory. + * There are two categories of operation: those that act on ordinary memory + * regions (*_std), and those that act on memory regions known to require + * special treatment or emulation (*_emulated). + * + * The emulator assumes that an instruction accesses only one 'emulated memory' + * location, that this location is the given linear faulting address (cr2), and + * that this is one of the instruction's data operands. Instruction fetches and + * stack operations are assumed never to access emulated memory. The emulator + * automatically deduces which operand of a string-move operation is accessing + * emulated memory, and assumes that the other operand accesses normal memory. + * + * NOTES: + * 1. The emulator isn't very smart about emulated vs. standard memory. + * 'Emulated memory' access addresses should be checked for sanity. + * 'Normal memory' accesses may fault, and the caller must arrange to + * detect and handle reentrancy into the emulator via recursive faults. + * Accesses may be unaligned and may cross page boundaries. + * 2. If the access fails (cannot emulate, or a standard access faults) then + * it is up to the memop to propagate the fault to the guest VM via + * some out-of-band mechanism, unknown to the emulator. The memop signals + * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will + * then immediately bail. + * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only + * cmpxchg8b_emulated need support 8-byte accesses. + * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. + */ +/* Access completed successfully: continue emulation as normal. */ +#define X86EMUL_CONTINUE 0 +/* Access is unhandleable: bail from emulation and return error to caller. */ +#define X86EMUL_UNHANDLEABLE 1 +/* Terminate emulation but return success to the caller. */ +#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ +#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ +struct x86_emulate_ops { + /* + * read_std: Read bytes of standard (non-emulated/special) memory. + * Used for instruction fetch, stack operations, and others. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_std)(unsigned long addr, + unsigned long *val, + unsigned int bytes, struct x86_emulate_ctxt * ctxt); + + /* + * write_std: Write bytes of standard (non-emulated/special) memory. + * Used for stack operations, and others. + * @addr: [IN ] Linear address to which to write. + * @val: [IN ] Value to write to memory (low-order bytes used as + * required). + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_std)(unsigned long addr, + unsigned long val, + unsigned int bytes, struct x86_emulate_ctxt * ctxt); + + /* + * read_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. 
+ */ + int (*read_emulated) (unsigned long addr, + unsigned long *val, + unsigned int bytes, + struct x86_emulate_ctxt * ctxt); + + /* + * write_emulated: Write bytes to emulated/special memory area. + * @addr: [IN ] Linear address to which to write. + * @val: [IN ] Value to write to memory (low-order bytes used as + * required). + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_emulated) (unsigned long addr, + unsigned long val, + unsigned int bytes, + struct x86_emulate_ctxt * ctxt); + + /* + * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an + * emulated/special memory area. + * @addr: [IN ] Linear address to access. + * @old: [IN ] Value expected to be current at @addr. + * @new: [IN ] Value to write to @addr. + * @bytes: [IN ] Number of bytes to access using CMPXCHG. + */ + int (*cmpxchg_emulated) (unsigned long addr, + unsigned long old, + unsigned long new, + unsigned int bytes, + struct x86_emulate_ctxt * ctxt); + + /* + * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an + * emulated/special memory area. + * @addr: [IN ] Linear address to access. + * @old: [IN ] Value expected to be current at @addr. + * @new: [IN ] Value to write to @addr. + * NOTES: + * 1. This function is only ever called when emulating a real CMPXCHG8B. + * 2. This function is *never* called on x86/64 systems. + * 3. Not defining this function (i.e., specifying NULL) is equivalent + * to defining a function that always returns X86EMUL_UNHANDLEABLE. + */ + int (*cmpxchg8b_emulated) (unsigned long addr, + unsigned long old_lo, + unsigned long old_hi, + unsigned long new_lo, + unsigned long new_hi, + struct x86_emulate_ctxt * ctxt); +}; + +struct cpu_user_regs; + +struct x86_emulate_ctxt { + /* Register state before/after emulation. */ + struct kvm_vcpu *vcpu; + + /* Linear faulting address (if emulating a page-faulting instruction). */ + unsigned long eflags; + unsigned long cr2; + + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; + + unsigned long cs_base; + unsigned long ds_base; + unsigned long es_base; + unsigned long ss_base; + unsigned long gs_base; + unsigned long fs_base; +}; + +/* Execution mode, passed to the emulator. */ +#define X86EMUL_MODE_REAL 0 /* Real mode. */ +#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ +#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ +#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ + +/* Host execution mode. */ +#if defined(__i386__) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 +#elif defined(__x86_64__) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 +#endif + +/* + * x86_emulate_memop: Emulate an instruction that faulted attempting to + * read/write a 'special' memory area. + * Returns -1 on failure, 0 on success. + */ +int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); + +/* + * Given the 'reg' portion of a ModRM byte, and a register block, return a + * pointer into the block that addresses the relevant register. + * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
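To show how a caller is expected to wire this interface up, here is a minimal, purely hypothetical sketch (the my_* callbacks and the PROT32 mode are invented for illustration and are not part of the patch; KVM supplies its own callbacks elsewhere in the driver):

        /* Illustrative sketch only. */
        static struct x86_emulate_ops my_ops = {
                .read_std           = my_fetch_read,   /* fetches, stack, descriptors */
                .write_std          = my_stack_write,
                .read_emulated      = my_mmio_read,    /* the faulting data access */
                .write_emulated     = my_mmio_write,
                .cmpxchg_emulated   = my_mmio_cmpxchg,
                .cmpxchg8b_emulated = NULL,     /* NULL behaves as "unhandleable" */
        };

        static int emulate_faulting_insn(struct kvm_vcpu *vcpu, unsigned long cr2)
        {
                struct x86_emulate_ctxt ctxt = {
                        .vcpu = vcpu,
                        .cr2  = cr2,            /* linear address that faulted */
                        .mode = X86EMUL_MODE_PROT32,
                        /* eflags and the cs/ds/es/ss/fs/gs base fields must
                         * also be filled in from the guest state. */
                };

                return x86_emulate_memop(&ctxt, &my_ops); /* 0 on success, -1 on failure */
        }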
+ */ +void *decode_register(u8 modrm_reg, unsigned long *regs, + int highbyte_regs); + +#endif /* __X86_EMULATE_H__ */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 53bd46dba0c..21e2a7b0884 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3314,6 +3314,10 @@ static int do_md_stop(mddev_t * mddev, int mode) module_put(mddev->pers->owner); mddev->pers = NULL; + + set_capacity(disk, 0); + mddev->changed = 1; + if (mddev->ro) mddev->ro = 0; } @@ -3333,7 +3337,7 @@ static int do_md_stop(mddev_t * mddev, int mode) if (mode == 0) { mdk_rdev_t *rdev; struct list_head *tmp; - struct gendisk *disk; + printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); bitmap_destroy(mddev); @@ -3358,10 +3362,6 @@ static int do_md_stop(mddev_t * mddev, int mode) mddev->raid_disks = 0; mddev->recovery_cp = 0; - disk = mddev->gendisk; - if (disk) - set_capacity(disk, 0); - mddev->changed = 1; } else if (mddev->pers) printk(KERN_INFO "md: %s switched to read-only mode.\n", mdname(mddev)); @@ -3371,6 +3371,7 @@ out: return err; } +#ifndef MODULE static void autorun_array(mddev_t *mddev) { mdk_rdev_t *rdev; @@ -3485,6 +3486,7 @@ static void autorun_devices(int part) } printk(KERN_INFO "md: ... autorun DONE.\n"); } +#endif /* !MODULE */ static int get_version(void __user * arg) { @@ -3722,6 +3724,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (err) export_rdev(rdev); + md_update_sb(mddev, 1); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); return err; @@ -5273,7 +5276,6 @@ void md_do_sync(mddev_t *mddev) mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { @@ -5297,6 +5299,7 @@ void md_do_sync(mddev_t *mddev) rdev->recovery_offset = mddev->curr_resync; } } + set_bit(MD_CHANGE_DEVS, &mddev->flags); skip: mddev->curr_resync = 0; @@ -5593,7 +5596,7 @@ static void autostart_arrays(int part) autorun_devices(part); } -#endif +#endif /* !MODULE */ static __exit void md_exit(void) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 656fae912fe..b3c5e12f081 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1951,6 +1951,7 @@ static int run(mddev_t *mddev) !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; + conf->fullsync = 1; } } if (mddev->degraded == conf->raid_disks) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 52914d5cec7..377f8bc9b78 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -134,6 +134,8 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) if (!test_bit(STRIPE_EXPANDING, &sh->state)) { list_add_tail(&sh->lru, &conf->inactive_list); wake_up(&conf->wait_for_stripe); + if (conf->retry_read_aligned) + md_wakeup_thread(conf->mddev->thread); } } } @@ -542,35 +544,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, } if (uptodate) { -#if 0 - struct bio *bio; - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - /* we can return a buffer if we bypassed the cache or - * if the top buffer is not in highmem. 
If there are - * multiple buffers, leave the extra work to - * handle_stripe - */ - buffer = sh->bh_read[i]; - if (buffer && - (!PageHighMem(buffer->b_page) - || buffer->b_page == bh->b_page ) - ) { - sh->bh_read[i] = buffer->b_reqnext; - buffer->b_reqnext = NULL; - } else - buffer = NULL; - spin_unlock_irqrestore(&conf->device_lock, flags); - if (sh->bh_page[i]==bh->b_page) - set_buffer_uptodate(bh); - if (buffer) { - if (buffer->b_page != bh->b_page) - memcpy(buffer->b_data, bh->b_data, bh->b_size); - buffer->b_end_io(buffer, 1); - } -#else set_bit(R5_UPTODATE, &sh->dev[i].flags); -#endif if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", @@ -616,14 +590,6 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, } } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); -#if 0 - /* must restore b_page before unlocking buffer... */ - if (sh->bh_page[i] != bh->b_page) { - bh->b_page = sh->bh_page[i]; - bh->b_data = page_address(bh->b_page); - clear_buffer_uptodate(bh); - } -#endif clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); @@ -821,7 +787,8 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, static sector_t compute_blocknr(struct stripe_head *sh, int i) { raid5_conf_t *conf = sh->raid_conf; - int raid_disks = sh->disks, data_disks = raid_disks - 1; + int raid_disks = sh->disks; + int data_disks = raid_disks - conf->max_degraded; sector_t new_sector = sh->sector, check; int sectors_per_chunk = conf->chunk_size >> 9; sector_t stripe; @@ -857,7 +824,6 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) } break; case 6: - data_disks = raid_disks - 2; if (i == raid6_next_disk(sh->pd_idx, raid_disks)) return 0; /* It is the Q disk */ switch (conf->algorithm) { @@ -1353,8 +1319,10 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks) int pd_idx, dd_idx; int chunk_offset = sector_div(stripe, sectors_per_chunk); - raid5_compute_sector(stripe*(disks-1)*sectors_per_chunk - + chunk_offset, disks, disks-1, &dd_idx, &pd_idx, conf); + raid5_compute_sector(stripe * (disks - conf->max_degraded) + *sectors_per_chunk + chunk_offset, + disks, disks - conf->max_degraded, + &dd_idx, &pd_idx, conf); return pd_idx; } @@ -1615,15 +1583,6 @@ static void handle_stripe5(struct stripe_head *sh) } else if (test_bit(R5_Insync, &dev->flags)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); -#if 0 - /* if I am just reading this block and we don't have - a failed drive, or any pending writes then sidestep the cache */ - if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext && - ! 
syncing && !failed && !to_write) { - sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page; - sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data; - } -#endif locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); @@ -1641,9 +1600,6 @@ static void handle_stripe5(struct stripe_head *sh) dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && (!test_bit(R5_LOCKED, &dev->flags) -#if 0 -|| sh->bh_page[i]!=bh->b_page -#endif ) && !test_bit(R5_UPTODATE, &dev->flags)) { if (test_bit(R5_Insync, &dev->flags) @@ -1655,9 +1611,6 @@ static void handle_stripe5(struct stripe_head *sh) /* Would I have to read this buffer for reconstruct_write */ if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && (!test_bit(R5_LOCKED, &dev->flags) -#if 0 -|| sh->bh_page[i] != bh->b_page -#endif ) && !test_bit(R5_UPTODATE, &dev->flags)) { if (test_bit(R5_Insync, &dev->flags)) rcw++; @@ -1865,7 +1818,9 @@ static void handle_stripe5(struct stripe_head *sh) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - bi->bi_end_io(bi, bytes, 0); + bi->bi_end_io(bi, bytes, + test_bit(BIO_UPTODATE, &bi->bi_flags) + ? 0 : -EIO); } for (i=disks; i-- ;) { int rw; @@ -2193,15 +2148,6 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) } else if (test_bit(R5_Insync, &dev->flags)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); -#if 0 - /* if I am just reading this block and we don't have - a failed drive, or any pending writes then sidestep the cache */ - if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext && - ! syncing && !failed && !to_write) { - sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page; - sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data; - } -#endif locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); @@ -2220,9 +2166,6 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (!test_bit(R5_OVERWRITE, &dev->flags) && i != pd_idx && i != qd_idx && (!test_bit(R5_LOCKED, &dev->flags) -#if 0 - || sh->bh_page[i] != bh->b_page -#endif ) && !test_bit(R5_UPTODATE, &dev->flags)) { if (test_bit(R5_Insync, &dev->flags)) rcw++; @@ -2418,7 +2361,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - bi->bi_end_io(bi, bytes, 0); + bi->bi_end_io(bi, bytes, + test_bit(BIO_UPTODATE, &bi->bi_flags) + ? 0 : -EIO); } for (i=disks; i-- ;) { int rw; @@ -2611,6 +2556,180 @@ static int raid5_congested(void *data, int bits) return 0; } +/* We want read requests to align with chunks where possible, + * but write requests don't need to. 
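A worked example of the read-side arithmetic in the merge function below, with invented numbers (not from the patch): with 64KiB chunks (128 sectors), a read bio that already covers sectors 100..119 of its chunk can grow by at most 8 more sectors before it would spill into the next chunk, so a larger biovec is trimmed to 4096 bytes, while a write is always allowed to merge in full.

        /* Hypothetical helper repeating the calculation with fixed values. */
        static int example_max_merge_bytes(void)
        {
                unsigned int chunk_sectors = 128;       /* 64KiB chunk             */
                unsigned int sector = 100;              /* bio start, within chunk */
                unsigned int bio_sectors = 20;          /* sectors already queued  */

                return (chunk_sectors - ((sector & (chunk_sectors - 1))
                                         + bio_sectors)) << 9;  /* 4096 bytes */
        }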
+ */ +static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +{ + mddev_t *mddev = q->queuedata; + sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + int max; + unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int bio_sectors = bio->bi_size >> 9; + + if (bio_data_dir(bio)) + return biovec->bv_len; /* always allow writes to be mergeable */ + + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (max < 0) max = 0; + if (max <= biovec->bv_len && bio_sectors == 0) + return biovec->bv_len; + else + return max; +} + + +static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) +{ + sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int bio_sectors = bio->bi_size >> 9; + + return chunk_sectors >= + ((sector & (chunk_sectors - 1)) + bio_sectors); +} + +/* + * add bio to the retry LIFO ( in O(1) ... we are in interrupt ) + * later sampled by raid5d. + */ +static void add_bio_to_retry(struct bio *bi,raid5_conf_t *conf) +{ + unsigned long flags; + + spin_lock_irqsave(&conf->device_lock, flags); + + bi->bi_next = conf->retry_read_aligned_list; + conf->retry_read_aligned_list = bi; + + spin_unlock_irqrestore(&conf->device_lock, flags); + md_wakeup_thread(conf->mddev->thread); +} + + +static struct bio *remove_bio_from_retry(raid5_conf_t *conf) +{ + struct bio *bi; + + bi = conf->retry_read_aligned; + if (bi) { + conf->retry_read_aligned = NULL; + return bi; + } + bi = conf->retry_read_aligned_list; + if(bi) { + conf->retry_read_aligned = bi->bi_next; + bi->bi_next = NULL; + bi->bi_phys_segments = 1; /* biased count of active stripes */ + bi->bi_hw_segments = 0; /* count of processed stripes */ + } + + return bi; +} + + +/* + * The "raid5_align_endio" should check if the read succeeded and if it + * did, call bio_endio on the original bio (having bio_put the new bio + * first). + * If the read failed.. + */ +static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error) +{ + struct bio* raid_bi = bi->bi_private; + mddev_t *mddev; + raid5_conf_t *conf; + int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); + mdk_rdev_t *rdev; + + if (bi->bi_size) + return 1; + bio_put(bi); + + mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; + conf = mddev_to_conf(mddev); + rdev = (void*)raid_bi->bi_next; + raid_bi->bi_next = NULL; + + rdev_dec_pending(rdev, conf->mddev); + + if (!error && uptodate) { + bio_endio(raid_bi, bytes, 0); + if (atomic_dec_and_test(&conf->active_aligned_reads)) + wake_up(&conf->wait_for_stripe); + return 0; + } + + + PRINTK("raid5_align_endio : io error...handing IO for a retry\n"); + + add_bio_to_retry(raid_bi, conf); + return 0; +} + +static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) +{ + mddev_t *mddev = q->queuedata; + raid5_conf_t *conf = mddev_to_conf(mddev); + const unsigned int raid_disks = conf->raid_disks; + const unsigned int data_disks = raid_disks - conf->max_degraded; + unsigned int dd_idx, pd_idx; + struct bio* align_bi; + mdk_rdev_t *rdev; + + if (!in_chunk_boundary(mddev, raid_bio)) { + printk("chunk_aligned_read : non aligned\n"); + return 0; + } + /* + * use bio_clone to make a copy of the bio + */ + align_bi = bio_clone(raid_bio, GFP_NOIO); + if (!align_bi) + return 0; + /* + * set bi_end_io to a new function, and set bi_private to the + * original bio. 
+ */ + align_bi->bi_end_io = raid5_align_endio; + align_bi->bi_private = raid_bio; + /* + * compute position + */ + align_bi->bi_sector = raid5_compute_sector(raid_bio->bi_sector, + raid_disks, + data_disks, + &dd_idx, + &pd_idx, + conf); + + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[dd_idx].rdev); + if (rdev && test_bit(In_sync, &rdev->flags)) { + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + raid_bio->bi_next = (void*)rdev; + align_bi->bi_bdev = rdev->bdev; + align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); + align_bi->bi_sector += rdev->data_offset; + + spin_lock_irq(&conf->device_lock); + wait_event_lock_irq(conf->wait_for_stripe, + conf->quiesce == 0, + conf->device_lock, /* nothing */); + atomic_inc(&conf->active_aligned_reads); + spin_unlock_irq(&conf->device_lock); + + generic_make_request(align_bi); + return 1; + } else { + rcu_read_unlock(); + bio_put(align_bi); + return 0; + } +} + + static int make_request(request_queue_t *q, struct bio * bi) { mddev_t *mddev = q->queuedata; @@ -2632,6 +2751,11 @@ static int make_request(request_queue_t *q, struct bio * bi) disk_stat_inc(mddev->gendisk, ios[rw]); disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + if (bio_data_dir(bi) == READ && + mddev->reshape_position == MaxSector && + chunk_aligned_read(q,bi)) + return 0; + logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); last_sector = bi->bi_sector + (bi->bi_size>>9); bi->bi_next = NULL; @@ -2739,7 +2863,9 @@ static int make_request(request_queue_t *q, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); bi->bi_size = 0; - bi->bi_end_io(bi, bytes, 0); + bi->bi_end_io(bi, bytes, + test_bit(BIO_UPTODATE, &bi->bi_flags) + ? 0 : -EIO); } return 0; } @@ -2950,6 +3076,74 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski return STRIPE_SECTORS; } +static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) +{ + /* We may not be able to submit a whole bio at once as there + * may not be enough stripe_heads available. + * We cannot pre-allocate enough stripe_heads as we may need + * more than exist in the cache (if we allow ever large chunks). + * So we do one stripe head at a time and record in + * ->bi_hw_segments how many have been done. + * + * We *know* that this entire raid_bio is in one chunk, so + * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. 
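Restated as a sketch (hypothetical code, not part of the patch; try_handle_one_stripe() is invented): bi_hw_segments acts as a resume cursor, so a bio that could not be finished in one pass is re-queued and later picked up again exactly where it stopped, while bi_phys_segments carries the biased count that keeps the bio alive until its last stripe completes.

        static int walk_aligned_bio(struct bio *raid_bio, int nr_stripes)
        {
                int scnt;

                for (scnt = 0; scnt < nr_stripes; scnt++) {
                        if (scnt < raid_bio->bi_hw_segments)
                                continue;       /* done on an earlier pass */
                        if (!try_handle_one_stripe(raid_bio, scnt)) {
                                raid_bio->bi_hw_segments = scnt; /* resume point */
                                return 0;       /* requeue and retry later */
                        }
                }
                return 1;                       /* every stripe handled */
        }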
+ */ + struct stripe_head *sh; + int dd_idx, pd_idx; + sector_t sector, logical_sector, last_sector; + int scnt = 0; + int remaining; + int handled = 0; + + logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); + sector = raid5_compute_sector( logical_sector, + conf->raid_disks, + conf->raid_disks - conf->max_degraded, + &dd_idx, + &pd_idx, + conf); + last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); + + for (; logical_sector < last_sector; + logical_sector += STRIPE_SECTORS, scnt++) { + + if (scnt < raid_bio->bi_hw_segments) + /* already done this stripe */ + continue; + + sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1); + + if (!sh) { + /* failed to get a stripe - must wait */ + raid_bio->bi_hw_segments = scnt; + conf->retry_read_aligned = raid_bio; + return handled; + } + + set_bit(R5_ReadError, &sh->dev[dd_idx].flags); + add_stripe_bio(sh, raid_bio, dd_idx, 0); + handle_stripe(sh, NULL); + release_stripe(sh); + handled++; + } + spin_lock_irq(&conf->device_lock); + remaining = --raid_bio->bi_phys_segments; + spin_unlock_irq(&conf->device_lock); + if (remaining == 0) { + int bytes = raid_bio->bi_size; + + raid_bio->bi_size = 0; + raid_bio->bi_end_io(raid_bio, bytes, + test_bit(BIO_UPTODATE, &raid_bio->bi_flags) + ? 0 : -EIO); + } + if (atomic_dec_and_test(&conf->active_aligned_reads)) + wake_up(&conf->wait_for_stripe); + return handled; +} + + + /* * This is our raid5 kernel thread. * @@ -2971,6 +3165,7 @@ static void raid5d (mddev_t *mddev) spin_lock_irq(&conf->device_lock); while (1) { struct list_head *first; + struct bio *bio; if (conf->seq_flush != conf->seq_write) { int seq = conf->seq_flush; @@ -2987,6 +3182,16 @@ static void raid5d (mddev_t *mddev) !list_empty(&conf->delayed_list)) raid5_activate_delayed(conf); + while ((bio = remove_bio_from_retry(conf))) { + int ok; + spin_unlock_irq(&conf->device_lock); + ok = retry_aligned_read(conf, bio); + spin_lock_irq(&conf->device_lock); + if (!ok) + break; + handled++; + } + if (list_empty(&conf->handle_list)) break; @@ -3174,6 +3379,7 @@ static int run(mddev_t *mddev) INIT_LIST_HEAD(&conf->inactive_list); atomic_set(&conf->active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0); + atomic_set(&conf->active_aligned_reads, 0); PRINTK("raid5: run(%s) called.\n", mdname(mddev)); @@ -3320,6 +3526,8 @@ static int run(mddev_t *mddev) mddev->array_size = mddev->size * (conf->previous_raid_disks - conf->max_degraded); + blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + return 0; abort: if (conf) { @@ -3694,7 +3902,8 @@ static void raid5_quiesce(mddev_t *mddev, int state) spin_lock_irq(&conf->device_lock); conf->quiesce = 1; wait_event_lock_irq(conf->wait_for_stripe, - atomic_read(&conf->active_stripes) == 0, + atomic_read(&conf->active_stripes) == 0 && + atomic_read(&conf->active_aligned_reads) == 0, conf->device_lock, /* nothing */); spin_unlock_irq(&conf->device_lock); break; diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index c7fa28a28b9..36c6a1bfe55 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -82,9 +82,6 @@ config PARPORT_PC_PCMCIA Say Y here if you need PCMCIA support for your PC-style parallel ports. If unsure, say N. -config PARPORT_NOT_PC - bool - config PARPORT_IP32 tristate "SGI IP32 builtin port (EXPERIMENTAL)" depends on SGI_IP32 && PARPORT && EXPERIMENTAL @@ -158,5 +155,8 @@ config PARPORT_1284 transfer modes. Also say Y if you want device ID information to appear in /proc/sys/dev/parport/*/autoprobe*. It is safe to say N. 
+config PARPORT_NOT_PC + bool + endmenu diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 6e780db9454..adce4204d87 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -76,7 +76,8 @@ config HOTPLUG_PCI_IBM config HOTPLUG_PCI_ACPI tristate "ACPI PCI Hotplug driver" - depends on (!ACPI_DOCK && ACPI && HOTPLUG_PCI) || (ACPI_DOCK && HOTPLUG_PCI) + depends on HOTPLUG_PCI + depends on (!ACPI_DOCK && ACPI) || (ACPI_DOCK) help Say Y here if you have a system that supports PCI Hotplug using ACPI. diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 2a63ab2b47f..09660e2ab05 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -288,7 +288,7 @@ config RTC_DRV_PL031 To compile this driver as a module, choose M here: the module will be called rtc-pl031. -config RTC_DRV_AT91 +config RTC_DRV_AT91RM9200 tristate "AT91RM9200" depends on RTC_CLASS && ARCH_AT91RM9200 help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index bd4c45d333f..e6beedacc96 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -35,5 +35,5 @@ obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o -obj-$(CONFIG_RTC_DRV_AT91) += rtc-at91.o +obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o diff --git a/drivers/rtc/rtc-at91.c b/drivers/rtc/rtc-at91rm9200.c index 5c8addcaf1f..5c8addcaf1f 100644 --- a/drivers/rtc/rtc-at91.c +++ b/drivers/rtc/rtc-at91rm9200.c diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index dfef1637bfb..205fa28593b 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -199,7 +199,7 @@ static int ds1672_probe(struct i2c_adapter *adapter, int address, int kind) struct i2c_client *client; struct rtc_device *rtc; - dev_dbg(&adapter->dev, "%s\n", __FUNCTION__); + dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { err = -ENODEV; diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index ba795a4db1e..7bbc26a34bd 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -117,4 +117,85 @@ int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time) } EXPORT_SYMBOL(rtc_tm_to_time); + +/* Merge the valid (i.e. non-negative) fields of alarm into the current + * time. If the valid alarm fields are earlier than the equivalent + * fields in the time, carry one into the least significant invalid + * field, so that the alarm expiry is in the future. It assumes that the + * least significant invalid field is more significant than the most + * significant valid field, and that the seconds field is valid. + * + * This is used by alarms that take relative (rather than absolute) + * times, and/or have a simple binary second counter instead of + * day/hour/minute/sec registers. + */ +void rtc_merge_alarm(struct rtc_time *now, struct rtc_time *alarm) +{ + int *alarmp = &alarm->tm_sec; + int *timep = &now->tm_sec; + int carry_into, i; + + /* Ignore everything past the 6th element (tm_year). */ + for (i = 5; i > 0; i--) { + if (alarmp[i] < 0) + alarmp[i] = timep[i]; + else + break; + } + + /* No carry needed if all fields are valid. 
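A purely illustrative example of the merge semantics described in the comment above (hypothetical values, not part of the patch): suppose the current time is 10:30:45 on 15 June 2007 and the alarm carries only a seconds value, every other field being negative:

	struct rtc_time now = {
		.tm_sec = 45, .tm_min = 30, .tm_hour = 10,
		.tm_mday = 15, .tm_mon = 5, .tm_year = 107,
	};
	struct rtc_time alarm = {
		.tm_sec = 20, .tm_min = -1, .tm_hour = -1,
		.tm_mday = -1, .tm_mon = -1, .tm_year = -1,
	};

	rtc_merge_alarm(&now, &alarm);

The invalid fields are copied from "now", and since 20 seconds is earlier than 45 seconds the expiry would otherwise lie in the past, so one is carried into the least significant invalid field: the merged alarm reads 10:31:20 on the same day.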
*/ + if (i == 5) + return; + + for (carry_into = i + 1; i >= 0; i--) { + if (alarmp[i] < timep[i]) + break; + + if (alarmp[i] > timep[i]) + return; + } + + switch (carry_into) { + case 1: + alarm->tm_min++; + + if (alarm->tm_min < 60) + return; + + alarm->tm_min = 0; + /* fall-through */ + + case 2: + alarm->tm_hour++; + + if (alarm->tm_hour < 60) + return; + + alarm->tm_hour = 0; + /* fall-through */ + + case 3: + alarm->tm_mday++; + + if (alarm->tm_mday <= rtc_days_in_month[alarm->tm_mon]) + return; + + alarm->tm_mday = 1; + /* fall-through */ + + case 4: + alarm->tm_mon++; + + if (alarm->tm_mon <= 12) + return; + + alarm->tm_mon = 1; + /* fall-through */ + + case 5: + alarm->tm_year++; + } +} +EXPORT_SYMBOL(rtc_merge_alarm); + MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index a760cf69af9..4b72b8ef5d6 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -192,7 +192,7 @@ static int pcf8563_validate_client(struct i2c_client *client) xfer = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); if (xfer != ARRAY_SIZE(msgs)) { - dev_err(&client->adapter->dev, + dev_err(&client->dev, "%s: could not read register 0x%02X\n", __FUNCTION__, pattern[i].reg); @@ -203,7 +203,7 @@ static int pcf8563_validate_client(struct i2c_client *client) if (value > pattern[i].max || value < pattern[i].min) { - dev_dbg(&client->adapter->dev, + dev_dbg(&client->dev, "%s: pattern=%d, reg=%x, mask=0x%02x, min=%d, " "max=%d, value=%d, raw=0x%02X\n", __FUNCTION__, i, pattern[i].reg, pattern[i].mask, @@ -253,7 +253,7 @@ static int pcf8563_probe(struct i2c_adapter *adapter, int address, int kind) int err = 0; - dev_dbg(&adapter->dev, "%s\n", __FUNCTION__); + dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { err = -ENODEV; diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index e2c7698fdba..1460f6b769f 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -200,7 +200,7 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) struct i2c_client *client; struct rs5c372 *rs5c372; - dev_dbg(&adapter->dev, "%s\n", __FUNCTION__); + dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { err = -ENODEV; diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 9a67487d086..019ae255b0c 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -372,7 +372,7 @@ static int x1205_validate_client(struct i2c_client *client) }; if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) { - dev_err(&client->adapter->dev, + dev_err(&client->dev, "%s: could not read register %x\n", __FUNCTION__, probe_zero_pattern[i]); @@ -380,7 +380,7 @@ static int x1205_validate_client(struct i2c_client *client) } if ((buf & probe_zero_pattern[i+1]) != 0) { - dev_err(&client->adapter->dev, + dev_err(&client->dev, "%s: register=%02x, zero pattern=%d, value=%x\n", __FUNCTION__, probe_zero_pattern[i], i, buf); @@ -400,7 +400,7 @@ static int x1205_validate_client(struct i2c_client *client) }; if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) { - dev_err(&client->adapter->dev, + dev_err(&client->dev, "%s: could not read register %x\n", __FUNCTION__, probe_limits_pattern[i].reg); @@ -411,7 +411,7 @@ static int x1205_validate_client(struct i2c_client *client) if (value > probe_limits_pattern[i].max || value < probe_limits_pattern[i].min) { - dev_dbg(&client->adapter->dev, + dev_dbg(&client->dev, 
"%s: register=%x, lim pattern=%d, value=%d\n", __FUNCTION__, probe_limits_pattern[i].reg, i, value); @@ -506,7 +506,7 @@ static int x1205_probe(struct i2c_adapter *adapter, int address, int kind) struct i2c_client *client; struct rtc_device *rtc; - dev_dbg(&adapter->dev, "%s\n", __FUNCTION__); + dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) { err = -ENODEV; diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index fc12d5df10e..0b36dd5cdac 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -151,32 +151,6 @@ config SERIAL_8250_MANY_PORTS say N here to save some memory. You can also say Y if you have an "intelligent" multiport card such as Cyclades, Digiboards, etc. -config SERIAL_8250_SHARE_IRQ - bool "Support for sharing serial interrupts" - depends on SERIAL_8250_EXTENDED - help - Some serial boards have hardware support which allows multiple dumb - serial ports on the same board to share a single IRQ. To enable - support for this in the serial driver, say Y here. - -config SERIAL_8250_DETECT_IRQ - bool "Autodetect IRQ on standard ports (unsafe)" - depends on SERIAL_8250_EXTENDED - help - Say Y here if you want the kernel to try to guess which IRQ - to use for your serial port. - - This is considered unsafe; it is far better to configure the IRQ in - a boot script using the setserial command. - - If unsure, say N. - -config SERIAL_8250_RSA - bool "Support RSA serial ports" - depends on SERIAL_8250_EXTENDED - help - ::: To be written ::: - # # Multi-port serial cards # @@ -199,7 +173,6 @@ config SERIAL_8250_ACCENT To compile this driver as a module, choose M here: the module will be called 8250_accent. - config SERIAL_8250_BOCA tristate "Support Boca cards" depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS @@ -230,6 +203,32 @@ config SERIAL_8250_HUB6 To compile this driver as a module, choose M here: the module will be called 8250_hub6. +config SERIAL_8250_SHARE_IRQ + bool "Support for sharing serial interrupts" + depends on SERIAL_8250_EXTENDED + help + Some serial boards have hardware support which allows multiple dumb + serial ports on the same board to share a single IRQ. To enable + support for this in the serial driver, say Y here. + +config SERIAL_8250_DETECT_IRQ + bool "Autodetect IRQ on standard ports (unsafe)" + depends on SERIAL_8250_EXTENDED + help + Say Y here if you want the kernel to try to guess which IRQ + to use for your serial port. + + This is considered unsafe; it is far better to configure the IRQ in + a boot script using the setserial command. + + If unsure, say N. 
+ +config SERIAL_8250_RSA + bool "Support RSA serial ports" + depends on SERIAL_8250_EXTENDED + help + ::: To be written ::: + config SERIAL_8250_MCA tristate "Support 8250-type ports on MCA buses" depends on SERIAL_8250 != n && MCA diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 494d9b85648..6ed3f1da929 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -49,6 +49,14 @@ MODULE_LICENSE("GPL"); #define RESET_DMA_CHANNEL (DCSR_NODESC | DMA_INT_MASK) #define IS_DMA_ALIGNED(x) (((u32)(x)&0x07)==0) +/* for testing SSCR1 changes that require SSP restart, basically + * everything except the service and interrupt enables */ +#define SSCR1_CHANGE_MASK (SSCR1_TTELP | SSCR1_TTE | SSCR1_EBCEI | SSCR1_SCFR \ + | SSCR1_ECRA | SSCR1_ECRB | SSCR1_SCLKDIR \ + | SSCR1_RWOT | SSCR1_TRAIL | SSCR1_PINTE \ + | SSCR1_STRF | SSCR1_EFWR |SSCR1_RFT \ + | SSCR1_TFT | SSCR1_SPH | SSCR1_SPO | SSCR1_LBM) + #define DEFINE_SSP_REG(reg, off) \ static inline u32 read_##reg(void *p) { return __raw_readl(p + (off)); } \ static inline void write_##reg(u32 v, void *p) { __raw_writel(v, p + (off)); } @@ -123,8 +131,8 @@ struct driver_data { u8 n_bytes; u32 dma_width; int cs_change; - void (*write)(struct driver_data *drv_data); - void (*read)(struct driver_data *drv_data); + int (*write)(struct driver_data *drv_data); + int (*read)(struct driver_data *drv_data); irqreturn_t (*transfer_handler)(struct driver_data *drv_data); void (*cs_control)(u32 command); }; @@ -132,7 +140,6 @@ struct driver_data { struct chip_data { u32 cr0; u32 cr1; - u32 to; u32 psp; u32 timeout; u8 n_bytes; @@ -143,8 +150,8 @@ struct chip_data { u8 enable_dma; u8 bits_per_word; u32 speed_hz; - void (*write)(struct driver_data *drv_data); - void (*read)(struct driver_data *drv_data); + int (*write)(struct driver_data *drv_data); + int (*read)(struct driver_data *drv_data); void (*cs_control)(u32 command); }; @@ -166,114 +173,118 @@ static int flush(struct driver_data *drv_data) return limit; } -static void restore_state(struct driver_data *drv_data) -{ - void *reg = drv_data->ioaddr; - - /* Clear status and disable clock */ - write_SSSR(drv_data->clear_sr, reg); - write_SSCR0(drv_data->cur_chip->cr0 & ~SSCR0_SSE, reg); - - /* Load the registers */ - write_SSCR1(drv_data->cur_chip->cr1, reg); - write_SSCR0(drv_data->cur_chip->cr0, reg); - if (drv_data->ssp_type != PXA25x_SSP) { - write_SSTO(0, reg); - write_SSPSP(drv_data->cur_chip->psp, reg); - } -} - static void null_cs_control(u32 command) { } -static void null_writer(struct driver_data *drv_data) +static int null_writer(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; u8 n_bytes = drv_data->n_bytes; - while ((read_SSSR(reg) & SSSR_TNF) - && (drv_data->tx < drv_data->tx_end)) { - write_SSDR(0, reg); - drv_data->tx += n_bytes; - } + if (((read_SSSR(reg) & 0x00000f00) == 0x00000f00) + || (drv_data->tx == drv_data->tx_end)) + return 0; + + write_SSDR(0, reg); + drv_data->tx += n_bytes; + + return 1; } -static void null_reader(struct driver_data *drv_data) +static int null_reader(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; u8 n_bytes = drv_data->n_bytes; while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + && (drv_data->rx < drv_data->rx_end)) { read_SSDR(reg); drv_data->rx += n_bytes; } + + return drv_data->rx == drv_data->rx_end; } -static void u8_writer(struct driver_data *drv_data) +static int u8_writer(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; - while ((read_SSSR(reg) & SSSR_TNF) - && 
(drv_data->tx < drv_data->tx_end)) { - write_SSDR(*(u8 *)(drv_data->tx), reg); - ++drv_data->tx; - } + if (((read_SSSR(reg) & 0x00000f00) == 0x00000f00) + || (drv_data->tx == drv_data->tx_end)) + return 0; + + write_SSDR(*(u8 *)(drv_data->tx), reg); + ++drv_data->tx; + + return 1; } -static void u8_reader(struct driver_data *drv_data) +static int u8_reader(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + && (drv_data->rx < drv_data->rx_end)) { *(u8 *)(drv_data->rx) = read_SSDR(reg); ++drv_data->rx; } + + return drv_data->rx == drv_data->rx_end; } -static void u16_writer(struct driver_data *drv_data) +static int u16_writer(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; - while ((read_SSSR(reg) & SSSR_TNF) - && (drv_data->tx < drv_data->tx_end)) { - write_SSDR(*(u16 *)(drv_data->tx), reg); - drv_data->tx += 2; - } + if (((read_SSSR(reg) & 0x00000f00) == 0x00000f00) + || (drv_data->tx == drv_data->tx_end)) + return 0; + + write_SSDR(*(u16 *)(drv_data->tx), reg); + drv_data->tx += 2; + + return 1; } -static void u16_reader(struct driver_data *drv_data) +static int u16_reader(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + && (drv_data->rx < drv_data->rx_end)) { *(u16 *)(drv_data->rx) = read_SSDR(reg); drv_data->rx += 2; } + + return drv_data->rx == drv_data->rx_end; } -static void u32_writer(struct driver_data *drv_data) + +static int u32_writer(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; - while ((read_SSSR(reg) & SSSR_TNF) - && (drv_data->tx < drv_data->tx_end)) { - write_SSDR(*(u32 *)(drv_data->tx), reg); - drv_data->tx += 4; - } + if (((read_SSSR(reg) & 0x00000f00) == 0x00000f00) + || (drv_data->tx == drv_data->tx_end)) + return 0; + + write_SSDR(*(u32 *)(drv_data->tx), reg); + drv_data->tx += 4; + + return 1; } -static void u32_reader(struct driver_data *drv_data) +static int u32_reader(struct driver_data *drv_data) { void *reg = drv_data->ioaddr; while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + && (drv_data->rx < drv_data->rx_end)) { *(u32 *)(drv_data->rx) = read_SSDR(reg); drv_data->rx += 4; } + + return drv_data->rx == drv_data->rx_end; } static void *next_transfer(struct driver_data *drv_data) @@ -409,166 +420,134 @@ static int wait_dma_channel_stop(int channel) return limit; } -static void dma_handler(int channel, void *data) +void dma_error_stop(struct driver_data *drv_data, const char *msg) { - struct driver_data *drv_data = data; - struct spi_message *msg = drv_data->cur_msg; void *reg = drv_data->ioaddr; - u32 irq_status = DCSR(channel) & DMA_INT_MASK; - u32 trailing_sssr = 0; - if (irq_status & DCSR_BUSERR) { + /* Stop and reset */ + DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; + DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; + write_SSSR(drv_data->clear_sr, reg); + write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + if (drv_data->ssp_type != PXA25x_SSP) + write_SSTO(0, reg); + flush(drv_data); + write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - /* Disable interrupts, clear status and reset DMA */ - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); - if (drv_data->ssp_type != PXA25x_SSP) - write_SSTO(0, reg); - write_SSSR(drv_data->clear_sr, reg); - DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; - DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; + 
unmap_dma_buffers(drv_data); - if (flush(drv_data) == 0) - dev_err(&drv_data->pdev->dev, - "dma_handler: flush fail\n"); + dev_err(&drv_data->pdev->dev, "%s\n", msg); - unmap_dma_buffers(drv_data); + drv_data->cur_msg->state = ERROR_STATE; + tasklet_schedule(&drv_data->pump_transfers); +} + +static void dma_transfer_complete(struct driver_data *drv_data) +{ + void *reg = drv_data->ioaddr; + struct spi_message *msg = drv_data->cur_msg; + + /* Clear and disable interrupts on SSP and DMA channels*/ + write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + write_SSSR(drv_data->clear_sr, reg); + DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; + DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; + + if (wait_dma_channel_stop(drv_data->rx_channel) == 0) + dev_err(&drv_data->pdev->dev, + "dma_handler: dma rx channel stop failed\n"); + + if (wait_ssp_rx_stall(drv_data->ioaddr) == 0) + dev_err(&drv_data->pdev->dev, + "dma_transfer: ssp rx stall failed\n"); + + unmap_dma_buffers(drv_data); + + /* update the buffer pointer for the amount completed in dma */ + drv_data->rx += drv_data->len - + (DCMD(drv_data->rx_channel) & DCMD_LENGTH); + + /* read trailing data from fifo, it does not matter how many + * bytes are in the fifo just read until buffer is full + * or fifo is empty, which ever occurs first */ + drv_data->read(drv_data); + + /* return count of what was actually read */ + msg->actual_length += drv_data->len - + (drv_data->rx_end - drv_data->rx); + + /* Release chip select if requested, transfer delays are + * handled in pump_transfers */ + if (drv_data->cs_change) + drv_data->cs_control(PXA2XX_CS_DEASSERT); + + /* Move to next transfer */ + msg->state = next_transfer(drv_data); + + /* Schedule transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); +} + +static void dma_handler(int channel, void *data) +{ + struct driver_data *drv_data = data; + u32 irq_status = DCSR(channel) & DMA_INT_MASK; + + if (irq_status & DCSR_BUSERR) { if (channel == drv_data->tx_channel) - dev_err(&drv_data->pdev->dev, - "dma_handler: bad bus address on " - "tx channel %d, source %x target = %x\n", - channel, DSADR(channel), DTADR(channel)); + dma_error_stop(drv_data, + "dma_handler: " + "bad bus address on tx channel"); else - dev_err(&drv_data->pdev->dev, - "dma_handler: bad bus address on " - "rx channel %d, source %x target = %x\n", - channel, DSADR(channel), DTADR(channel)); - - msg->state = ERROR_STATE; - tasklet_schedule(&drv_data->pump_transfers); + dma_error_stop(drv_data, + "dma_handler: " + "bad bus address on rx channel"); + return; } /* PXA255x_SSP has no timeout interrupt, wait for tailing bytes */ - if ((drv_data->ssp_type == PXA25x_SSP) - && (channel == drv_data->tx_channel) - && (irq_status & DCSR_ENDINTR)) { + if ((channel == drv_data->tx_channel) + && (irq_status & DCSR_ENDINTR) + && (drv_data->ssp_type == PXA25x_SSP)) { /* Wait for rx to stall */ if (wait_ssp_rx_stall(drv_data->ioaddr) == 0) dev_err(&drv_data->pdev->dev, "dma_handler: ssp rx stall failed\n"); - /* Clear and disable interrupts on SSP and DMA channels*/ - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); - write_SSSR(drv_data->clear_sr, reg); - DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; - DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; - if (wait_dma_channel_stop(drv_data->rx_channel) == 0) - dev_err(&drv_data->pdev->dev, - "dma_handler: dma rx channel stop failed\n"); - - unmap_dma_buffers(drv_data); - - /* Read trailing bytes */ - /* Calculate number of trailing bytes, read them */ - trailing_sssr = 
read_SSSR(reg); - if ((trailing_sssr & 0xf008) != 0xf000) { - drv_data->rx = drv_data->rx_end - - (((trailing_sssr >> 12) & 0x0f) + 1); - drv_data->read(drv_data); - } - msg->actual_length += drv_data->len; - - /* Release chip select if requested, transfer delays are - * handled in pump_transfers */ - if (drv_data->cs_change) - drv_data->cs_control(PXA2XX_CS_DEASSERT); - - /* Move to next transfer */ - msg->state = next_transfer(drv_data); - - /* Schedule transfer tasklet */ - tasklet_schedule(&drv_data->pump_transfers); + /* finish this transfer, start the next */ + dma_transfer_complete(drv_data); } } static irqreturn_t dma_transfer(struct driver_data *drv_data) { u32 irq_status; - u32 trailing_sssr = 0; - struct spi_message *msg = drv_data->cur_msg; void *reg = drv_data->ioaddr; irq_status = read_SSSR(reg) & drv_data->mask_sr; if (irq_status & SSSR_ROR) { - /* Clear and disable interrupts on SSP and DMA channels*/ - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); - if (drv_data->ssp_type != PXA25x_SSP) - write_SSTO(0, reg); - write_SSSR(drv_data->clear_sr, reg); - DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; - DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; - unmap_dma_buffers(drv_data); - - if (flush(drv_data) == 0) - dev_err(&drv_data->pdev->dev, - "dma_transfer: flush fail\n"); - - dev_warn(&drv_data->pdev->dev, "dma_transfer: fifo overun\n"); - - drv_data->cur_msg->state = ERROR_STATE; - tasklet_schedule(&drv_data->pump_transfers); - + dma_error_stop(drv_data, "dma_transfer: fifo overrun"); return IRQ_HANDLED; } /* Check for false positive timeout */ - if ((irq_status & SSSR_TINT) && DCSR(drv_data->tx_channel) & DCSR_RUN) { + if ((irq_status & SSSR_TINT) + && (DCSR(drv_data->tx_channel) & DCSR_RUN)) { write_SSSR(SSSR_TINT, reg); return IRQ_HANDLED; } if (irq_status & SSSR_TINT || drv_data->rx == drv_data->rx_end) { - /* Clear and disable interrupts on SSP and DMA channels*/ - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + /* Clear and disable timeout interrupt, do the rest in + * dma_transfer_complete */ if (drv_data->ssp_type != PXA25x_SSP) write_SSTO(0, reg); - write_SSSR(drv_data->clear_sr, reg); - DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; - DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; - if (wait_dma_channel_stop(drv_data->rx_channel) == 0) - dev_err(&drv_data->pdev->dev, - "dma_transfer: dma rx channel stop failed\n"); - - if (wait_ssp_rx_stall(drv_data->ioaddr) == 0) - dev_err(&drv_data->pdev->dev, - "dma_transfer: ssp rx stall failed\n"); - - unmap_dma_buffers(drv_data); - - /* Calculate number of trailing bytes, read them */ - trailing_sssr = read_SSSR(reg); - if ((trailing_sssr & 0xf008) != 0xf000) { - drv_data->rx = drv_data->rx_end - - (((trailing_sssr >> 12) & 0x0f) + 1); - drv_data->read(drv_data); - } - msg->actual_length += drv_data->len; - - /* Release chip select if requested, transfer delays are - * handled in pump_transfers */ - if (drv_data->cs_change) - drv_data->cs_control(PXA2XX_CS_DEASSERT); - - /* Move to next transfer */ - msg->state = next_transfer(drv_data); - - /* Schedule transfer tasklet */ - tasklet_schedule(&drv_data->pump_transfers); + /* finish this transfer, start the next */ + dma_transfer_complete(drv_data); return IRQ_HANDLED; } @@ -577,89 +556,103 @@ static irqreturn_t dma_transfer(struct driver_data *drv_data) return IRQ_NONE; } -static irqreturn_t interrupt_transfer(struct driver_data *drv_data) +static void int_error_stop(struct driver_data *drv_data, const 
char* msg) { - struct spi_message *msg = drv_data->cur_msg; void *reg = drv_data->ioaddr; - unsigned long limit = loops_per_jiffy << 1; - u32 irq_status; - u32 irq_mask = (read_SSCR1(reg) & SSCR1_TIE) ? - drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS; - - while ((irq_status = read_SSSR(reg) & irq_mask)) { - - if (irq_status & SSSR_ROR) { - /* Clear and disable interrupts */ - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg); - if (drv_data->ssp_type != PXA25x_SSP) - write_SSTO(0, reg); - write_SSSR(drv_data->clear_sr, reg); + /* Stop and reset SSP */ + write_SSSR(drv_data->clear_sr, reg); + write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg); + if (drv_data->ssp_type != PXA25x_SSP) + write_SSTO(0, reg); + flush(drv_data); + write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - if (flush(drv_data) == 0) - dev_err(&drv_data->pdev->dev, - "interrupt_transfer: flush fail\n"); + dev_err(&drv_data->pdev->dev, "%s\n", msg); - /* Stop the SSP */ + drv_data->cur_msg->state = ERROR_STATE; + tasklet_schedule(&drv_data->pump_transfers); +} - dev_warn(&drv_data->pdev->dev, - "interrupt_transfer: fifo overun\n"); +static void int_transfer_complete(struct driver_data *drv_data) +{ + void *reg = drv_data->ioaddr; - msg->state = ERROR_STATE; - tasklet_schedule(&drv_data->pump_transfers); + /* Stop SSP */ + write_SSSR(drv_data->clear_sr, reg); + write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg); + if (drv_data->ssp_type != PXA25x_SSP) + write_SSTO(0, reg); - return IRQ_HANDLED; - } + /* Update total byte transfered return count actual bytes read */ + drv_data->cur_msg->actual_length += drv_data->len - + (drv_data->rx_end - drv_data->rx); - /* Look for false positive timeout */ - if ((irq_status & SSSR_TINT) - && (drv_data->rx < drv_data->rx_end)) - write_SSSR(SSSR_TINT, reg); + /* Release chip select if requested, transfer delays are + * handled in pump_transfers */ + if (drv_data->cs_change) + drv_data->cs_control(PXA2XX_CS_DEASSERT); - /* Pump data */ - drv_data->read(drv_data); - drv_data->write(drv_data); + /* Move to next transfer */ + drv_data->cur_msg->state = next_transfer(drv_data); - if (drv_data->tx == drv_data->tx_end) { - /* Disable tx interrupt */ - write_SSCR1(read_SSCR1(reg) & ~SSCR1_TIE, reg); - irq_mask = drv_data->mask_sr & ~SSSR_TFS; + /* Schedule transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); +} - /* PXA25x_SSP has no timeout, read trailing bytes */ - if (drv_data->ssp_type == PXA25x_SSP) { - while ((read_SSSR(reg) & SSSR_BSY) && limit--) - drv_data->read(drv_data); +static irqreturn_t interrupt_transfer(struct driver_data *drv_data) +{ + void *reg = drv_data->ioaddr; - if (limit == 0) - dev_err(&drv_data->pdev->dev, - "interrupt_transfer: " - "trailing byte read failed\n"); - } - } + u32 irq_mask = (read_SSCR1(reg) & SSCR1_TIE) ? 
+ drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS; - if ((irq_status & SSSR_TINT) - || (drv_data->rx == drv_data->rx_end)) { + u32 irq_status = read_SSSR(reg) & irq_mask; - /* Clear timeout */ - write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg); - if (drv_data->ssp_type != PXA25x_SSP) - write_SSTO(0, reg); - write_SSSR(drv_data->clear_sr, reg); + if (irq_status & SSSR_ROR) { + int_error_stop(drv_data, "interrupt_transfer: fifo overrun"); + return IRQ_HANDLED; + } - /* Update total byte transfered */ - msg->actual_length += drv_data->len; + if (irq_status & SSSR_TINT) { + write_SSSR(SSSR_TINT, reg); + if (drv_data->read(drv_data)) { + int_transfer_complete(drv_data); + return IRQ_HANDLED; + } + } - /* Release chip select if requested, transfer delays are - * handled in pump_transfers */ - if (drv_data->cs_change) - drv_data->cs_control(PXA2XX_CS_DEASSERT); + /* Drain rx fifo, Fill tx fifo and prevent overruns */ + do { + if (drv_data->read(drv_data)) { + int_transfer_complete(drv_data); + return IRQ_HANDLED; + } + } while (drv_data->write(drv_data)); - /* Move to next transfer */ - msg->state = next_transfer(drv_data); + if (drv_data->read(drv_data)) { + int_transfer_complete(drv_data); + return IRQ_HANDLED; + } - /* Schedule transfer tasklet */ - tasklet_schedule(&drv_data->pump_transfers); + if (drv_data->tx == drv_data->tx_end) { + write_SSCR1(read_SSCR1(reg) & ~SSCR1_TIE, reg); + /* PXA25x_SSP has no timeout, read trailing bytes */ + if (drv_data->ssp_type == PXA25x_SSP) { + if (!wait_ssp_rx_stall(reg)) + { + int_error_stop(drv_data, "interrupt_transfer: " + "rx stall failed"); + return IRQ_HANDLED; + } + if (!drv_data->read(drv_data)) + { + int_error_stop(drv_data, + "interrupt_transfer: " + "trailing byte read failed"); + return IRQ_HANDLED; + } + int_transfer_complete(drv_data); } } @@ -681,7 +674,7 @@ static irqreturn_t ssp_int(int irq, void *dev_id) write_SSSR(drv_data->clear_sr, reg); dev_err(&drv_data->pdev->dev, "bad message state " - "in interrupt handler"); + "in interrupt handler\n"); /* Never fail */ return IRQ_HANDLED; @@ -690,6 +683,102 @@ static irqreturn_t ssp_int(int irq, void *dev_id) return drv_data->transfer_handler(drv_data); } +int set_dma_burst_and_threshold(struct chip_data *chip, struct spi_device *spi, + u8 bits_per_word, u32 *burst_code, + u32 *threshold) +{ + struct pxa2xx_spi_chip *chip_info = + (struct pxa2xx_spi_chip *)spi->controller_data; + int bytes_per_word; + int burst_bytes; + int thresh_words; + int req_burst_size; + int retval = 0; + + /* Set the threshold (in registers) to equal the same amount of data + * as represented by burst size (in bytes). The computation below + * is (burst_size rounded up to nearest 8 byte, word or long word) + * divided by (bytes/register); the tx threshold is the inverse of + * the rx, so that there will always be enough data in the rx fifo + * to satisfy a burst, and there will always be enough space in the + * tx fifo to accept a burst (a tx burst will overwrite the fifo if + * there is not enough space), there must always remain enough empty + * space in the rx fifo for any data loaded to the tx fifo. + * Whenever burst_size (in bytes) equals bits/word, the fifo threshold + * will be 8, or half the fifo; + * The threshold can only be set to 2, 4 or 8, but not 16, because + * to burst 16 to the tx fifo, the fifo would have to be empty; + * however, the minimum fifo trigger level is 1, and the tx will + * request service when the fifo is at this level, with only 15 spaces. 
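A worked example of the arithmetic described above (hypothetical numbers, not from the patch): for bits_per_word = 16 and a requested DMA burst of 16 bytes,

	bytes_per_word = 2
	burst_bytes    = 16			/* DCMD_BURST16 */
	thresh_words   = 16 / 2 = 8
	*threshold     = SSCR1_RxTresh(8) | SSCR1_TxTresh(16 - 8)

so both trigger levels sit at half of the 16-entry fifo. For bits_per_word = 8 the burst is capped at 8 bytes (and set_dma_burst_and_threshold() returns 1 to report the reduction), which again works out to an 8/8 split.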
+ */ + + /* find bytes/word */ + if (bits_per_word <= 8) + bytes_per_word = 1; + else if (bits_per_word <= 16) + bytes_per_word = 2; + else + bytes_per_word = 4; + + /* use struct pxa2xx_spi_chip->dma_burst_size if available */ + if (chip_info) + req_burst_size = chip_info->dma_burst_size; + else { + switch (chip->dma_burst_size) { + default: + /* if the default burst size is not set, + * do it now */ + chip->dma_burst_size = DCMD_BURST8; + case DCMD_BURST8: + req_burst_size = 8; + break; + case DCMD_BURST16: + req_burst_size = 16; + break; + case DCMD_BURST32: + req_burst_size = 32; + break; + } + } + if (req_burst_size <= 8) { + *burst_code = DCMD_BURST8; + burst_bytes = 8; + } else if (req_burst_size <= 16) { + if (bytes_per_word == 1) { + /* don't burst more than 1/2 the fifo */ + *burst_code = DCMD_BURST8; + burst_bytes = 8; + retval = 1; + } else { + *burst_code = DCMD_BURST16; + burst_bytes = 16; + } + } else { + if (bytes_per_word == 1) { + /* don't burst more than 1/2 the fifo */ + *burst_code = DCMD_BURST8; + burst_bytes = 8; + retval = 1; + } else if (bytes_per_word == 2) { + /* don't burst more than 1/2 the fifo */ + *burst_code = DCMD_BURST16; + burst_bytes = 16; + retval = 1; + } else { + *burst_code = DCMD_BURST32; + burst_bytes = 32; + } + } + + thresh_words = burst_bytes / bytes_per_word; + + /* thresh_words will be between 2 and 8 */ + *threshold = (SSCR1_RxTresh(thresh_words) & SSCR1_RFT) + | (SSCR1_TxTresh(16-thresh_words) & SSCR1_TFT); + + return retval; +} + static void pump_transfers(unsigned long data) { struct driver_data *drv_data = (struct driver_data *)data; @@ -702,6 +791,9 @@ static void pump_transfers(unsigned long data) u8 bits = 0; u32 speed = 0; u32 cr0; + u32 cr1; + u32 dma_thresh = drv_data->cur_chip->dma_threshold; + u32 dma_burst = drv_data->cur_chip->dma_burst_size; /* Get current state information */ message = drv_data->cur_msg; @@ -731,6 +823,16 @@ static void pump_transfers(unsigned long data) udelay(previous->delay_usecs); } + /* Check transfer length */ + if (transfer->len > 8191) + { + dev_warn(&drv_data->pdev->dev, "pump_transfers: transfer " + "length greater than 8191\n"); + message->status = -EINVAL; + giveback(drv_data); + return; + } + /* Setup the transfer state based on the type of transfer */ if (flush(drv_data) == 0) { dev_err(&drv_data->pdev->dev, "pump_transfers: flush failed\n"); @@ -747,17 +849,15 @@ static void pump_transfers(unsigned long data) drv_data->rx_end = drv_data->rx + transfer->len; drv_data->rx_dma = transfer->rx_dma; drv_data->tx_dma = transfer->tx_dma; - drv_data->len = transfer->len; + drv_data->len = transfer->len & DCMD_LENGTH; drv_data->write = drv_data->tx ? chip->write : null_writer; drv_data->read = drv_data->rx ? chip->read : null_reader; drv_data->cs_change = transfer->cs_change; /* Change speed and bit per word on a per transfer */ + cr0 = chip->cr0; if (transfer->speed_hz || transfer->bits_per_word) { - /* Disable clock */ - write_SSCR0(chip->cr0 & ~SSCR0_SSE, reg); - cr0 = chip->cr0; bits = chip->bits_per_word; speed = chip->speed_hz; @@ -796,15 +896,24 @@ static void pump_transfers(unsigned long data) drv_data->write = drv_data->write != null_writer ? 
u32_writer : null_writer; } + /* if bits/word is changed in dma mode, then must check the + * thresholds and burst also */ + if (chip->enable_dma) { + if (set_dma_burst_and_threshold(chip, message->spi, + bits, &dma_burst, + &dma_thresh)) + if (printk_ratelimit()) + dev_warn(&message->spi->dev, + "pump_transfer: " + "DMA burst size reduced to " + "match bits_per_word\n"); + } cr0 = clk_div | SSCR0_Motorola | SSCR0_DataSize(bits > 16 ? bits - 16 : bits) | SSCR0_SSE | (bits > 16 ? SSCR0_EDSS : 0); - - /* Start it back up */ - write_SSCR0(cr0, reg); } message->state = RUNNING_STATE; @@ -823,13 +932,13 @@ static void pump_transfers(unsigned long data) /* No target address increment */ DCMD(drv_data->rx_channel) = DCMD_FLOWSRC | drv_data->dma_width - | chip->dma_burst_size + | dma_burst | drv_data->len; else DCMD(drv_data->rx_channel) = DCMD_INCTRGADDR | DCMD_FLOWSRC | drv_data->dma_width - | chip->dma_burst_size + | dma_burst | drv_data->len; /* Setup tx DMA Channel */ @@ -840,13 +949,13 @@ static void pump_transfers(unsigned long data) /* No source address increment */ DCMD(drv_data->tx_channel) = DCMD_FLOWTRG | drv_data->dma_width - | chip->dma_burst_size + | dma_burst | drv_data->len; else DCMD(drv_data->tx_channel) = DCMD_INCSRCADDR | DCMD_FLOWTRG | drv_data->dma_width - | chip->dma_burst_size + | dma_burst | drv_data->len; /* Enable dma end irqs on SSP to detect end of transfer */ @@ -856,16 +965,11 @@ static void pump_transfers(unsigned long data) /* Fix me, need to handle cs polarity */ drv_data->cs_control(PXA2XX_CS_ASSERT); - /* Go baby, go */ + /* Clear status and start DMA engine */ + cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1; write_SSSR(drv_data->clear_sr, reg); DCSR(drv_data->rx_channel) |= DCSR_RUN; DCSR(drv_data->tx_channel) |= DCSR_RUN; - if (drv_data->ssp_type != PXA25x_SSP) - write_SSTO(chip->timeout, reg); - write_SSCR1(chip->cr1 - | chip->dma_threshold - | drv_data->dma_cr1, - reg); } else { /* Ensure we have the correct interrupt handler */ drv_data->transfer_handler = interrupt_transfer; @@ -873,14 +977,25 @@ static void pump_transfers(unsigned long data) /* Fix me, need to handle cs polarity */ drv_data->cs_control(PXA2XX_CS_ASSERT); - /* Go baby, go */ + /* Clear status */ + cr1 = chip->cr1 | chip->threshold | drv_data->int_cr1; write_SSSR(drv_data->clear_sr, reg); + } + + /* see if we need to reload the config registers */ + if ((read_SSCR0(reg) != cr0) + || (read_SSCR1(reg) & SSCR1_CHANGE_MASK) != + (cr1 & SSCR1_CHANGE_MASK)) { + + write_SSCR0(cr0 & ~SSCR0_SSE, reg); if (drv_data->ssp_type != PXA25x_SSP) write_SSTO(chip->timeout, reg); - write_SSCR1(chip->cr1 - | chip->threshold - | drv_data->int_cr1, - reg); + write_SSCR1(cr1, reg); + write_SSCR0(cr0, reg); + } else { + if (drv_data->ssp_type != PXA25x_SSP) + write_SSTO(chip->timeout, reg); + write_SSCR1(cr1, reg); } } @@ -915,9 +1030,9 @@ static void pump_messages(struct work_struct *work) struct spi_transfer, transfer_list); - /* Setup the SSP using the per chip configuration */ + /* prepare to setup the SSP, in pump_transfers, using the per + * chip configuration */ drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi); - restore_state(drv_data); /* Mark as busy and launch transfers */ tasklet_schedule(&drv_data->pump_transfers); @@ -963,63 +1078,77 @@ static int setup(struct spi_device *spi) spi->bits_per_word = 8; if (drv_data->ssp_type != PXA25x_SSP - && (spi->bits_per_word < 4 || spi->bits_per_word > 32)) + && (spi->bits_per_word < 4 || spi->bits_per_word > 32)) { + dev_err(&spi->dev, "failed 
setup: ssp_type=%d, bits/wrd=%d " + "b/w not 4-32 for type non-PXA25x_SSP\n", + drv_data->ssp_type, spi->bits_per_word); return -EINVAL; - else if (spi->bits_per_word < 4 || spi->bits_per_word > 16) + } + else if (drv_data->ssp_type == PXA25x_SSP + && (spi->bits_per_word < 4 + || spi->bits_per_word > 16)) { + dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d " + "b/w not 4-16 for type PXA25x_SSP\n", + drv_data->ssp_type, spi->bits_per_word); return -EINVAL; + } - /* Only alloc (or use chip_info) on first setup */ + /* Only alloc on first setup */ chip = spi_get_ctldata(spi); - if (chip == NULL) { + if (!chip) { chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); - if (!chip) + if (!chip) { + dev_err(&spi->dev, + "failed setup: can't allocate chip data\n"); return -ENOMEM; + } chip->cs_control = null_cs_control; chip->enable_dma = 0; - chip->timeout = SSP_TIMEOUT(1000); + chip->timeout = 1000; chip->threshold = SSCR1_RxTresh(1) | SSCR1_TxTresh(1); chip->dma_burst_size = drv_data->master_info->enable_dma ? DCMD_BURST8 : 0; - - chip_info = spi->controller_data; } + /* protocol drivers may change the chip settings, so... + * if chip_info exists, use it */ + chip_info = spi->controller_data; + /* chip_info isn't always needed */ + chip->cr1 = 0; if (chip_info) { if (chip_info->cs_control) chip->cs_control = chip_info->cs_control; - chip->timeout = SSP_TIMEOUT(chip_info->timeout_microsecs); + chip->timeout = chip_info->timeout; - chip->threshold = SSCR1_RxTresh(chip_info->rx_threshold) - | SSCR1_TxTresh(chip_info->tx_threshold); + chip->threshold = (SSCR1_RxTresh(chip_info->rx_threshold) & + SSCR1_RFT) | + (SSCR1_TxTresh(chip_info->tx_threshold) & + SSCR1_TFT); chip->enable_dma = chip_info->dma_burst_size != 0 && drv_data->master_info->enable_dma; chip->dma_threshold = 0; - if (chip->enable_dma) { - if (chip_info->dma_burst_size <= 8) { - chip->dma_threshold = SSCR1_RxTresh(8) - | SSCR1_TxTresh(8); - chip->dma_burst_size = DCMD_BURST8; - } else if (chip_info->dma_burst_size <= 16) { - chip->dma_threshold = SSCR1_RxTresh(16) - | SSCR1_TxTresh(16); - chip->dma_burst_size = DCMD_BURST16; - } else { - chip->dma_threshold = SSCR1_RxTresh(32) - | SSCR1_TxTresh(32); - chip->dma_burst_size = DCMD_BURST32; - } - } - - if (chip_info->enable_loopback) chip->cr1 = SSCR1_LBM; } + /* set dma burst and threshold outside of chip_info path so that if + * chip_info goes away after setting chip->enable_dma, the + * burst and threshold can still respond to changes in bits_per_word */ + if (chip->enable_dma) { + /* set up legal burst and threshold for dma */ + if (set_dma_burst_and_threshold(chip, spi, spi->bits_per_word, + &chip->dma_burst_size, + &chip->dma_threshold)) { + dev_warn(&spi->dev, "in setup: DMA burst size reduced " + "to match bits_per_word\n"); + } + } + if (drv_data->ioaddr == SSP1_VIRT) clk_div = SSP1_SerClkDiv(spi->max_speed_hz); else if (drv_data->ioaddr == SSP2_VIRT) @@ -1027,7 +1156,11 @@ static int setup(struct spi_device *spi) else if (drv_data->ioaddr == SSP3_VIRT) clk_div = SSP3_SerClkDiv(spi->max_speed_hz); else + { + dev_err(&spi->dev, "failed setup: unknown IO address=0x%p\n", + drv_data->ioaddr); return -ENODEV; + } chip->speed_hz = spi->max_speed_hz; chip->cr0 = clk_div @@ -1071,7 +1204,6 @@ static int setup(struct spi_device *spi) chip->write = u32_writer; } else { dev_err(&spi->dev, "invalid wordsize\n"); - kfree(chip); return -ENODEV; } chip->bits_per_word = spi->bits_per_word; @@ -1162,6 +1294,12 @@ static int destroy_queue(struct driver_data *drv_data) int status; 
status = stop_queue(drv_data); + /* we are unloading the module or failing to load (only two calls + * to this routine), and neither call can handle a return value. + * However, destroy_workqueue calls flush_workqueue, and that will + * block until all work is done. If the reason that stop_queue + * timed out is that the work will never finish, then it does no + * good to call destroy_workqueue, so return anyway. */ if (status != 0) return status; @@ -1360,7 +1498,16 @@ static int pxa2xx_spi_remove(struct platform_device *pdev) /* Remove the queue */ status = destroy_queue(drv_data); if (status != 0) - return status; + /* the kernel does not check the return status of this + * this routine (mod->exit, within the kernel). Therefore + * nothing is gained by returning from here, the module is + * going away regardless, and we should not leave any more + * resources allocated than necessary. We cannot free the + * message memory in drv_data->queue, but we can release the + * resources below. I think the kernel should honor -EBUSY + * returns but... */ + dev_err(&pdev->dev, "pxa2xx_spi_remove: workqueue will not " + "complete, message memory not freed\n"); /* Disable the SSP at the peripheral and SOC level */ write_SSCR0(0, drv_data->ioaddr); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index ab1daecfeac..4e83f01e894 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1615,6 +1615,16 @@ config FB_PNX4008_DUM_RGB ---help--- Say Y here to enable support for PNX4008 RGB Framebuffer +config FB_IBM_GXT4500 + tristate "Framebuffer support for IBM GXT4500P adaptor" + depends on PPC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Say Y here to enable support for the IBM GXT4500P display + adaptor, found on some IBM System P (pSeries) machines. + config FB_VIRTUAL tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" depends on FB diff --git a/drivers/video/Makefile b/drivers/video/Makefile index a6980e9a248..309a26dd164 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_FB_IMX) += imxfb.o obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o obj-$(CONFIG_FB_PNX4008_DUM) += pnx4008/ obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnx4008/ +obj-$(CONFIG_FB_IBM_GXT4500) += gxt4500.o # Platform or fallback drivers go here obj-$(CONFIG_FB_VESA) += vesafb.o diff --git a/drivers/video/gxt4500.c b/drivers/video/gxt4500.c new file mode 100644 index 00000000000..3adf6ab0768 --- /dev/null +++ b/drivers/video/gxt4500.c @@ -0,0 +1,741 @@ +/* + * Frame buffer device for IBM GXT4500P display adaptor + * + * Copyright (C) 2006 Paul Mackerras, IBM Corp. 
<paulus@samba.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fb.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/delay.h> + +#define PCI_DEVICE_ID_IBM_GXT4500P 0x21c + +/* GXT4500P registers */ + +/* Registers in PCI config space */ +#define CFG_ENDIAN0 0x40 + +/* Misc control/status registers */ +#define STATUS 0x1000 +#define CTRL_REG0 0x1004 +#define CR0_HALT_DMA 0x4 +#define CR0_RASTER_RESET 0x8 +#define CR0_GEOM_RESET 0x10 +#define CR0_MEM_CTRLER_RESET 0x20 + +/* Framebuffer control registers */ +#define FB_AB_CTRL 0x1100 +#define FB_CD_CTRL 0x1104 +#define FB_WID_CTRL 0x1108 +#define FB_Z_CTRL 0x110c +#define FB_VGA_CTRL 0x1110 +#define REFRESH_AB_CTRL 0x1114 +#define REFRESH_CD_CTRL 0x1118 +#define FB_OVL_CTRL 0x111c +#define FB_CTRL_TYPE 0x80000000 +#define FB_CTRL_WIDTH_MASK 0x007f0000 +#define FB_CTRL_WIDTH_SHIFT 16 +#define FB_CTRL_START_SEG_MASK 0x00003fff + +#define REFRESH_START 0x1098 +#define REFRESH_SIZE 0x109c + +/* "Direct" framebuffer access registers */ +#define DFA_FB_A 0x11e0 +#define DFA_FB_B 0x11e4 +#define DFA_FB_C 0x11e8 +#define DFA_FB_D 0x11ec +#define DFA_FB_ENABLE 0x80000000 +#define DFA_FB_BASE_MASK 0x03f00000 +#define DFA_FB_STRIDE_1k 0x00000000 +#define DFA_FB_STRIDE_2k 0x00000010 +#define DFA_FB_STRIDE_4k 0x00000020 +#define DFA_PIX_8BIT 0x00000000 +#define DFA_PIX_16BIT_565 0x00000001 +#define DFA_PIX_16BIT_1555 0x00000002 +#define DFA_PIX_24BIT 0x00000004 +#define DFA_PIX_32BIT 0x00000005 + +/* maps DFA_PIX_* to pixel size in bytes */ +static const unsigned char pixsize[] = { + 1, 2, 2, 2, 4, 4 +}; + +/* Display timing generator registers */ +#define DTG_CONTROL 0x1900 +#define DTG_CTL_SCREEN_REFRESH 2 +#define DTG_CTL_ENABLE 1 +#define DTG_HORIZ_EXTENT 0x1904 +#define DTG_HORIZ_DISPLAY 0x1908 +#define DTG_HSYNC_START 0x190c +#define DTG_HSYNC_END 0x1910 +#define DTG_HSYNC_END_COMP 0x1914 +#define DTG_VERT_EXTENT 0x1918 +#define DTG_VERT_DISPLAY 0x191c +#define DTG_VSYNC_START 0x1920 +#define DTG_VSYNC_END 0x1924 +#define DTG_VERT_SHORT 0x1928 + +/* PLL/RAMDAC registers */ +#define DISP_CTL 0x402c +#define DISP_CTL_OFF 2 +#define SYNC_CTL 0x4034 +#define SYNC_CTL_SYNC_ON_RGB 1 +#define SYNC_CTL_SYNC_OFF 2 +#define SYNC_CTL_HSYNC_INV 8 +#define SYNC_CTL_VSYNC_INV 0x10 +#define SYNC_CTL_HSYNC_OFF 0x20 +#define SYNC_CTL_VSYNC_OFF 0x40 + +#define PLL_M 0x4040 +#define PLL_N 0x4044 +#define PLL_POSTDIV 0x4048 + +/* Hardware cursor */ +#define CURSOR_X 0x4078 +#define CURSOR_Y 0x407c +#define CURSOR_HOTSPOT 0x4080 +#define CURSOR_MODE 0x4084 +#define CURSOR_MODE_OFF 0 +#define CURSOR_MODE_4BPP 1 +#define CURSOR_PIXMAP 0x5000 +#define CURSOR_CMAP 0x7400 + +/* Window attribute table */ +#define WAT_FMT 0x4100 +#define WAT_FMT_24BIT 0 +#define WAT_FMT_16BIT_565 1 +#define WAT_FMT_16BIT_1555 2 +#define WAT_FMT_32BIT 3 /* 0 vs. 
3 is a guess */ +#define WAT_FMT_8BIT_332 9 +#define WAT_FMT_8BIT 0xa +#define WAT_FMT_NO_CMAP 4 /* ORd in to other values */ +#define WAT_CMAP_OFFSET 0x4104 /* 4-bit value gets << 6 */ +#define WAT_CTRL 0x4108 +#define WAT_CTRL_SEL_B 1 /* select B buffer if 1 */ +#define WAT_CTRL_NO_INC 2 +#define WAT_GAMMA_CTRL 0x410c +#define WAT_GAMMA_DISABLE 1 /* disables gamma cmap */ +#define WAT_OVL_CTRL 0x430c /* controls overlay */ + +/* Indexed by DFA_PIX_* values */ +static const unsigned char watfmt[] = { + WAT_FMT_8BIT, WAT_FMT_16BIT_565, WAT_FMT_16BIT_1555, 0, + WAT_FMT_24BIT, WAT_FMT_32BIT +}; + +/* Colormap array; 1k entries of 4 bytes each */ +#define CMAP 0x6000 + +#define readreg(par, reg) readl((par)->regs + (reg)) +#define writereg(par, reg, val) writel((val), (par)->regs + (reg)) + +struct gxt4500_par { + void __iomem *regs; + + int pixfmt; /* pixel format, see DFA_PIX_* values */ + + /* PLL parameters */ + int pll_m; /* ref clock divisor */ + int pll_n; /* VCO divisor */ + int pll_pd1; /* first post-divisor */ + int pll_pd2; /* second post-divisor */ + + u32 pseudo_palette[16]; /* used in color blits */ +}; + +/* mode requested by user */ +static char *mode_option; + +/* default mode: 1280x1024 @ 60 Hz, 8 bpp */ +static const struct fb_videomode defaultmode __devinitdata = { + .refresh = 60, + .xres = 1280, + .yres = 1024, + .pixclock = 9295, + .left_margin = 248, + .right_margin = 48, + .upper_margin = 38, + .lower_margin = 1, + .hsync_len = 112, + .vsync_len = 3, + .vmode = FB_VMODE_NONINTERLACED +}; + +/* + * The refclk and VCO dividers appear to use a linear feedback shift + * register, which gets reloaded when it reaches a terminal value, at + * which point the divider output is toggled. Thus one can obtain + * whatever divisor is required by putting the appropriate value into + * the reload register. For a divisor of N, one puts the value from + * the LFSR sequence that comes N-1 places before the terminal value + * into the reload register. 
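To make the divider roles concrete, here is one hypothetical operating point (not taken from the driver): with m = 5, n = 40, pdiv1 = 8 and pdiv2 = 1 the VCO period is

	REF_PERIOD_PS * m / n = 9259 * 5 / 40 ~= 1157 ps	(about 864 MHz)

which falls inside the 500-1250 MHz range that calc_pll() below insists on, and the post-divided dot clock period is

	REF_PERIOD_PS * m * pdiv1 * pdiv2 / n = 9259 * 5 * 8 / 40 = 9259 ps

or roughly 108 MHz. calc_pll() simply searches m, n and the two post-dividers for the combination whose period lands closest to the requested pixclock without dropping below it.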
+ */ + +static const unsigned char mdivtab[] = { +/* 1 */ 0x3f, 0x00, 0x20, 0x10, 0x28, 0x14, 0x2a, 0x15, 0x0a, +/* 10 */ 0x25, 0x32, 0x19, 0x0c, 0x26, 0x13, 0x09, 0x04, 0x22, 0x11, +/* 20 */ 0x08, 0x24, 0x12, 0x29, 0x34, 0x1a, 0x2d, 0x36, 0x1b, 0x0d, +/* 30 */ 0x06, 0x23, 0x31, 0x38, 0x1c, 0x2e, 0x17, 0x0b, 0x05, 0x02, +/* 40 */ 0x21, 0x30, 0x18, 0x2c, 0x16, 0x2b, 0x35, 0x3a, 0x1d, 0x0e, +/* 50 */ 0x27, 0x33, 0x39, 0x3c, 0x1e, 0x2f, 0x37, 0x3b, 0x3d, 0x3e, +/* 60 */ 0x1f, 0x0f, 0x07, 0x03, 0x01, +}; + +static const unsigned char ndivtab[] = { +/* 2 */ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0x78, 0xbc, 0x5e, +/* 10 */ 0x2f, 0x17, 0x0b, 0x85, 0xc2, 0xe1, 0x70, 0x38, 0x9c, 0x4e, +/* 20 */ 0xa7, 0xd3, 0xe9, 0xf4, 0xfa, 0xfd, 0xfe, 0x7f, 0xbf, 0xdf, +/* 30 */ 0xef, 0x77, 0x3b, 0x1d, 0x8e, 0xc7, 0xe3, 0x71, 0xb8, 0xdc, +/* 40 */ 0x6e, 0xb7, 0x5b, 0x2d, 0x16, 0x8b, 0xc5, 0xe2, 0xf1, 0xf8, +/* 50 */ 0xfc, 0x7e, 0x3f, 0x9f, 0xcf, 0x67, 0xb3, 0xd9, 0x6c, 0xb6, +/* 60 */ 0xdb, 0x6d, 0x36, 0x9b, 0x4d, 0x26, 0x13, 0x89, 0xc4, 0x62, +/* 70 */ 0xb1, 0xd8, 0xec, 0xf6, 0xfb, 0x7d, 0xbe, 0x5f, 0xaf, 0x57, +/* 80 */ 0x2b, 0x95, 0x4a, 0x25, 0x92, 0x49, 0xa4, 0x52, 0x29, 0x94, +/* 90 */ 0xca, 0x65, 0xb2, 0x59, 0x2c, 0x96, 0xcb, 0xe5, 0xf2, 0x79, +/* 100 */ 0x3c, 0x1e, 0x0f, 0x07, 0x83, 0x41, 0x20, 0x90, 0x48, 0x24, +/* 110 */ 0x12, 0x09, 0x84, 0x42, 0xa1, 0x50, 0x28, 0x14, 0x8a, 0x45, +/* 120 */ 0xa2, 0xd1, 0xe8, 0x74, 0xba, 0xdd, 0xee, 0xf7, 0x7b, 0x3d, +/* 130 */ 0x9e, 0x4f, 0x27, 0x93, 0xc9, 0xe4, 0x72, 0x39, 0x1c, 0x0e, +/* 140 */ 0x87, 0xc3, 0x61, 0x30, 0x18, 0x8c, 0xc6, 0x63, 0x31, 0x98, +/* 150 */ 0xcc, 0xe6, 0x73, 0xb9, 0x5c, 0x2e, 0x97, 0x4b, 0xa5, 0xd2, +/* 160 */ 0x69, 0xb4, 0xda, 0xed, 0x76, 0xbb, 0x5d, 0xae, 0xd7, 0x6b, +/* 170 */ 0xb5, 0x5a, 0xad, 0x56, 0xab, 0xd5, 0x6a, 0x35, 0x1a, 0x8d, +/* 180 */ 0x46, 0x23, 0x11, 0x88, 0x44, 0x22, 0x91, 0xc8, 0x64, 0x32, +/* 190 */ 0x19, 0x0c, 0x86, 0x43, 0x21, 0x10, 0x08, 0x04, 0x02, 0x81, +/* 200 */ 0x40, 0xa0, 0xd0, 0x68, 0x34, 0x9a, 0xcd, 0x66, 0x33, 0x99, +/* 210 */ 0x4c, 0xa6, 0x53, 0xa9, 0xd4, 0xea, 0x75, 0x3a, 0x9d, 0xce, +/* 220 */ 0xe7, 0xf3, 0xf9, 0x7c, 0x3e, 0x1f, 0x8f, 0x47, 0xa3, 0x51, +/* 230 */ 0xa8, 0x54, 0xaa, 0x55, 0x2a, 0x15, 0x0a, 0x05, 0x82, 0xc1, +/* 240 */ 0x60, 0xb0, 0x58, 0xac, 0xd6, 0xeb, 0xf5, 0x7a, 0xbd, 0xde, +/* 250 */ 0x6f, 0x37, 0x1b, 0x0d, 0x06, 0x03, 0x01, +}; + +#define REF_PERIOD_PS 9259 /* period of reference clock in ps */ + +static int calc_pll(int period_ps, struct gxt4500_par *par) +{ + int m, n, pdiv1, pdiv2, postdiv; + int pll_period, best_error, t; + + /* only deal with range 1MHz - 400MHz */ + if (period_ps < 2500 || period_ps > 1000000) + return -1; + + best_error = 1000000; + for (pdiv1 = 1; pdiv1 <= 8; ++pdiv1) { + for (pdiv2 = 1; pdiv2 <= pdiv1; ++pdiv2) { + postdiv = pdiv1 * pdiv2; + pll_period = (period_ps + postdiv - 1) / postdiv; + /* keep pll in range 500..1250 MHz */ + if (pll_period < 800 || pll_period > 2000) + continue; + for (m = 3; m <= 40; ++m) { + n = REF_PERIOD_PS * m * postdiv / period_ps; + if (n < 5 || n > 256) + continue; + t = REF_PERIOD_PS * m * postdiv / n; + t -= period_ps; + if (t >= 0 && t < best_error) { + par->pll_m = m; + par->pll_n = n; + par->pll_pd1 = pdiv1; + par->pll_pd2 = pdiv2; + best_error = t; + } + } + } + } + if (best_error == 1000000) + return -1; + return 0; +} + +static int calc_pixclock(struct gxt4500_par *par) +{ + return REF_PERIOD_PS * par->pll_m * par->pll_pd1 * par->pll_pd2 + / par->pll_n; +} + +static int gxt4500_var_to_par(struct fb_var_screeninfo *var, + struct 
gxt4500_par *par) +{ + if (var->xres + var->xoffset > var->xres_virtual || + var->yres + var->yoffset > var->yres_virtual || + var->xres_virtual > 4096) + return -EINVAL; + if ((var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED) + return -EINVAL; + + if (calc_pll(var->pixclock, par) < 0) + return -EINVAL; + + switch (var->bits_per_pixel) { + case 32: + if (var->transp.length) + par->pixfmt = DFA_PIX_32BIT; + else + par->pixfmt = DFA_PIX_24BIT; + break; + case 24: + par->pixfmt = DFA_PIX_24BIT; + break; + case 16: + if (var->green.length == 5) + par->pixfmt = DFA_PIX_16BIT_1555; + else + par->pixfmt = DFA_PIX_16BIT_565; + break; + case 8: + par->pixfmt = DFA_PIX_8BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct fb_bitfield eightbits = {0, 8}; +static const struct fb_bitfield nobits = {0, 0}; + +static void gxt4500_unpack_pixfmt(struct fb_var_screeninfo *var, + int pixfmt) +{ + var->bits_per_pixel = pixsize[pixfmt] * 8; + var->red = eightbits; + var->green = eightbits; + var->blue = eightbits; + var->transp = nobits; + + switch (pixfmt) { + case DFA_PIX_16BIT_565: + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + break; + case DFA_PIX_16BIT_1555: + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.length = 1; + break; + case DFA_PIX_32BIT: + var->transp.length = 8; + break; + } + if (pixfmt != DFA_PIX_8BIT) { + var->green.offset = var->red.length; + var->blue.offset = var->green.offset + var->green.length; + if (var->transp.length) + var->transp.offset = + var->blue.offset + var->blue.length; + } +} + +static int gxt4500_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct gxt4500_par par; + int err; + + par = *(struct gxt4500_par *)info->par; + err = gxt4500_var_to_par(var, &par); + if (!err) { + var->pixclock = calc_pixclock(&par); + gxt4500_unpack_pixfmt(var, par.pixfmt); + } + return err; +} + +static int gxt4500_set_par(struct fb_info *info) +{ + struct gxt4500_par *par = info->par; + struct fb_var_screeninfo *var = &info->var; + int err; + u32 ctrlreg; + unsigned int dfa_ctl, pixfmt, stride; + unsigned int wid_tiles, i; + unsigned int prefetch_pix, htot; + struct gxt4500_par save_par; + + save_par = *par; + err = gxt4500_var_to_par(var, par); + if (err) { + *par = save_par; + return err; + } + + /* turn off DTG for now */ + ctrlreg = readreg(par, DTG_CONTROL); + ctrlreg &= ~(DTG_CTL_ENABLE | DTG_CTL_SCREEN_REFRESH); + writereg(par, DTG_CONTROL, ctrlreg); + + /* set PLL registers */ + writereg(par, PLL_M, mdivtab[par->pll_m - 1]); + writereg(par, PLL_N, ndivtab[par->pll_n - 2]); + writereg(par, PLL_POSTDIV, + ((8 - par->pll_pd1) << 3) | (8 - par->pll_pd2)); + msleep(20); + + /* turn off hardware cursor */ + writereg(par, CURSOR_MODE, CURSOR_MODE_OFF); + + /* reset raster engine */ + writereg(par, CTRL_REG0, CR0_RASTER_RESET | (CR0_RASTER_RESET << 16)); + udelay(10); + writereg(par, CTRL_REG0, CR0_RASTER_RESET << 16); + + /* set display timing generator registers */ + htot = var->xres + var->left_margin + var->right_margin + + var->hsync_len; + writereg(par, DTG_HORIZ_EXTENT, htot - 1); + writereg(par, DTG_HORIZ_DISPLAY, var->xres - 1); + writereg(par, DTG_HSYNC_START, var->xres + var->right_margin - 1); + writereg(par, DTG_HSYNC_END, + var->xres + var->right_margin + var->hsync_len - 1); + writereg(par, DTG_HSYNC_END_COMP, + var->xres + var->right_margin + var->hsync_len - 1); + writereg(par, DTG_VERT_EXTENT, + var->yres + var->upper_margin + var->lower_margin + + 
var->vsync_len - 1); + writereg(par, DTG_VERT_DISPLAY, var->yres - 1); + writereg(par, DTG_VSYNC_START, var->yres + var->lower_margin - 1); + writereg(par, DTG_VSYNC_END, + var->yres + var->lower_margin + var->vsync_len - 1); + prefetch_pix = 3300000 / var->pixclock; + if (prefetch_pix >= htot) + prefetch_pix = htot - 1; + writereg(par, DTG_VERT_SHORT, htot - prefetch_pix - 1); + ctrlreg |= DTG_CTL_ENABLE | DTG_CTL_SCREEN_REFRESH; + writereg(par, DTG_CONTROL, ctrlreg); + + /* calculate stride in DFA aperture */ + if (var->xres_virtual > 2048) { + stride = 4096; + dfa_ctl = DFA_FB_STRIDE_4k; + } else if (var->xres_virtual > 1024) { + stride = 2048; + dfa_ctl = DFA_FB_STRIDE_2k; + } else { + stride = 1024; + dfa_ctl = DFA_FB_STRIDE_1k; + } + + /* Set up framebuffer definition */ + wid_tiles = (var->xres_virtual + 63) >> 6; + + /* XXX add proper FB allocation here someday */ + writereg(par, FB_AB_CTRL, FB_CTRL_TYPE | (wid_tiles << 16) | 0); + writereg(par, REFRESH_AB_CTRL, FB_CTRL_TYPE | (wid_tiles << 16) | 0); + writereg(par, FB_CD_CTRL, FB_CTRL_TYPE | (wid_tiles << 16) | 0); + writereg(par, REFRESH_CD_CTRL, FB_CTRL_TYPE | (wid_tiles << 16) | 0); + writereg(par, REFRESH_START, (var->xoffset << 16) | var->yoffset); + writereg(par, REFRESH_SIZE, (var->xres << 16) | var->yres); + + /* Set up framebuffer access by CPU */ + + pixfmt = par->pixfmt; + dfa_ctl |= DFA_FB_ENABLE | pixfmt; + writereg(par, DFA_FB_A, dfa_ctl); + + /* + * Set up window attribute table. + * We set all WAT entries the same so it doesn't matter what the + * window ID (WID) plane contains. + */ + for (i = 0; i < 32; ++i) { + writereg(par, WAT_FMT + (i << 4), watfmt[pixfmt]); + writereg(par, WAT_CMAP_OFFSET + (i << 4), 0); + writereg(par, WAT_CTRL + (i << 4), 0); + writereg(par, WAT_GAMMA_CTRL + (i << 4), WAT_GAMMA_DISABLE); + } + + /* Set sync polarity etc. */ + ctrlreg = readreg(par, SYNC_CTL) & + ~(SYNC_CTL_SYNC_ON_RGB | SYNC_CTL_HSYNC_INV | + SYNC_CTL_VSYNC_INV); + if (var->sync & FB_SYNC_ON_GREEN) + ctrlreg |= SYNC_CTL_SYNC_ON_RGB; + if (!(var->sync & FB_SYNC_HOR_HIGH_ACT)) + ctrlreg |= SYNC_CTL_HSYNC_INV; + if (!(var->sync & FB_SYNC_VERT_HIGH_ACT)) + ctrlreg |= SYNC_CTL_VSYNC_INV; + writereg(par, SYNC_CTL, ctrlreg); + + info->fix.line_length = stride * pixsize[pixfmt]; + info->fix.visual = (pixfmt == DFA_PIX_8BIT)? 
FB_VISUAL_PSEUDOCOLOR: + FB_VISUAL_DIRECTCOLOR; + + return 0; +} + +static int gxt4500_setcolreg(unsigned int reg, unsigned int red, + unsigned int green, unsigned int blue, + unsigned int transp, struct fb_info *info) +{ + u32 cmap_entry; + struct gxt4500_par *par = info->par; + + if (reg > 1023) + return 1; + cmap_entry = ((transp & 0xff00) << 16) | ((blue & 0xff00) << 8) | + (green & 0xff00) | (red >> 8); + writereg(par, CMAP + reg * 4, cmap_entry); + + if (reg < 16 && par->pixfmt != DFA_PIX_8BIT) { + u32 *pal = info->pseudo_palette; + u32 val = reg; + switch (par->pixfmt) { + case DFA_PIX_16BIT_565: + val |= (reg << 11) | (reg << 6); + break; + case DFA_PIX_16BIT_1555: + val |= (reg << 10) | (reg << 5); + break; + case DFA_PIX_32BIT: + val |= (reg << 24); + /* fall through */ + case DFA_PIX_24BIT: + val |= (reg << 16) | (reg << 8); + break; + } + pal[reg] = val; + } + + return 0; +} + +static int gxt4500_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct gxt4500_par *par = info->par; + + if (var->xoffset & 7) + return -EINVAL; + if (var->xoffset + var->xres > var->xres_virtual || + var->yoffset + var->yres > var->yres_virtual) + return -EINVAL; + + writereg(par, REFRESH_START, (var->xoffset << 16) | var->yoffset); + return 0; +} + +static int gxt4500_blank(int blank, struct fb_info *info) +{ + struct gxt4500_par *par = info->par; + int ctrl, dctl; + + ctrl = readreg(par, SYNC_CTL); + ctrl &= ~(SYNC_CTL_SYNC_OFF | SYNC_CTL_HSYNC_OFF | SYNC_CTL_VSYNC_OFF); + dctl = readreg(par, DISP_CTL); + dctl |= DISP_CTL_OFF; + switch (blank) { + case FB_BLANK_UNBLANK: + dctl &= ~DISP_CTL_OFF; + break; + case FB_BLANK_POWERDOWN: + ctrl |= SYNC_CTL_SYNC_OFF; + break; + case FB_BLANK_HSYNC_SUSPEND: + ctrl |= SYNC_CTL_HSYNC_OFF; + break; + case FB_BLANK_VSYNC_SUSPEND: + ctrl |= SYNC_CTL_VSYNC_OFF; + break; + default: ; + } + writereg(par, SYNC_CTL, ctrl); + writereg(par, DISP_CTL, dctl); + + return 0; +} + +static const struct fb_fix_screeninfo gxt4500_fix __devinitdata = { + .id = "IBM GXT4500P", + .type = FB_TYPE_PACKED_PIXELS, + .visual = FB_VISUAL_PSEUDOCOLOR, + .xpanstep = 8, + .ypanstep = 1, + .mmio_len = 0x20000, +}; + +static struct fb_ops gxt4500_ops = { + .owner = THIS_MODULE, + .fb_check_var = gxt4500_check_var, + .fb_set_par = gxt4500_set_par, + .fb_setcolreg = gxt4500_setcolreg, + .fb_pan_display = gxt4500_pan_display, + .fb_blank = gxt4500_blank, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +/* PCI functions */ +static int __devinit gxt4500_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int err; + unsigned long reg_phys, fb_phys; + struct gxt4500_par *par; + struct fb_info *info; + struct fb_var_screeninfo var; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "gxt4500: cannot enable PCI device: %d\n", + err); + return err; + } + + reg_phys = pci_resource_start(pdev, 0); + if (!request_mem_region(reg_phys, pci_resource_len(pdev, 0), + "gxt4500 regs")) { + dev_err(&pdev->dev, "gxt4500: cannot get registers\n"); + goto err_nodev; + } + + fb_phys = pci_resource_start(pdev, 1); + if (!request_mem_region(fb_phys, pci_resource_len(pdev, 1), + "gxt4500 FB")) { + dev_err(&pdev->dev, "gxt4500: cannot get framebuffer\n"); + goto err_free_regs; + } + + info = framebuffer_alloc(sizeof(struct gxt4500_par), &pdev->dev); + if (!info) { + dev_err(&pdev->dev, "gxt4500: cannot alloc FB info record"); + goto err_free_fb; + } + par = info->par; + info->fix = gxt4500_fix; + 
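gxt4500_probe() above acquires its resources one by one (PCI regions, the register and framebuffer mappings, the cmap) and unwinds them on failure through a chain of goto labels in reverse order of acquisition, the usual kernel idiom for probe routines. A minimal user-space sketch of the same control flow, with malloc() standing in for the real resources (the names here are illustrative, not the driver's):

	#include <stdlib.h>

	static int probe_like(void)
	{
		void *regs, *fb, *cmap;

		regs = malloc(64);		/* like request_mem_region()/ioremap() */
		if (!regs)
			goto err_nodev;
		fb = malloc(64);		/* like mapping the framebuffer */
		if (!fb)
			goto err_free_regs;
		cmap = malloc(64);		/* like fb_alloc_cmap() */
		if (!cmap)
			goto err_free_fb;
		return 0;			/* success: keep everything */

	err_free_fb:
		free(fb);
	err_free_regs:
		free(regs);
	err_nodev:
		return -1;
	}

Each label releases exactly one resource and falls through to the next, so every failure point frees precisely what was acquired before it.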
info->pseudo_palette = par->pseudo_palette; + + info->fix.mmio_start = reg_phys; + par->regs = ioremap(reg_phys, pci_resource_len(pdev, 0)); + if (!par->regs) { + dev_err(&pdev->dev, "gxt4500: cannot map registers\n"); + goto err_free_all; + } + + info->fix.smem_start = fb_phys; + info->fix.smem_len = pci_resource_len(pdev, 1); + info->screen_base = ioremap(fb_phys, pci_resource_len(pdev, 1)); + if (!info->screen_base) { + dev_err(&pdev->dev, "gxt4500: cannot map framebuffer\n"); + goto err_unmap_regs; + } + + pci_set_drvdata(pdev, info); + + /* Set byte-swapping for DFA aperture for all pixel sizes */ + pci_write_config_dword(pdev, CFG_ENDIAN0, 0x333300); + + info->fbops = &gxt4500_ops; + info->flags = FBINFO_FLAG_DEFAULT; + + err = fb_alloc_cmap(&info->cmap, 256, 0); + if (err) { + dev_err(&pdev->dev, "gxt4500: cannot allocate cmap\n"); + goto err_unmap_all; + } + + gxt4500_blank(FB_BLANK_UNBLANK, info); + + if (!fb_find_mode(&var, info, mode_option, NULL, 0, &defaultmode, 8)) { + dev_err(&pdev->dev, "gxt4500: cannot find valid video mode\n"); + goto err_free_cmap; + } + info->var = var; + if (gxt4500_set_par(info)) { + printk(KERN_ERR "gxt4500: cannot set video mode\n"); + goto err_free_cmap; + } + + if (register_framebuffer(info) < 0) { + dev_err(&pdev->dev, "gxt4500: cannot register framebuffer\n"); + goto err_free_cmap; + } + printk(KERN_INFO "fb%d: %s frame buffer device\n", + info->node, info->fix.id); + + return 0; + + err_free_cmap: + fb_dealloc_cmap(&info->cmap); + err_unmap_all: + iounmap(info->screen_base); + err_unmap_regs: + iounmap(par->regs); + err_free_all: + framebuffer_release(info); + err_free_fb: + release_mem_region(fb_phys, pci_resource_len(pdev, 1)); + err_free_regs: + release_mem_region(reg_phys, pci_resource_len(pdev, 0)); + err_nodev: + return -ENODEV; +} + +static void __devexit gxt4500_remove(struct pci_dev *pdev) +{ + struct fb_info *info = pci_get_drvdata(pdev); + struct gxt4500_par *par; + + if (!info) + return; + par = info->par; + unregister_framebuffer(info); + fb_dealloc_cmap(&info->cmap); + iounmap(par->regs); + iounmap(info->screen_base); + release_mem_region(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + release_mem_region(pci_resource_start(pdev, 1), + pci_resource_len(pdev, 1)); + framebuffer_release(info); +} + +/* supported chipsets */ +static const struct pci_device_id gxt4500_pci_tbl[] = { + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_GXT4500P, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, gxt4500_pci_tbl); + +static struct pci_driver gxt4500_driver = { + .name = "gxt4500", + .id_table = gxt4500_pci_tbl, + .probe = gxt4500_probe, + .remove = __devexit_p(gxt4500_remove), +}; + +static int __devinit gxt4500_init(void) +{ +#ifndef MODULE + if (fb_get_options("gxt4500", &mode_option)) + return -ENODEV; +#endif + + return pci_register_driver(&gxt4500_driver); +} +module_init(gxt4500_init); + +static void __exit gxt4500_exit(void) +{ + pci_unregister_driver(&gxt4500_driver); +} +module_exit(gxt4500_exit); + +MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>"); +MODULE_DESCRIPTION("FBDev driver for IBM GXT4500P"); +MODULE_LICENSE("GPL"); +module_param(mode_option, charp, 0); +MODULE_PARM_DESC(mode_option, "Specify resolution as \"<xres>x<yres>[-<bpp>][@<refresh>]\""); diff --git a/fs/buffer.c b/fs/buffer.c index 517860f2d75..d1f1b54d310 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -35,6 +35,7 @@ #include <linux/hash.h> #include <linux/suspend.h> #include <linux/buffer_head.h> +#include 
<linux/task_io_accounting_ops.h> #include <linux/bio.h> #include <linux/notifier.h> #include <linux/cpu.h> @@ -724,20 +725,21 @@ int __set_page_dirty_buffers(struct page *page) } spin_unlock(&mapping->private_lock); - if (!TestSetPageDirty(page)) { - write_lock_irq(&mapping->tree_lock); - if (page->mapping) { /* Race with truncate? */ - if (mapping_cap_account_dirty(mapping)) - __inc_zone_page_state(page, NR_FILE_DIRTY); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); + if (TestSetPageDirty(page)) + return 0; + + write_lock_irq(&mapping->tree_lock); + if (page->mapping) { /* Race with truncate? */ + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + task_io_account_write(PAGE_CACHE_SIZE); } - write_unlock_irq(&mapping->tree_lock); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return 1; + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); } - return 0; + write_unlock_irq(&mapping->tree_lock); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return 1; } EXPORT_SYMBOL(__set_page_dirty_buffers); @@ -2851,8 +2853,13 @@ int try_to_free_buffers(struct page *page) * could encounter a non-uptodate page, which is unresolvable. * This only applies in the rare case where try_to_free_buffers * succeeds but the page is not freed. + * + * Also, during truncate, discard_buffer will have marked all + * the page's buffers clean. We discover that here and clean + * the page also. */ - clear_page_dirty(page); + if (test_clear_page_dirty(page)) + task_io_account_cancelled_write(PAGE_CACHE_SIZE); } out: if (buffers_to_free) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1aa95a50cac..0f05cab5d24 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -29,6 +29,7 @@ #include <linux/pagevec.h> #include <linux/smp_lock.h> #include <linux/writeback.h> +#include <linux/task_io_accounting_ops.h> #include <linux/delay.h> #include <asm/div64.h> #include "cifsfs.h" @@ -1812,6 +1813,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, cFYI(1, ("Read error in readpages: %d", rc)); break; } else if (bytes_read > 0) { + task_io_account_read(bytes_read); pSMBr = (struct smb_com_read_rsp *)smb_read_data; cifs_copy_cache_pages(mapping, page_list, bytes_read, smb_read_data + 4 /* RFC1001 hdr */ + diff --git a/fs/compat.c b/fs/compat.c index b766964a625..0ec70e3cee0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1679,19 +1679,19 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, { fd_set_bits fds; char *bits; - int size, max_fdset, ret = -EINVAL; + int size, max_fds, ret = -EINVAL; struct fdtable *fdt; if (n < 0) goto out_nofds; - /* max_fdset can increase, so grab it once to avoid race */ + /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n > max_fdset) - n = max_fdset; + if (n > max_fds) + n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), diff --git a/fs/direct-io.c b/fs/direct-io.c index 5981e17f46f..d9d0833444f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/pagemap.h> +#include <linux/task_io_accounting_ops.h> #include <linux/bio.h> #include <linux/wait.h> #include <linux/err.h> @@ -121,8 +122,7 @@ struct dio { /* BIO completion state */ spinlock_t bio_lock; /* protects BIO fields below */ - int bio_count; 
/* nr bios to be completed */ - int bios_in_flight; /* nr bios in flight */ + unsigned long refcount; /* direct_io_worker() and bios */ struct bio *bio_list; /* singly linked via bi_private */ struct task_struct *waiter; /* waiting task (NULL if none) */ @@ -209,76 +209,55 @@ static struct page *dio_get_page(struct dio *dio) return dio->pages[dio->head++]; } -/* - * Called when all DIO BIO I/O has been completed - let the filesystem - * know, if it registered an interest earlier via get_block. Pass the - * private field of the map buffer_head so that filesystems can use it - * to hold additional state between get_block calls and dio_complete. - */ -static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) -{ - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); - if (dio->lock_type == DIO_LOCKING) - /* lockdep: non-owner release */ - up_read_non_owner(&dio->inode->i_alloc_sem); -} - -/* - * Called when a BIO has been processed. If the count goes to zero then IO is - * complete and we can signal this to the AIO layer. +/** + * dio_complete() - called when all DIO BIO I/O has been completed + * @offset: the byte offset in the file of the completed operation + * + * This releases locks as dictated by the locking type, lets interested parties + * know that a DIO operation has completed, and calculates the resulting return + * code for the operation. + * + * It lets the filesystem know if it registered an interest earlier via + * get_block. Pass the private field of the map buffer_head so that + * filesystems can use it to hold additional state between get_block calls and + * dio_complete. */ -static void finished_one_bio(struct dio *dio) +static int dio_complete(struct dio *dio, loff_t offset, int ret) { - unsigned long flags; + ssize_t transferred = 0; - spin_lock_irqsave(&dio->bio_lock, flags); - if (dio->bio_count == 1) { - if (dio->is_async) { - ssize_t transferred; - loff_t offset; - - /* - * Last reference to the dio is going away. - * Drop spinlock and complete the DIO. - */ - spin_unlock_irqrestore(&dio->bio_lock, flags); + /* + * AIO submission can race with bio completion to get here while + * expecting to have the last io completed by bio completion. + * In that case -EIOCBQUEUED is in fact not an error we want + * to preserve through this call. 
+ */ + if (ret == -EIOCBQUEUED) + ret = 0; - /* Check for short read case */ - transferred = dio->result; - offset = dio->iocb->ki_pos; + if (dio->result) { + transferred = dio->result; - if ((dio->rw == READ) && - ((offset + transferred) > dio->i_size)) - transferred = dio->i_size - offset; + /* Check for short read case */ + if ((dio->rw == READ) && ((offset + transferred) > dio->i_size)) + transferred = dio->i_size - offset; + } - /* check for error in completion path */ - if (dio->io_error) - transferred = dio->io_error; + if (dio->end_io && dio->result) + dio->end_io(dio->iocb, offset, transferred, + dio->map_bh.b_private); + if (dio->lock_type == DIO_LOCKING) + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); - dio_complete(dio, offset, transferred); + if (ret == 0) + ret = dio->page_errors; + if (ret == 0) + ret = dio->io_error; + if (ret == 0) + ret = transferred; - /* Complete AIO later if falling back to buffered i/o */ - if (dio->result == dio->size || - ((dio->rw == READ) && dio->result)) { - aio_complete(dio->iocb, transferred, 0); - kfree(dio); - return; - } else { - /* - * Falling back to buffered - */ - spin_lock_irqsave(&dio->bio_lock, flags); - dio->bio_count--; - if (dio->waiter) - wake_up_process(dio->waiter); - spin_unlock_irqrestore(&dio->bio_lock, flags); - return; - } - } - } - dio->bio_count--; - spin_unlock_irqrestore(&dio->bio_lock, flags); + return ret; } static int dio_bio_complete(struct dio *dio, struct bio *bio); @@ -288,12 +267,27 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio); static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) { struct dio *dio = bio->bi_private; + unsigned long remaining; + unsigned long flags; if (bio->bi_size) return 1; /* cleanup the bio */ dio_bio_complete(dio, bio); + + spin_lock_irqsave(&dio->bio_lock, flags); + remaining = --dio->refcount; + if (remaining == 1 && dio->waiter) + wake_up_process(dio->waiter); + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (remaining == 0) { + int ret = dio_complete(dio, dio->iocb->ki_pos, 0); + aio_complete(dio->iocb, ret, 0); + kfree(dio); + } + return 0; } @@ -315,8 +309,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error) spin_lock_irqsave(&dio->bio_lock, flags); bio->bi_private = dio->bio_list; dio->bio_list = bio; - dio->bios_in_flight--; - if (dio->waiter && dio->bios_in_flight == 0) + if (--dio->refcount == 1 && dio->waiter) wake_up_process(dio->waiter); spin_unlock_irqrestore(&dio->bio_lock, flags); return 0; @@ -347,6 +340,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, * In the AIO read case we speculatively dirty the pages before starting IO. * During IO completion, any of these pages which happen to have been written * back will be redirtied by bio_check_pages_dirty(). + * + * bios hold a dio reference between submit_bio and ->end_io. */ static void dio_bio_submit(struct dio *dio) { @@ -354,12 +349,14 @@ static void dio_bio_submit(struct dio *dio) unsigned long flags; bio->bi_private = dio; + spin_lock_irqsave(&dio->bio_lock, flags); - dio->bio_count++; - dio->bios_in_flight++; + dio->refcount++; spin_unlock_irqrestore(&dio->bio_lock, flags); + if (dio->is_async && dio->rw == READ) bio_set_pages_dirty(bio); + submit_bio(dio->rw, bio); dio->bio = NULL; @@ -376,28 +373,37 @@ static void dio_cleanup(struct dio *dio) } /* - * Wait for the next BIO to complete. Remove it and return it. + * Wait for the next BIO to complete. Remove it and return it. 
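The return value of the new dio_complete() above is folded together with a fixed precedence: -EIOCBQUEUED is first translated to 0, an error handed in by the caller is kept, otherwise page_errors (failures while pinning user pages), then io_error (bio-level failures), and only if all of those are zero does the byte count come back. A standalone restatement of that chain with a couple of worked values (illustrative only):

	#include <stdio.h>

	static long fold_result(long ret, long page_errors, long io_error,
				long transferred)
	{
		if (ret == 0)
			ret = page_errors;
		if (ret == 0)
			ret = io_error;
		if (ret == 0)
			ret = transferred;
		return ret;
	}

	int main(void)
	{
		printf("%ld\n", fold_result(0, 0, 0, 4096));	/* 4096: clean transfer */
		printf("%ld\n", fold_result(0, 0, -5, 4096));	/* -5: an I/O error masks the count */
		return 0;
	}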
NULL is + * returned once all BIOs have been completed. This must only be called once + * all bios have been issued so that dio->refcount can only decrease. This + * requires that that the caller hold a reference on the dio. */ static struct bio *dio_await_one(struct dio *dio) { unsigned long flags; - struct bio *bio; + struct bio *bio = NULL; spin_lock_irqsave(&dio->bio_lock, flags); - while (dio->bio_list == NULL) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (dio->bio_list == NULL) { - dio->waiter = current; - spin_unlock_irqrestore(&dio->bio_lock, flags); - blk_run_address_space(dio->inode->i_mapping); - io_schedule(); - spin_lock_irqsave(&dio->bio_lock, flags); - dio->waiter = NULL; - } - set_current_state(TASK_RUNNING); + + /* + * Wait as long as the list is empty and there are bios in flight. bio + * completion drops the count, maybe adds to the list, and wakes while + * holding the bio_lock so we don't need set_current_state()'s barrier + * and can call it after testing our condition. + */ + while (dio->refcount > 1 && dio->bio_list == NULL) { + __set_current_state(TASK_UNINTERRUPTIBLE); + dio->waiter = current; + spin_unlock_irqrestore(&dio->bio_lock, flags); + io_schedule(); + /* wake up sets us TASK_RUNNING */ + spin_lock_irqsave(&dio->bio_lock, flags); + dio->waiter = NULL; + } + if (dio->bio_list) { + bio = dio->bio_list; + dio->bio_list = bio->bi_private; } - bio = dio->bio_list; - dio->bio_list = bio->bi_private; spin_unlock_irqrestore(&dio->bio_lock, flags); return bio; } @@ -426,34 +432,24 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) } bio_put(bio); } - finished_one_bio(dio); return uptodate ? 0 : -EIO; } /* - * Wait on and process all in-flight BIOs. + * Wait on and process all in-flight BIOs. This must only be called once + * all bios have been issued so that the refcount can only decrease. + * This just waits for all bios to make it through dio_bio_complete. IO + * errors are propogated through dio->io_error and should be propogated via + * dio_complete(). */ -static int dio_await_completion(struct dio *dio) +static void dio_await_completion(struct dio *dio) { - int ret = 0; - - if (dio->bio) - dio_bio_submit(dio); - - /* - * The bio_lock is not held for the read of bio_count. - * This is ok since it is the dio_bio_complete() that changes - * bio_count. - */ - while (dio->bio_count) { - struct bio *bio = dio_await_one(dio); - int ret2; - - ret2 = dio_bio_complete(dio, bio); - if (ret == 0) - ret = ret2; - } - return ret; + struct bio *bio; + do { + bio = dio_await_one(dio); + if (bio) + dio_bio_complete(dio, bio); + } while (bio); } /* @@ -675,6 +671,13 @@ submit_page_section(struct dio *dio, struct page *page, { int ret = 0; + if (dio->rw & WRITE) { + /* + * Read accounting is performed in submit_bio() + */ + task_io_account_write(len); + } + /* * Can we just grow the current page's presence in the dio? */ @@ -953,6 +956,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, struct dio *dio) { unsigned long user_addr; + unsigned long flags; int seg; ssize_t ret = 0; ssize_t ret2; @@ -983,17 +987,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, dio->iocb = iocb; dio->i_size = i_size_read(inode); - /* - * BIO completion state. - * - * ->bio_count starts out at one, and we decrement it to zero after all - * BIOs are submitted. This to avoid the situation where a really fast - * (or synchronous) device could take the count to zero while we're - * still submitting BIOs. 
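The rewritten dio_await_one() above sleeps only while bios are still in flight (refcount > 1) and nothing has been queued on bio_list, and the completion side wakes it while holding the same bio_lock, which is why the comment notes that no extra barrier is needed. A user-space analogue of that shape, with a pthread mutex and condition variable standing in for bio_lock and wake_up_process() (a sketch, not the kernel code):

	#include <pthread.h>

	struct awaiter {
		pthread_mutex_t lock;		/* plays the role of bio_lock */
		pthread_cond_t wake;
		unsigned long refcount;		/* like dio->refcount */
		void *list;			/* like dio->bio_list */
	};

	static void *await_one(struct awaiter *a)
	{
		void *item = NULL;

		pthread_mutex_lock(&a->lock);
		while (a->refcount > 1 && a->list == NULL)
			pthread_cond_wait(&a->wake, &a->lock);	/* drops the lock while asleep */
		if (a->list) {
			item = a->list;
			a->list = NULL;
		}
		pthread_mutex_unlock(&a->lock);
		return item;
	}

	static void complete_one(struct awaiter *a, void *item)
	{
		pthread_mutex_lock(&a->lock);
		a->refcount--;
		a->list = item;
		pthread_cond_signal(&a->wake);	/* wake under the lock, as end_io does */
		pthread_mutex_unlock(&a->lock);
	}

Because the condition is only ever changed and tested under the lock, the waiter cannot miss a wakeup, which is exactly the property the kernel loop relies on.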
- */ - dio->bio_count = 1; - dio->bios_in_flight = 0; spin_lock_init(&dio->bio_lock); + dio->refcount = 1; dio->bio_list = NULL; dio->waiter = NULL; @@ -1069,6 +1064,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, if (dio->bio) dio_bio_submit(dio); + /* All IO is now issued, send it on its way */ + blk_run_address_space(inode->i_mapping); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. @@ -1084,74 +1082,41 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, mutex_unlock(&dio->inode->i_mutex); /* - * OK, all BIOs are submitted, so we can decrement bio_count to truly - * reflect the number of to-be-processed BIOs. + * The only time we want to leave bios in flight is when a successful + * partial aio read or full aio write have been setup. In that case + * bio completion will call aio_complete. The only time it's safe to + * call aio_complete is when we return -EIOCBQUEUED, so we key on that. + * This had *better* be the only place that raises -EIOCBQUEUED. */ - if (dio->is_async) { - int should_wait = 0; + BUG_ON(ret == -EIOCBQUEUED); + if (dio->is_async && ret == 0 && dio->result && + ((rw & READ) || (dio->result == dio->size))) + ret = -EIOCBQUEUED; - if (dio->result < dio->size && (rw & WRITE)) { - dio->waiter = current; - should_wait = 1; - } - if (ret == 0) - ret = dio->result; - finished_one_bio(dio); /* This can free the dio */ - blk_run_address_space(inode->i_mapping); - if (should_wait) { - unsigned long flags; - /* - * Wait for already issued I/O to drain out and - * release its references to user-space pages - * before returning to fallback on buffered I/O - */ - - spin_lock_irqsave(&dio->bio_lock, flags); - set_current_state(TASK_UNINTERRUPTIBLE); - while (dio->bio_count) { - spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); - spin_lock_irqsave(&dio->bio_lock, flags); - set_current_state(TASK_UNINTERRUPTIBLE); - } - spin_unlock_irqrestore(&dio->bio_lock, flags); - set_current_state(TASK_RUNNING); - kfree(dio); - } - } else { - ssize_t transferred = 0; - - finished_one_bio(dio); - ret2 = dio_await_completion(dio); - if (ret == 0) - ret = ret2; - if (ret == 0) - ret = dio->page_errors; - if (dio->result) { - loff_t i_size = i_size_read(inode); - - transferred = dio->result; - /* - * Adjust the return value if the read crossed a - * non-block-aligned EOF. - */ - if (rw == READ && (offset + transferred > i_size)) - transferred = i_size - offset; - } - dio_complete(dio, offset, transferred); - if (ret == 0) - ret = transferred; + if (ret != -EIOCBQUEUED) + dio_await_completion(dio); - /* We could have also come here on an AIO file extend */ - if (!is_sync_kiocb(iocb) && (rw & WRITE) && - ret >= 0 && dio->result == dio->size) - /* - * For AIO writes where we have completed the - * i/o, we have to mark the the aio complete. - */ - aio_complete(iocb, ret, 0); + /* + * Sync will always be dropping the final ref and completing the + * operation. AIO can if it was a broken operation described above or + * in fact if all the bios race to complete before we get here. In + * that case dio_complete() translates the EIOCBQUEUED into the proper + * return code that the caller will hand to aio_complete(). + * + * This is managed by the bio_lock instead of being an atomic_t so that + * completion paths can drop their ref and use the remaining count to + * decide to wake the submission path atomically. 
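The comment above gives the rule for when direct_io_worker() now leaves bios in flight and returns -EIOCBQUEUED: only an async request that made progress, and for writes only when the whole request was set up. A standalone restatement of that rule, assuming no error occurred during submission (this encodes the comment's intent, not the exact kernel expression):

	#include <stdio.h>

	enum { READ_OP, WRITE_OP };

	static int hands_off_to_bio_completion(int is_async, int rw,
					       long result, long size)
	{
		return is_async && result != 0 &&
		       (rw == READ_OP || result == size);
	}

	int main(void)
	{
		printf("%d\n", hands_off_to_bio_completion(1, READ_OP, 2048, 4096));	/* 1: partial aio read */
		printf("%d\n", hands_off_to_bio_completion(1, WRITE_OP, 2048, 4096));	/* 0: partial aio write falls back */
		printf("%d\n", hands_off_to_bio_completion(0, READ_OP, 4096, 4096));	/* 0: sync I/O always waits here */
		return 0;
	}

When this is true, the last completing bio drops the final reference and calls dio_complete() and aio_complete(); otherwise the submitter drops the last reference itself and completes synchronously.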
+ */ + spin_lock_irqsave(&dio->bio_lock, flags); + ret2 = --dio->refcount; + spin_unlock_irqrestore(&dio->bio_lock, flags); + BUG_ON(!dio->is_async && ret2 != 0); + if (ret2 == 0) { + ret = dio_complete(dio, offset, ret); kfree(dio); - } + } else + BUG_ON(ret != -EIOCBQUEUED); + return ret; } diff --git a/fs/exec.c b/fs/exec.c index 12d8cd461b4..11fe93f7363 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -783,7 +783,7 @@ static void flush_old_files(struct files_struct * files) j++; i = j * __NFDBITS; fdt = files_fdtable(files); - if (i >= fdt->max_fds || i >= fdt->max_fdset) + if (i >= fdt->max_fds) break; set = fdt->close_on_exec->fds_bits[j]; if (!set) diff --git a/fs/fcntl.c b/fs/fcntl.c index 2bdaef35da5..8e382a5d51b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -77,10 +77,9 @@ repeat: start = files->next_fd; newfd = start; - if (start < fdt->max_fdset) { + if (start < fdt->max_fds) newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fdset, start); - } + fdt->max_fds, start); error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) diff --git a/fs/file.c b/fs/file.c index 51aef675470..857fa49e984 100644 --- a/fs/file.c +++ b/fs/file.c @@ -32,46 +32,28 @@ struct fdtable_defer { */ static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); - -/* - * Allocate an fd array, using kmalloc or vmalloc. - * Note: the array isn't cleared at allocation time. - */ -struct file ** alloc_fd_array(int num) +static inline void * alloc_fdmem(unsigned int size) { - struct file **new_fds; - int size = num * sizeof(struct file *); - if (size <= PAGE_SIZE) - new_fds = (struct file **) kmalloc(size, GFP_KERNEL); - else - new_fds = (struct file **) vmalloc(size); - return new_fds; + return kmalloc(size, GFP_KERNEL); + else + return vmalloc(size); } -void free_fd_array(struct file **array, int num) +static inline void free_fdarr(struct fdtable *fdt) { - int size = num * sizeof(struct file *); - - if (!array) { - printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num); - return; - } - - if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ - return; - else if (size <= PAGE_SIZE) - kfree(array); + if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) + kfree(fdt->fd); else - vfree(array); + vfree(fdt->fd); } -static void __free_fdtable(struct fdtable *fdt) +static inline void free_fdset(struct fdtable *fdt) { - free_fdset(fdt->open_fds, fdt->max_fdset); - free_fdset(fdt->close_on_exec, fdt->max_fdset); - free_fd_array(fdt->fd, fdt->max_fds); - kfree(fdt); + if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) + kfree(fdt->open_fds); + else + vfree(fdt->open_fds); } static void free_fdtable_work(struct work_struct *work) @@ -86,41 +68,32 @@ static void free_fdtable_work(struct work_struct *work) spin_unlock_bh(&f->lock); while(fdt) { struct fdtable *next = fdt->next; - __free_fdtable(fdt); + vfree(fdt->fd); + free_fdset(fdt); + kfree(fdt); fdt = next; } } -static void free_fdtable_rcu(struct rcu_head *rcu) +void free_fdtable_rcu(struct rcu_head *rcu) { struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); - int fdset_size, fdarray_size; struct fdtable_defer *fddef; BUG_ON(!fdt); - fdset_size = fdt->max_fdset / 8; - fdarray_size = fdt->max_fds * sizeof(struct file *); - if (fdt->free_files) { + if (fdt->max_fds <= NR_OPEN_DEFAULT) { /* - * The this fdtable was embedded in the files structure - * and the files structure itself was getting destroyed. - * It is now safe to free the files structure. 
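The new alloc_fdmem(), free_fdarr() and free_fdset() helpers above choose kmalloc()/kfree() or vmalloc()/vfree() purely from the allocation size, and the break-even points follow directly from how big each structure is per fd. A quick check of the arithmetic, assuming 4096-byte pages and 8-byte pointers (both are assumptions of this sketch, not values from the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_size = 4096, ptr_size = 8, bits_per_byte = 8;

		/* fd array: one pointer per fd, so a page holds this many fds */
		printf("fd array stays kmalloc'ed up to %lu fds\n",
		       page_size / ptr_size);			/* 512 */

		/* open_fds and close_on_exec share one allocation of 2*nr/8 bytes */
		printf("fdsets stay kmalloc'ed up to %lu fds\n",
		       page_size * bits_per_byte / 2);		/* 16384 */
		return 0;
	}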
+ * This fdtable is embedded in the files structure and that + * structure itself is getting destroyed. */ - kmem_cache_free(files_cachep, fdt->free_files); + kmem_cache_free(files_cachep, + container_of(fdt, struct files_struct, fdtab)); return; } - if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE && - fdt->max_fds <= NR_OPEN_DEFAULT) { - /* - * The fdtable was embedded - */ - return; - } - if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { - kfree(fdt->open_fds); - kfree(fdt->close_on_exec); + if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { kfree(fdt->fd); + kfree(fdt->open_fds); kfree(fdt); } else { fddef = &get_cpu_var(fdtable_defer_list); @@ -134,136 +107,74 @@ static void free_fdtable_rcu(struct rcu_head *rcu) } } -void free_fdtable(struct fdtable *fdt) -{ - if (fdt->free_files || - fdt->max_fdset > EMBEDDED_FD_SET_SIZE || - fdt->max_fds > NR_OPEN_DEFAULT) - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. */ -static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) +static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { - int i; - int count; - - BUG_ON(nfdt->max_fdset < fdt->max_fdset); - BUG_ON(nfdt->max_fds < fdt->max_fds); - /* Copy the existing tables and install the new pointers */ - - i = fdt->max_fdset / (sizeof(unsigned long) * 8); - count = (nfdt->max_fdset - fdt->max_fdset) / 8; + unsigned int cpy, set; - /* - * Don't copy the entire array if the current fdset is - * not yet initialised. - */ - if (i) { - memcpy (nfdt->open_fds, fdt->open_fds, - fdt->max_fdset/8); - memcpy (nfdt->close_on_exec, fdt->close_on_exec, - fdt->max_fdset/8); - memset (&nfdt->open_fds->fds_bits[i], 0, count); - memset (&nfdt->close_on_exec->fds_bits[i], 0, count); - } - - /* Don't copy/clear the array if we are creating a new - fd array for fork() */ - if (fdt->max_fds) { - memcpy(nfdt->fd, fdt->fd, - fdt->max_fds * sizeof(struct file *)); - /* clear the remainder of the array */ - memset(&nfdt->fd[fdt->max_fds], 0, - (nfdt->max_fds - fdt->max_fds) * - sizeof(struct file *)); - } -} - -/* - * Allocate an fdset array, using kmalloc or vmalloc. - * Note: the array isn't cleared at allocation time. 
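The embedded case in the new free_fdtable_rcu() above recovers the enclosing files_struct from the fdtable pointer with container_of() and frees the whole object in one go. container_of() is just offsetof() arithmetic; a standalone illustration (the struct names here are stand-ins):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of_ex(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct files_like {
		int count;
		struct { int max_fds; } fdtab;	/* embedded, like files_struct.fdtab */
	};

	int main(void)
	{
		struct files_like files = { .count = 1, .fdtab = { .max_fds = 64 } };
		struct files_like *back =
			container_of_ex(&files.fdtab, struct files_like, fdtab);

		printf("%d\n", back->count);	/* 1: we recovered the parent object */
		return 0;
	}

The max_fds <= NR_OPEN_DEFAULT test is what identifies the embedded table: only a table that was never expanded still has the default size, so only that path may free the containing files_struct.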
- */ -fd_set * alloc_fdset(int num) -{ - fd_set *new_fdset; - int size = num / 8; + BUG_ON(nfdt->max_fds < ofdt->max_fds); + if (ofdt->max_fds == 0) + return; - if (size <= PAGE_SIZE) - new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); - else - new_fdset = (fd_set *) vmalloc(size); - return new_fdset; + cpy = ofdt->max_fds * sizeof(struct file *); + set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); + memcpy(nfdt->fd, ofdt->fd, cpy); + memset((char *)(nfdt->fd) + cpy, 0, set); + + cpy = ofdt->max_fds / BITS_PER_BYTE; + set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE; + memcpy(nfdt->open_fds, ofdt->open_fds, cpy); + memset((char *)(nfdt->open_fds) + cpy, 0, set); + memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); + memset((char *)(nfdt->close_on_exec) + cpy, 0, set); } -void free_fdset(fd_set *array, int num) +static struct fdtable * alloc_fdtable(unsigned int nr) { - if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */ - return; - else if (num <= 8 * PAGE_SIZE) - kfree(array); - else - vfree(array); -} + struct fdtable *fdt; + char *data; -static struct fdtable *alloc_fdtable(int nr) -{ - struct fdtable *fdt = NULL; - int nfds = 0; - fd_set *new_openset = NULL, *new_execset = NULL; - struct file **new_fds; + /* + * Figure out how many fds we actually want to support in this fdtable. + * Allocation steps are keyed to the size of the fdarray, since it + * grows far faster than any of the other dynamic data. We try to fit + * the fdarray into comfortable page-tuned chunks: starting at 1024B + * and growing in powers of two from there on. + */ + nr /= (1024 / sizeof(struct file *)); + nr = roundup_pow_of_two(nr + 1); + nr *= (1024 / sizeof(struct file *)); + if (nr > NR_OPEN) + nr = NR_OPEN; - fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); + fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) - goto out; - - nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1)); - if (nfds > NR_OPEN) - nfds = NR_OPEN; + goto out; + fdt->max_fds = nr; + data = alloc_fdmem(nr * sizeof(struct file *)); + if (!data) + goto out_fdt; + fdt->fd = (struct file **)data; + data = alloc_fdmem(max_t(unsigned int, + 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); + if (!data) + goto out_arr; + fdt->open_fds = (fd_set *)data; + data += nr / BITS_PER_BYTE; + fdt->close_on_exec = (fd_set *)data; + INIT_RCU_HEAD(&fdt->rcu); + fdt->next = NULL; - new_openset = alloc_fdset(nfds); - new_execset = alloc_fdset(nfds); - if (!new_openset || !new_execset) - goto out; - fdt->open_fds = new_openset; - fdt->close_on_exec = new_execset; - fdt->max_fdset = nfds; - - nfds = NR_OPEN_DEFAULT; - /* - * Expand to the max in easy steps, and keep expanding it until - * we have enough for the requested fd array size. 
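The comment in alloc_fdtable() above describes the sizing policy: work in 1024-byte chunks of the fd array and round the chunk count up to a power of two (the kernel additionally clamps the result to NR_OPEN, omitted here). With 8-byte pointers one chunk holds 128 fds; a standalone rendering of the arithmetic (the pointer size is an assumption of this sketch):

	#include <stdio.h>

	static unsigned int roundup_pow_of_two_ex(unsigned int n)
	{
		unsigned int r = 1;

		while (r < n)
			r <<= 1;
		return r;
	}

	static unsigned int fdtable_size(unsigned int nr)
	{
		unsigned int fds_per_chunk = 1024 / 8;	/* 1024 / sizeof(struct file *) */

		nr /= fds_per_chunk;
		nr = roundup_pow_of_two_ex(nr + 1);
		nr *= fds_per_chunk;
		return nr;
	}

	int main(void)
	{
		printf("%u\n", fdtable_size(100));	/* 128: one 1024-byte chunk */
		printf("%u\n", fdtable_size(300));	/* 512: rounds 3 chunks up to 4 */
		return 0;
	}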
- */ - do { -#if NR_OPEN_DEFAULT < 256 - if (nfds < 256) - nfds = 256; - else -#endif - if (nfds < (PAGE_SIZE / sizeof(struct file *))) - nfds = PAGE_SIZE / sizeof(struct file *); - else { - nfds = nfds * 2; - if (nfds > NR_OPEN) - nfds = NR_OPEN; - } - } while (nfds <= nr); - new_fds = alloc_fd_array(nfds); - if (!new_fds) - goto out2; - fdt->fd = new_fds; - fdt->max_fds = nfds; - fdt->free_files = NULL; return fdt; -out2: - nfds = fdt->max_fdset; -out: - free_fdset(new_openset, nfds); - free_fdset(new_execset, nfds); + +out_arr: + free_fdarr(fdt); +out_fdt: kfree(fdt); +out: return NULL; } @@ -290,14 +201,17 @@ static int expand_fdtable(struct files_struct *files, int nr) * we dropped the lock */ cur_fdt = files_fdtable(files); - if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) { + if (nr >= cur_fdt->max_fds) { /* Continue as planned */ copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); - free_fdtable(cur_fdt); + if (cur_fdt->max_fds > NR_OPEN_DEFAULT) + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); } else { /* Somebody else expanded, so undo our attempt */ - __free_fdtable(new_fdt); + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); } return 1; } @@ -316,11 +230,10 @@ int expand_files(struct files_struct *files, int nr) fdt = files_fdtable(files); /* Do we need to expand? */ - if (nr < fdt->max_fdset && nr < fdt->max_fds) + if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN || - nr >= NR_OPEN) + if (nr >= NR_OPEN) return -EMFILE; /* All good, so we try */ diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index d38e0d575e4..cceaf57e377 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -55,7 +55,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) spin_lock_init(&transaction->t_handle_lock); /* Set up the commit timer for the new transaction. 
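The jbd hunk just below switches the commit timer to round_jiffies(), which pulls the expiry onto a whole-second boundary so that timers from unrelated code tend to fire together and the CPU takes fewer wakeups. A rough user-space model of the idea (the real helper rounds to the nearest second and applies a small per-CPU skew, both ignored here):

	#include <stdio.h>

	#define HZ 250

	static unsigned long round_jiffies_model(unsigned long j)
	{
		return ((j + HZ - 1) / HZ) * HZ;	/* pull up to a 1-second boundary */
	}

	int main(void)
	{
		/* a 5-second commit interval started at jiffies = 1234 */
		printf("%lu\n", round_jiffies_model(1234 + 5 * HZ));	/* 2500 */
		return 0;
	}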
*/ - journal->j_commit_timer.expires = transaction->t_expires; + journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); diff --git a/fs/open.c b/fs/open.c index 0d94319e868..c989fb4cf7b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -864,8 +864,7 @@ int get_unused_fd(void) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fdset, + fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, files->next_fd); /* diff --git a/fs/proc/base.c b/fs/proc/base.c index fd959d5b5a8..77a57b5799c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1804,6 +1804,27 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filld proc_base_instantiate, task, p); } +#ifdef CONFIG_TASK_IO_ACCOUNTING +static int proc_pid_io_accounting(struct task_struct *task, char *buffer) +{ + return sprintf(buffer, + "rchar: %llu\n" + "wchar: %llu\n" + "syscr: %llu\n" + "syscw: %llu\n" + "read_bytes: %llu\n" + "write_bytes: %llu\n" + "cancelled_write_bytes: %llu\n", + (unsigned long long)task->rchar, + (unsigned long long)task->wchar, + (unsigned long long)task->syscr, + (unsigned long long)task->syscw, + (unsigned long long)task->ioac.read_bytes, + (unsigned long long)task->ioac.write_bytes, + (unsigned long long)task->ioac.cancelled_write_bytes); +} +#endif + /* * Thread groups */ @@ -1855,6 +1876,9 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, pid_io_accounting), +#endif }; static int proc_tgid_base_readdir(struct file * filp, diff --git a/fs/select.c b/fs/select.c index dcbc1112b7e..fe0893afd93 100644 --- a/fs/select.c +++ b/fs/select.c @@ -311,7 +311,7 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, { fd_set_bits fds; void *bits; - int ret, max_fdset; + int ret, max_fds; unsigned int size; struct fdtable *fdt; /* Allocate small arguments on the stack to save memory and be faster */ @@ -321,13 +321,13 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (n < 0) goto out_nofds; - /* max_fdset can increase, so grab it once to avoid race */ + /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n > max_fdset) - n = max_fdset; + if (n > max_fds) + n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8e6b56fc1ca..b56eb754e2d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1406,7 +1406,7 @@ xfs_vm_direct_IO( xfs_end_io_direct); } - if (unlikely(ret <= 0 && iocb->private)) + if (unlikely(ret != -EIOCBQUEUED && iocb->private)) xfs_destroy_ioend(iocb->private); return ret; } diff --git a/include/asm-arm/arch-pxa/pxa2xx_spi.h b/include/asm-arm/arch-pxa/pxa2xx_spi.h index 915590c391c..acc7ec7a84a 100644 --- a/include/asm-arm/arch-pxa/pxa2xx_spi.h +++ b/include/asm-arm/arch-pxa/pxa2xx_spi.h @@ -27,16 +27,13 @@ #define SSP1_SerClkDiv(x) (((CLOCK_SPEED_HZ/2/(x+1))<<8)&0x0000ff00) #define SSP2_SerClkDiv(x) (((CLOCK_SPEED_HZ/(x+1))<<8)&0x000fff00) #define SSP3_SerClkDiv(x) (((CLOCK_SPEED_HZ/(x+1))<<8)&0x000fff00) -#define SSP_TIMEOUT_SCALE (2712) #elif defined(CONFIG_PXA27x) #define 
CLOCK_SPEED_HZ 13000000 #define SSP1_SerClkDiv(x) (((CLOCK_SPEED_HZ/(x+1))<<8)&0x000fff00) #define SSP2_SerClkDiv(x) (((CLOCK_SPEED_HZ/(x+1))<<8)&0x000fff00) #define SSP3_SerClkDiv(x) (((CLOCK_SPEED_HZ/(x+1))<<8)&0x000fff00) -#define SSP_TIMEOUT_SCALE (769) #endif -#define SSP_TIMEOUT(x) ((x*10000)/SSP_TIMEOUT_SCALE) #define SSP1_VIRT ((void *)(io_p2v(__PREG(SSCR0_P(1))))) #define SSP2_VIRT ((void *)(io_p2v(__PREG(SSCR0_P(2))))) #define SSP3_VIRT ((void *)(io_p2v(__PREG(SSCR0_P(3))))) @@ -63,7 +60,7 @@ struct pxa2xx_spi_chip { u8 tx_threshold; u8 rx_threshold; u8 dma_burst_size; - u32 timeout_microsecs; + u32 timeout; u8 enable_loopback; void (*cs_control)(u32 command); }; diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 978d0959613..ac58580ad66 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -89,6 +89,7 @@ static inline int node_to_first_cpu(int node) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index a6e38565ab4..22ed6749557 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -101,6 +101,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 64, \ diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 50c014007de..6610495f5f1 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -66,6 +66,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ | SD_WAKE_IDLE \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 5c8f49280db..2facec5914d 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -47,6 +47,7 @@ extern int __node_distance(int, int); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-xtensa/asmmacro.h b/include/asm-xtensa/asmmacro.h new file mode 100644 index 00000000000..76915cabad1 --- /dev/null +++ b/include/asm-xtensa/asmmacro.h @@ -0,0 +1,153 @@ +/* + * include/asm-xtensa/asmmacro.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005 Tensilica Inc. + */ + +#ifndef _XTENSA_ASMMACRO_H +#define _XTENSA_ASMMACRO_H + +#include <asm/variant/core.h> + +/* + * Some little helpers for loops. Use zero-overhead-loops + * where applicable and if supported by the processor. + * + * __loopi ar, at, size, inc + * ar register initialized with the start address + * at scratch register used by macro + * size size immediate value + * inc increment + * + * __loops ar, as, at, inc_log2[, mask_log2][, cond][, ncond] + * ar register initialized with the start address + * as register initialized with the size + * at scratch register use by macro + * inc_log2 increment [in log2] + * mask_log2 mask [in log2] + * cond true condition (used in loop'cond') + * ncond false condition (used in b'ncond') + * + * __loop as + * restart loop. 
'as' register must not have been modified! + * + * __endla ar, at, incr + * ar start address (modified) + * as scratch register used by macro + * inc increment + */ + +/* + * loop for given size as immediate + */ + + .macro __loopi ar, at, size, incr + +#if XCHAL_HAVE_LOOPS + movi \at, ((\size + \incr - 1) / (\incr)) + loop \at, 99f +#else + addi \at, \ar, \size + 98: +#endif + + .endm + +/* + * loop for given size in register + */ + + .macro __loops ar, as, at, incr_log2, mask_log2, cond, ncond + +#if XCHAL_HAVE_LOOPS + .ifgt \incr_log2 - 1 + addi \at, \as, (1 << \incr_log2) - 1 + .ifnc \mask_log2, + extui \at, \at, \incr_log2, \mask_log2 + .else + srli \at, \at, \incr_log2 + .endif + .endif + loop\cond \at, 99f +#else + .ifnc \mask_log2, + extui \at, \as, \incr_log2, \mask_log2 + .else + .ifnc \ncond, + srli \at, \as, \incr_log2 + .endif + .endif + .ifnc \ncond, + b\ncond \at, 99f + + .endif + .ifnc \mask_log2, + slli \at, \at, \incr_log2 + add \at, \ar, \at + .else + add \at, \ar, \as + .endif +#endif + 98: + + .endm + +/* + * loop from ar to ax + */ + + .macro __loopt ar, as, at, incr_log2 + +#if XCHAL_HAVE_LOOPS + sub \at, \as, \ar + .ifgt \incr_log2 - 1 + addi \at, \at, (1 << \incr_log2) - 1 + srli \at, \at, \incr_log2 + .endif + loop \at, 99f +#else + 98: +#endif + + .endm + +/* + * restart loop. registers must be unchanged + */ + + .macro __loop as + +#if XCHAL_HAVE_LOOPS + loop \as, 99f +#else + 98: +#endif + + .endm + +/* + * end of loop with no increment of the address. + */ + + .macro __endl ar, as +#if !XCHAL_HAVE_LOOPS + bltu \ar, \as, 98b +#endif + 99: + .endm + +/* + * end of loop with increment of the address. + */ + + .macro __endla ar, as, incr + addi \ar, \ar, \incr + __endl \ar \as + .endm + + +#endif /* _XTENSA_ASMMACRO_H */ diff --git a/include/asm-xtensa/bug.h b/include/asm-xtensa/bug.h index 56703659b20..3e52d72712f 100644 --- a/include/asm-xtensa/bug.h +++ b/include/asm-xtensa/bug.h @@ -13,29 +13,6 @@ #ifndef _XTENSA_BUG_H #define _XTENSA_BUG_H -#include <linux/stringify.h> - -#define ILL __asm__ __volatile__ (".byte 0,0,0\n") - -#ifdef CONFIG_KALLSYMS -# define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - ILL; \ -} while (0) -#else -# define BUG() do { \ - printk("kernel BUG!\n"); \ - ILL; \ -} while (0) -#endif - -#define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0) -#define PAGE_BUG(page) do { BUG(); } while (0) -#define WARN_ON(condition) do { \ - if (unlikely((condition)!=0)) { \ - printk ("Warning in %s at %s:%d\n", __FUNCTION__, __FILE__, __LINE__); \ - dump_stack(); \ - } \ -} while (0) +#include <asm-generic/bug.h> #endif /* _XTENSA_BUG_H */ diff --git a/include/asm-xtensa/byteorder.h b/include/asm-xtensa/byteorder.h index 0b1552569aa..0f540a5f4c0 100644 --- a/include/asm-xtensa/byteorder.h +++ b/include/asm-xtensa/byteorder.h @@ -11,10 +11,9 @@ #ifndef _XTENSA_BYTEORDER_H #define _XTENSA_BYTEORDER_H -#include <asm/processor.h> #include <asm/types.h> -static __inline__ __const__ __u32 ___arch__swab32(__u32 x) +static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x) { __u32 res; /* instruction sequence from Xtensa ISA release 2/2000 */ @@ -29,7 +28,7 @@ static __inline__ __const__ __u32 ___arch__swab32(__u32 x) return res; } -static __inline__ __const__ __u16 ___arch__swab16(__u16 x) +static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x) { /* Given that 'short' values are signed (i.e., can be negative), * we cannot assume that the upper 16-bits of the register are diff --git 
a/include/asm-xtensa/cache.h b/include/asm-xtensa/cache.h index 1e79c0e2746..1c4a78f29ae 100644 --- a/include/asm-xtensa/cache.h +++ b/include/asm-xtensa/cache.h @@ -4,7 +4,6 @@ * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. - * 2 of the License, or (at your option) any later version. * * (C) 2001 - 2005 Tensilica Inc. */ @@ -12,21 +11,14 @@ #ifndef _XTENSA_CACHE_H #define _XTENSA_CACHE_H -#include <xtensa/config/core.h> +#include <asm/variant/core.h> -#if XCHAL_ICACHE_SIZE > 0 -# if (XCHAL_ICACHE_SIZE % (XCHAL_ICACHE_LINESIZE*XCHAL_ICACHE_WAYS*4)) != 0 -# error cache configuration outside expected/supported range! -# endif -#endif +#define L1_CACHE_SHIFT XCHAL_DCACHE_LINEWIDTH +#define L1_CACHE_BYTES XCHAL_DCACHE_LINESIZE +#define SMP_CACHE_BYTES L1_CACHE_BYTES -#if XCHAL_DCACHE_SIZE > 0 -# if (XCHAL_DCACHE_SIZE % (XCHAL_DCACHE_LINESIZE*XCHAL_DCACHE_WAYS*4)) != 0 -# error cache configuration outside expected/supported range! -# endif -#endif +#define DCACHE_WAY_SIZE (XCHAL_DCACHE_SIZE/XCHAL_DCACHE_WAYS) +#define ICACHE_WAY_SIZE (XCHAL_ICACHE_SIZE/XCHAL_ICACHE_WAYS) -#define L1_CACHE_SHIFT XCHAL_CACHE_LINEWIDTH_MAX -#define L1_CACHE_BYTES XCHAL_CACHE_LINESIZE_MAX #endif /* _XTENSA_CACHE_H */ diff --git a/include/asm-xtensa/cacheasm.h b/include/asm-xtensa/cacheasm.h new file mode 100644 index 00000000000..2c20a58f94c --- /dev/null +++ b/include/asm-xtensa/cacheasm.h @@ -0,0 +1,177 @@ +/* + * include/asm-xtensa/cacheasm.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Tensilica Inc. + */ + +#include <asm/cache.h> +#include <asm/asmmacro.h> +#include <linux/stringify.h> + +/* + * Define cache functions as macros here so that they can be used + * by the kernel and boot loader. We should consider moving them to a + * library that can be linked by both. 
+ * + * Locking + * + * ___unlock_dcache_all + * ___unlock_icache_all + * + * Flush and invaldating + * + * ___flush_invalidate_dcache_{all|range|page} + * ___flush_dcache_{all|range|page} + * ___invalidate_dcache_{all|range|page} + * ___invalidate_icache_{all|range|page} + * + */ + + .macro __loop_cache_all ar at insn size line_width + + movi \ar, 0 + + __loopi \ar, \at, \size, (4 << (\line_width)) + \insn \ar, 0 << (\line_width) + \insn \ar, 1 << (\line_width) + \insn \ar, 2 << (\line_width) + \insn \ar, 3 << (\line_width) + __endla \ar, \at, 4 << (\line_width) + + .endm + + + .macro __loop_cache_range ar as at insn line_width + + extui \at, \ar, 0, \line_width + add \as, \as, \at + + __loops \ar, \as, \at, \line_width + \insn \ar, 0 + __endla \ar, \at, (1 << (\line_width)) + + .endm + + + .macro __loop_cache_page ar at insn line_width + + __loopi \ar, \at, PAGE_SIZE, 4 << (\line_width) + \insn \ar, 0 << (\line_width) + \insn \ar, 1 << (\line_width) + \insn \ar, 2 << (\line_width) + \insn \ar, 3 << (\line_width) + __endla \ar, \at, 4 << (\line_width) + + .endm + + +#if XCHAL_DCACHE_LINE_LOCKABLE + + .macro ___unlock_dcache_all ar at + + __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + + .endm + +#endif + +#if XCHAL_ICACHE_LINE_LOCKABLE + + .macro ___unlock_icache_all ar at + + __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE XCHAL_ICACHE_LINEWIDTH + + .endm +#endif + + .macro ___flush_invalidate_dcache_all ar at + + __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___flush_dcache_all ar at + + __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_dcache_all ar at + + __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \ + XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_icache_all ar at + + __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \ + XCHAL_ICACHE_LINEWIDTH + + .endm + + + + .macro ___flush_invalidate_dcache_range ar as at + + __loop_cache_range \ar \as \at dhwbi XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___flush_dcache_range ar as at + + __loop_cache_range \ar \as \at dhwb XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_dcache_range ar as at + + __loop_cache_range \ar \as \at dhi XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_icache_range ar as at + + __loop_cache_range \ar \as \at ihi XCHAL_ICACHE_LINEWIDTH + + .endm + + + + .macro ___flush_invalidate_dcache_page ar as + + __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___flush_dcache_page ar as + + __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_dcache_page ar as + + __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH + + .endm + + + .macro ___invalidate_icache_page ar as + + __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH + + .endm + diff --git a/include/asm-xtensa/cacheflush.h b/include/asm-xtensa/cacheflush.h index 44a36e08784..337765b629d 100644 --- a/include/asm-xtensa/cacheflush.h +++ b/include/asm-xtensa/cacheflush.h @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * (C) 2001 - 2005 Tensilica Inc. + * (C) 2001 - 2006 Tensilica Inc. 
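The __loop_cache_range macro above visits every cache line that intersects a byte range and applies one cache instruction per line (dhwb, dhi, ihi and so on). The same traversal expressed as plain C, with a callback standing in for the cache instruction (the 32-byte line size is an assumption of this sketch):

	#include <stdio.h>

	#define LINE_WIDTH 5				/* log2 of a 32-byte line */
	#define LINE_SIZE  (1UL << LINE_WIDTH)

	static void for_each_cache_line(unsigned long start, unsigned long size,
					void (*op)(unsigned long line))
	{
		unsigned long addr = start & ~(LINE_SIZE - 1);
		unsigned long end = start + size;

		for (; addr < end; addr += LINE_SIZE)
			op(addr);
	}

	static void print_line(unsigned long line)
	{
		printf("op on line at 0x%lx\n", line);
	}

	int main(void)
	{
		/* covers the two lines at 0x1000 and 0x1020 */
		for_each_cache_line(0x1008, 0x30, print_line);
		return 0;
	}

The real macros use the Xtensa zero-overhead loop when XCHAL_HAVE_LOOPS is set and fall back to an ordinary branch loop otherwise, but the set of lines touched is the same.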
*/ #ifndef _XTENSA_CACHEFLUSH_H diff --git a/include/asm-xtensa/checksum.h b/include/asm-xtensa/checksum.h index 5435aff9a4b..23534c60b3a 100644 --- a/include/asm-xtensa/checksum.h +++ b/include/asm-xtensa/checksum.h @@ -12,7 +12,7 @@ #define _XTENSA_CHECKSUM_H #include <linux/in6.h> -#include <xtensa/config/core.h> +#include <asm/variant/core.h> /* * computes the checksum of a memory block at buff, length len, diff --git a/include/asm-xtensa/coprocessor.h b/include/asm-xtensa/coprocessor.h index 5093034723b..bd09ec02d57 100644 --- a/include/asm-xtensa/coprocessor.h +++ b/include/asm-xtensa/coprocessor.h @@ -11,7 +11,16 @@ #ifndef _XTENSA_COPROCESSOR_H #define _XTENSA_COPROCESSOR_H -#include <xtensa/config/core.h> +#include <asm/variant/core.h> +#include <asm/variant/tie.h> + +#if !XCHAL_HAVE_CP + +#define XTENSA_CP_EXTRA_OFFSET 0 +#define XTENSA_CP_EXTRA_ALIGN 1 /* must be a power of 2 */ +#define XTENSA_CP_EXTRA_SIZE 0 + +#else #define XTOFS(last_start,last_size,align) \ ((last_start+last_size+align-1) & -align) @@ -67,4 +76,6 @@ extern void save_coprocessor_registers(void*, int); # endif #endif +#endif + #endif /* _XTENSA_COPROCESSOR_H */ diff --git a/include/asm-xtensa/dma.h b/include/asm-xtensa/dma.h index db2633f6778..e30f3abf48f 100644 --- a/include/asm-xtensa/dma.h +++ b/include/asm-xtensa/dma.h @@ -12,7 +12,6 @@ #define _XTENSA_DMA_H #include <asm/io.h> /* need byte IO */ -#include <xtensa/config/core.h> /* * This is only to be defined if we have PC-like DMA. @@ -44,7 +43,9 @@ * enters another area, and virt_to_phys() may not return * the value desired). */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET + XCHAL_KSEG_CACHED_SIZE - 1) + +#define MAX_DMA_ADDRESS (PAGE_OFFSET + XCHAL_KIO_SIZE - 1) + /* Reserve and release a DMA channel */ extern int request_dma(unsigned int dmanr, const char * device_id); diff --git a/include/asm-xtensa/elf.h b/include/asm-xtensa/elf.h index de0667453b2..f0f9fd8560a 100644 --- a/include/asm-xtensa/elf.h +++ b/include/asm-xtensa/elf.h @@ -13,9 +13,8 @@ #ifndef _XTENSA_ELF_H #define _XTENSA_ELF_H +#include <asm/variant/core.h> #include <asm/ptrace.h> -#include <asm/coprocessor.h> -#include <xtensa/config/core.h> /* Xtensa processor ELF architecture-magic number */ @@ -118,11 +117,15 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; * using memcpy(). But we do allow space for such alignment, * to allow optimizations of layout and copying. 
*/ - +#if 0 #define TOTAL_FPREGS_SIZE \ (4 + XTENSA_CPE_LTABLE_SIZE + XTENSA_CP_EXTRA_SIZE) #define ELF_NFPREG \ ((TOTAL_FPREGS_SIZE + sizeof(elf_fpreg_t) - 1) / sizeof(elf_fpreg_t)) +#else +#define TOTAL_FPREGS_SIZE 0 +#define ELF_NFPREG 0 +#endif typedef unsigned int elf_fpreg_t; typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; diff --git a/include/asm-xtensa/fcntl.h b/include/asm-xtensa/fcntl.h index ec066ae96ca..0609fc691b7 100644 --- a/include/asm-xtensa/fcntl.h +++ b/include/asm-xtensa/fcntl.h @@ -14,48 +14,86 @@ /* open/fcntl - O_SYNC is only implemented on blocks devices and on files located on an ext2 file system */ -#define O_APPEND 0x0008 -#define O_SYNC 0x0010 -#define O_NONBLOCK 0x0080 -#define O_CREAT 0x0100 /* not fcntl */ -#define O_EXCL 0x0400 /* not fcntl */ -#define O_NOCTTY 0x0800 /* not fcntl */ -#define FASYNC 0x1000 /* fcntl, for BSD compatibility */ -#define O_LARGEFILE 0x2000 /* allow large file opens - currently ignored */ -#define O_DIRECT 0x8000 /* direct disk access hint - currently ignored*/ -#define O_NOATIME 0x100000 - -#define F_GETLK 14 -#define F_GETLK64 15 +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW 0400000 /* don't follow links */ +#define O_NOATIME 01000000 + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 #define F_SETLK 6 #define F_SETLKW 7 -#define F_SETLK64 16 -#define F_SETLKW64 17 -#define F_SETOWN 24 /* for sockets. */ -#define F_GETOWN 23 /* for sockets. */ +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ -typedef struct flock { +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... 
Which allows concurrent read & write ops */ + +struct flock { short l_type; short l_whence; - __kernel_off_t l_start; - __kernel_off_t l_len; - long l_sysid; - __kernel_pid_t l_pid; - long pad[4]; -} flock_t; + off_t l_start; + off_t l_len; + pid_t l_pid; +}; struct flock64 { short l_type; short l_whence; - __kernel_off_t l_start; - __kernel_off_t l_len; + loff_t l_start; + loff_t l_len; pid_t l_pid; }; -#define HAVE_ARCH_STRUCT_FLOCK -#define HAVE_ARCH_STRUCT_FLOCK64 - -#include <asm-generic/fcntl.h> +#define F_LINUX_SPECIFIC_BASE 1024 #endif /* _XTENSA_FCNTL_H */ diff --git a/include/asm-xtensa/fixmap.h b/include/asm-xtensa/fixmap.h deleted file mode 100644 index 4423b8ad495..00000000000 --- a/include/asm-xtensa/fixmap.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * include/asm-xtensa/fixmap.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_FIXMAP_H -#define _XTENSA_FIXMAP_H - -#include <asm/processor.h> - -#ifdef CONFIG_MMU - -/* - * Here we define all the compile-time virtual addresses. - */ - -#if XCHAL_SEG_MAPPABLE_VADDR != 0 -# error "Current port requires virtual user space starting at 0" -#endif -#if XCHAL_SEG_MAPPABLE_SIZE < 0x80000000 -# error "Current port requires at least 0x8000000 bytes for user space" -#endif - -/* Verify instruction/data ram/rom and xlmi don't overlay vmalloc space. */ - -#define __IN_VMALLOC(addr) \ - (((addr) >= VMALLOC_START) && ((addr) < VMALLOC_END)) -#define __SPAN_VMALLOC(start,end) \ - (((start) < VMALLOC_START) && ((end) >= VMALLOC_END)) -#define INSIDE_VMALLOC(start,end) \ - (__IN_VMALLOC((start)) || __IN_VMALLOC(end) || __SPAN_VMALLOC((start),(end))) - -#if XCHAL_NUM_INSTROM -# if XCHAL_NUM_INSTROM == 1 -# if INSIDE_VMALLOC(XCHAL_INSTROM0_VADDR,XCHAL_INSTROM0_VADDR+XCHAL_INSTROM0_SIZE) -# error vmalloc range conflicts with instrom0 -# endif -# endif -# if XCHAL_NUM_INSTROM == 2 -# if INSIDE_VMALLOC(XCHAL_INSTROM1_VADDR,XCHAL_INSTROM1_VADDR+XCHAL_INSTROM1_SIZE) -# error vmalloc range conflicts with instrom1 -# endif -# endif -#endif - -#if XCHAL_NUM_INSTRAM -# if XCHAL_NUM_INSTRAM == 1 -# if INSIDE_VMALLOC(XCHAL_INSTRAM0_VADDR,XCHAL_INSTRAM0_VADDR+XCHAL_INSTRAM0_SIZE) -# error vmalloc range conflicts with instram0 -# endif -# endif -# if XCHAL_NUM_INSTRAM == 2 -# if INSIDE_VMALLOC(XCHAL_INSTRAM1_VADDR,XCHAL_INSTRAM1_VADDR+XCHAL_INSTRAM1_SIZE) -# error vmalloc range conflicts with instram1 -# endif -# endif -#endif - -#if XCHAL_NUM_DATAROM -# if XCHAL_NUM_DATAROM == 1 -# if INSIDE_VMALLOC(XCHAL_DATAROM0_VADDR,XCHAL_DATAROM0_VADDR+XCHAL_DATAROM0_SIZE) -# error vmalloc range conflicts with datarom0 -# endif -# endif -# if XCHAL_NUM_DATAROM == 2 -# if INSIDE_VMALLOC(XCHAL_DATAROM1_VADDR,XCHAL_DATAROM1_VADDR+XCHAL_DATAROM1_SIZE) -# error vmalloc range conflicts with datarom1 -# endif -# endif -#endif - -#if XCHAL_NUM_DATARAM -# if XCHAL_NUM_DATARAM == 1 -# if INSIDE_VMALLOC(XCHAL_DATARAM0_VADDR,XCHAL_DATARAM0_VADDR+XCHAL_DATARAM0_SIZE) -# error vmalloc range conflicts with dataram0 -# endif -# endif -# if XCHAL_NUM_DATARAM == 2 -# if INSIDE_VMALLOC(XCHAL_DATARAM1_VADDR,XCHAL_DATARAM1_VADDR+XCHAL_DATARAM1_SIZE) -# error vmalloc range conflicts with dataram1 -# endif -# endif -#endif - -#if XCHAL_NUM_XLMI -# if XCHAL_NUM_XLMI == 1 -# if INSIDE_VMALLOC(XCHAL_XLMI0_VADDR,XCHAL_XLMI0_VADDR+XCHAL_XLMI0_SIZE) -# error vmalloc range conflicts with xlmi0 -# endif -# 
endif -# if XCHAL_NUM_XLMI == 2 -# if INSIDE_VMALLOC(XCHAL_XLMI1_VADDR,XCHAL_XLMI1_VADDR+XCHAL_XLMI1_SIZE) -# error vmalloc range conflicts with xlmi1 -# endif -# endif -#endif - -#if (XCHAL_NUM_INSTROM > 2) || \ - (XCHAL_NUM_INSTRAM > 2) || \ - (XCHAL_NUM_DATARAM > 2) || \ - (XCHAL_NUM_DATAROM > 2) || \ - (XCHAL_NUM_XLMI > 2) -# error Insufficient checks on vmalloc above for more than 2 devices -#endif - -/* - * USER_VM_SIZE does not necessarily equal TASK_SIZE. We bumped - * TASK_SIZE down to 0x4000000 to simplify the handling of windowed - * call instructions (currently limited to a range of 1 GByte). User - * tasks may very well reclaim the VM space from 0x40000000 to - * 0x7fffffff in the future, so we do not want the kernel becoming - * accustomed to having any of its stuff (e.g., page tables) in this - * region. This VM region is no-man's land for now. - */ - -#define USER_VM_START XCHAL_SEG_MAPPABLE_VADDR -#define USER_VM_SIZE 0x80000000 - -/* Size of page table: */ - -#define PGTABLE_SIZE_BITS (32 - XCHAL_MMU_MIN_PTE_PAGE_SIZE + 2) -#define PGTABLE_SIZE (1L << PGTABLE_SIZE_BITS) - -/* All kernel-mappable space: */ - -#define KERNEL_ALLMAP_START (USER_VM_START + USER_VM_SIZE) -#define KERNEL_ALLMAP_SIZE (XCHAL_SEG_MAPPABLE_SIZE - KERNEL_ALLMAP_START) - -/* Carve out page table at start of kernel-mappable area: */ - -#if KERNEL_ALLMAP_SIZE < PGTABLE_SIZE -#error "Gimme some space for page table!" -#endif -#define PGTABLE_START KERNEL_ALLMAP_START - -/* Remaining kernel-mappable space: */ - -#define KERNEL_MAPPED_START (KERNEL_ALLMAP_START + PGTABLE_SIZE) -#define KERNEL_MAPPED_SIZE (KERNEL_ALLMAP_SIZE - PGTABLE_SIZE) - -#if KERNEL_MAPPED_SIZE < 0x01000000 /* 16 MB is arbitrary for now */ -# error "Shouldn't the kernel have at least *some* mappable space?" -#endif - -#define MAX_LOW_MEMORY XCHAL_KSEG_CACHED_SIZE - -#endif - -/* - * Some constants used elsewhere, but perhaps only in Xtensa header - * files, so maybe we can get rid of some and access compile-time HAL - * directly... - * - * Note: We assume that system RAM is located at the very start of the - * kernel segments !! - */ -#define KERNEL_VM_LOW XCHAL_KSEG_CACHED_VADDR -#define KERNEL_VM_HIGH XCHAL_KSEG_BYPASS_VADDR -#define KERNEL_SPACE XCHAL_KSEG_CACHED_VADDR - -/* - * Returns the physical/virtual addresses of the kernel space - * (works with the cached kernel segment only, which is the - * one normally used for kernel operation). - */ - -/* PHYSICAL BYPASS CACHED - * - * bypass vaddr bypass paddr * cached vaddr - * cached vaddr cached paddr bypass vaddr * - * bypass paddr * bypass vaddr cached vaddr - * cached paddr * bypass vaddr cached vaddr - * other * * * - */ - -#define PHYSADDR(a) \ -(((unsigned)(a) >= XCHAL_KSEG_BYPASS_VADDR \ - && (unsigned)(a) < XCHAL_KSEG_BYPASS_VADDR + XCHAL_KSEG_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_BYPASS_VADDR + XCHAL_KSEG_BYPASS_PADDR : \ - ((unsigned)(a) >= XCHAL_KSEG_CACHED_VADDR \ - && (unsigned)(a) < XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_CACHED_PADDR : \ - (unsigned)(a)) - -#define BYPASS_ADDR(a) \ -(((unsigned)(a) >= XCHAL_KSEG_BYPASS_PADDR \ - && (unsigned)(a) < XCHAL_KSEG_BYPASS_PADDR + XCHAL_KSEG_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_BYPASS_PADDR + XCHAL_KSEG_BYPASS_VADDR : \ - ((unsigned)(a) >= XCHAL_KSEG_CACHED_PADDR \ - && (unsigned)(a) < XCHAL_KSEG_CACHED_PADDR + XCHAL_KSEG_CACHED_SIZE) ? 
\ - (unsigned)(a) - XCHAL_KSEG_CACHED_PADDR + XCHAL_KSEG_BYPASS_VADDR : \ - ((unsigned)(a) >= XCHAL_KSEG_CACHED_VADDR \ - && (unsigned)(a) < XCHAL_KSEG_CACHED_VADDR+XCHAL_KSEG_CACHED_SIZE)? \ - (unsigned)(a) - XCHAL_KSEG_CACHED_VADDR+XCHAL_KSEG_BYPASS_VADDR: \ - (unsigned)(a)) - -#define CACHED_ADDR(a) \ -(((unsigned)(a) >= XCHAL_KSEG_BYPASS_PADDR \ - && (unsigned)(a) < XCHAL_KSEG_BYPASS_PADDR + XCHAL_KSEG_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_BYPASS_PADDR + XCHAL_KSEG_CACHED_VADDR : \ - ((unsigned)(a) >= XCHAL_KSEG_CACHED_PADDR \ - && (unsigned)(a) < XCHAL_KSEG_CACHED_PADDR + XCHAL_KSEG_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_CACHED_PADDR + XCHAL_KSEG_CACHED_VADDR : \ - ((unsigned)(a) >= XCHAL_KSEG_BYPASS_VADDR \ - && (unsigned)(a) < XCHAL_KSEG_BYPASS_VADDR+XCHAL_KSEG_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KSEG_BYPASS_VADDR+XCHAL_KSEG_CACHED_VADDR : \ - (unsigned)(a)) - -#define PHYSADDR_IO(a) \ -(((unsigned)(a) >= XCHAL_KIO_BYPASS_VADDR \ - && (unsigned)(a) < XCHAL_KIO_BYPASS_VADDR + XCHAL_KIO_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_BYPASS_VADDR + XCHAL_KIO_BYPASS_PADDR : \ - ((unsigned)(a) >= XCHAL_KIO_CACHED_VADDR \ - && (unsigned)(a) < XCHAL_KIO_CACHED_VADDR + XCHAL_KIO_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_CACHED_VADDR + XCHAL_KIO_CACHED_PADDR : \ - (unsigned)(a)) - -#define BYPASS_ADDR_IO(a) \ -(((unsigned)(a) >= XCHAL_KIO_BYPASS_PADDR \ - && (unsigned)(a) < XCHAL_KIO_BYPASS_PADDR + XCHAL_KIO_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_BYPASS_PADDR + XCHAL_KIO_BYPASS_VADDR : \ - ((unsigned)(a) >= XCHAL_KIO_CACHED_PADDR \ - && (unsigned)(a) < XCHAL_KIO_CACHED_PADDR + XCHAL_KIO_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_CACHED_PADDR + XCHAL_KIO_BYPASS_VADDR : \ - ((unsigned)(a) >= XCHAL_KIO_CACHED_VADDR \ - && (unsigned)(a) < XCHAL_KIO_CACHED_VADDR + XCHAL_KIO_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_CACHED_VADDR + XCHAL_KIO_BYPASS_VADDR : \ - (unsigned)(a)) - -#define CACHED_ADDR_IO(a) \ -(((unsigned)(a) >= XCHAL_KIO_BYPASS_PADDR \ - && (unsigned)(a) < XCHAL_KIO_BYPASS_PADDR + XCHAL_KIO_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_BYPASS_PADDR + XCHAL_KIO_CACHED_VADDR : \ - ((unsigned)(a) >= XCHAL_KIO_CACHED_PADDR \ - && (unsigned)(a) < XCHAL_KIO_CACHED_PADDR + XCHAL_KIO_CACHED_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_CACHED_PADDR + XCHAL_KIO_CACHED_VADDR : \ - ((unsigned)(a) >= XCHAL_KIO_BYPASS_VADDR \ - && (unsigned)(a) < XCHAL_KIO_BYPASS_VADDR + XCHAL_KIO_BYPASS_SIZE) ? \ - (unsigned)(a) - XCHAL_KIO_BYPASS_VADDR + XCHAL_KIO_CACHED_VADDR : \ - (unsigned)(a)) - -#endif /* _XTENSA_ADDRSPACE_H */ - - - - - diff --git a/include/asm-xtensa/io.h b/include/asm-xtensa/io.h index 556e5eed34f..31ffc3f119c 100644 --- a/include/asm-xtensa/io.h +++ b/include/asm-xtensa/io.h @@ -1,5 +1,5 @@ /* - * linux/include/asm-xtensa/io.h + * include/asm-xtensa/io.h * * This file is subject to the terms and conditions of the GNU General Public * License. 
See the file "COPYING" in the main directory of this archive @@ -15,10 +15,11 @@ #include <asm/byteorder.h> #include <linux/types.h> -#include <asm/fixmap.h> - -#define _IO_BASE 0 +#define XCHAL_KIO_CACHED_VADDR 0xf0000000 +#define XCHAL_KIO_BYPASS_VADDR 0xf8000000 +#define XCHAL_KIO_PADDR 0xf0000000 +#define XCHAL_KIO_SIZE 0x08000000 /* * swap functions to change byte order from little-endian to big-endian and @@ -42,40 +43,43 @@ static inline unsigned int _swapl (unsigned int v) static inline unsigned long virt_to_phys(volatile void * address) { - return PHYSADDR((unsigned long)address); + return __pa(address); } static inline void * phys_to_virt(unsigned long address) { - return (void*) CACHED_ADDR(address); + return __va(address); } /* - * IO bus memory addresses are also 1:1 with the physical address + * virt_to_bus and bus_to_virt are deprecated. */ -static inline unsigned long virt_to_bus(volatile void * address) -{ - return PHYSADDR((unsigned long)address); -} - -static inline void * bus_to_virt (unsigned long address) -{ - return (void *) CACHED_ADDR(address); -} +#define virt_to_bus(x) virt_to_phys(x) +#define bus_to_virt(x) phys_to_virt(x) /* - * Change "struct page" to physical address. + * Return the virtual (cached) address for the specified bus memory. + * Note that we currently don't support any address outside the KIO segment. */ static inline void *ioremap(unsigned long offset, unsigned long size) { - return (void *) CACHED_ADDR_IO(offset); + if (offset >= XCHAL_KIO_PADDR + && offset < XCHAL_KIO_PADDR + XCHAL_KIO_SIZE) + return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_BYPASS_VADDR); + + else + BUG(); } static inline void *ioremap_nocache(unsigned long offset, unsigned long size) { - return (void *) BYPASS_ADDR_IO(offset); + if (offset >= XCHAL_KIO_PADDR + && offset < XCHAL_KIO_PADDR + XCHAL_KIO_SIZE) + return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_CACHED_VADDR); + else + BUG(); } static inline void iounmap(void *addr) @@ -121,9 +125,6 @@ static inline void __raw_writel(__u32 b, volatile void __iomem *addr) *(__force volatile __u32 *)(addr) = b; } - - - /* These are the definitions for the x86 IO instructions * inb/inw/inl/outb/outw/outl, the "string" versions * insb/insw/insl/outsb/outsw/outsl, and the "pausing" versions @@ -131,11 +132,11 @@ static inline void __raw_writel(__u32 b, volatile void __iomem *addr) * The macros don't do byte-swapping. 
*/ -#define inb(port) readb((u8 *)((port)+_IO_BASE)) -#define outb(val, port) writeb((val),(u8 *)((unsigned long)(port)+_IO_BASE)) -#define inw(port) readw((u16 *)((port)+_IO_BASE)) -#define outw(val, port) writew((val),(u16 *)((unsigned long)(port)+_IO_BASE)) -#define inl(port) readl((u32 *)((port)+_IO_BASE)) +#define inb(port) readb((u8 *)((port))) +#define outb(val, port) writeb((val),(u8 *)((unsigned long)(port))) +#define inw(port) readw((u16 *)((port))) +#define outw(val, port) writew((val),(u16 *)((unsigned long)(port))) +#define inl(port) readl((u32 *)((port))) #define outl(val, port) writel((val),(u32 *)((unsigned long)(port))) #define inb_p(port) inb((port)) @@ -180,14 +181,13 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); /* - * * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * * access - * */ + * Convert a physical pointer to a virtual kernel pointer for /dev/mem access + */ #define xlate_dev_mem_ptr(p) __va(p) /* - * * Convert a virtual cached pointer to an uncached pointer - * */ + * Convert a virtual cached pointer to an uncached pointer + */ #define xlate_dev_kmem_ptr(p) p diff --git a/include/asm-xtensa/irq.h b/include/asm-xtensa/irq.h index 049fde7e752..fc73b7f11af 100644 --- a/include/asm-xtensa/irq.h +++ b/include/asm-xtensa/irq.h @@ -12,8 +12,7 @@ #define _XTENSA_IRQ_H #include <asm/platform/hardware.h> - -#include <xtensa/config/core.h> +#include <asm/variant/core.h> #ifndef PLATFORM_NR_IRQS # define PLATFORM_NR_IRQS 0 @@ -27,10 +26,5 @@ static __inline__ int irq_canonicalize(int irq) } struct irqaction; -#if 0 // FIXME -extern void disable_irq_nosync(unsigned int); -extern void disable_irq(unsigned int); -extern void enable_irq(unsigned int); -#endif #endif /* _XTENSA_IRQ_H */ diff --git a/include/asm-xtensa/irq_regs.h b/include/asm-xtensa/irq_regs.h new file mode 100644 index 00000000000..3dd9c0b7027 --- /dev/null +++ b/include/asm-xtensa/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-xtensa/mmu_context.h b/include/asm-xtensa/mmu_context.h index af683a74a4e..f14851f086c 100644 --- a/include/asm-xtensa/mmu_context.h +++ b/include/asm-xtensa/mmu_context.h @@ -16,187 +16,32 @@ #include <linux/stringify.h> #include <asm/pgtable.h> -#include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -/* - * Linux was ported to Xtensa assuming all auto-refill ways in set 0 - * had the same properties (a very likely assumption). Multiple sets - * of auto-refill ways will still work properly, but not as optimally - * as the Xtensa designer may have assumed. - * - * We make this case a hard #error, killing the kernel build, to alert - * the developer to this condition (which is more likely an error). - * You super-duper clever developers can change it to a warning or - * remove it altogether if you think you know what you're doing. :) - */ +#define XCHAL_MMU_ASID_BITS 8 #if (XCHAL_HAVE_TLBS != 1) # error "Linux must have an MMU!" #endif -#if ((XCHAL_ITLB_ARF_WAYS == 0) || (XCHAL_DTLB_ARF_WAYS == 0)) -# error "MMU must have auto-refill ways" -#endif - -#if ((XCHAL_ITLB_ARF_SETS != 1) || (XCHAL_DTLB_ARF_SETS != 1)) -# error Linux may not use all auto-refill ways as efficiently as you think -#endif - -#if (XCHAL_MMU_MAX_PTE_PAGE_SIZE != XCHAL_MMU_MIN_PTE_PAGE_SIZE) -# error Only one page size allowed! -#endif - extern unsigned long asid_cache; -extern pgd_t *current_pgd; - -/* - * Define the number of entries per auto-refill way in set 0 of both I and D - * TLBs. 
We deal only with set 0 here (an assumption further explained in - * assertions.h). Also, define the total number of ARF entries in both TLBs. - */ - -#define ITLB_ENTRIES_PER_ARF_WAY (XCHAL_ITLB_SET(XCHAL_ITLB_ARF_SET0,ENTRIES)) -#define DTLB_ENTRIES_PER_ARF_WAY (XCHAL_DTLB_SET(XCHAL_DTLB_ARF_SET0,ENTRIES)) - -#define ITLB_ENTRIES \ - (ITLB_ENTRIES_PER_ARF_WAY * (XCHAL_ITLB_SET(XCHAL_ITLB_ARF_SET0,WAYS))) -#define DTLB_ENTRIES \ - (DTLB_ENTRIES_PER_ARF_WAY * (XCHAL_DTLB_SET(XCHAL_DTLB_ARF_SET0,WAYS))) - - -/* - * SMALLEST_NTLB_ENTRIES is the smaller of ITLB_ENTRIES and DTLB_ENTRIES. - * In practice, they are probably equal. This macro simplifies function - * flush_tlb_range(). - */ - -#if (DTLB_ENTRIES < ITLB_ENTRIES) -# define SMALLEST_NTLB_ENTRIES DTLB_ENTRIES -#else -# define SMALLEST_NTLB_ENTRIES ITLB_ENTRIES -#endif - - -/* - * asid_cache tracks only the ASID[USER_RING] field of the RASID special - * register, which is the current user-task asid allocation value. - * mm->context has the same meaning. When it comes time to write the - * asid_cache or mm->context values to the RASID special register, we first - * shift the value left by 8, then insert the value. - * ASID[0] always contains the kernel's asid value, and we reserve three - * other asid values that we never assign to user tasks. - */ - -#define ASID_INC 0x1 -#define ASID_MASK ((1 << XCHAL_MMU_ASID_BITS) - 1) - -/* - * XCHAL_MMU_ASID_INVALID is a configurable Xtensa processor constant - * indicating invalid address space. XCHAL_MMU_ASID_KERNEL is a configurable - * Xtensa processor constant indicating the kernel address space. They can - * be arbitrary values. - * - * We identify three more unique, reserved ASID values to use in the unused - * ring positions. No other user process will be assigned these reserved - * ASID values. - * - * For example, given that - * - * XCHAL_MMU_ASID_INVALID == 0 - * XCHAL_MMU_ASID_KERNEL == 1 - * - * the following maze of #if statements would generate - * - * ASID_RESERVED_1 == 2 - * ASID_RESERVED_2 == 3 - * ASID_RESERVED_3 == 4 - * ASID_FIRST_NONRESERVED == 5 - */ - -#if (XCHAL_MMU_ASID_INVALID != XCHAL_MMU_ASID_KERNEL + 1) -# define ASID_RESERVED_1 ((XCHAL_MMU_ASID_KERNEL + 1) & ASID_MASK) -#else -# define ASID_RESERVED_1 ((XCHAL_MMU_ASID_KERNEL + 2) & ASID_MASK) -#endif - -#if (XCHAL_MMU_ASID_INVALID != ASID_RESERVED_1 + 1) -# define ASID_RESERVED_2 ((ASID_RESERVED_1 + 1) & ASID_MASK) -#else -# define ASID_RESERVED_2 ((ASID_RESERVED_1 + 2) & ASID_MASK) -#endif - -#if (XCHAL_MMU_ASID_INVALID != ASID_RESERVED_2 + 1) -# define ASID_RESERVED_3 ((ASID_RESERVED_2 + 1) & ASID_MASK) -#else -# define ASID_RESERVED_3 ((ASID_RESERVED_2 + 2) & ASID_MASK) -#endif - -#if (XCHAL_MMU_ASID_INVALID != ASID_RESERVED_3 + 1) -# define ASID_FIRST_NONRESERVED ((ASID_RESERVED_3 + 1) & ASID_MASK) -#else -# define ASID_FIRST_NONRESERVED ((ASID_RESERVED_3 + 2) & ASID_MASK) -#endif - -#define ASID_ALL_RESERVED ( ((ASID_RESERVED_1) << 24) + \ - ((ASID_RESERVED_2) << 16) + \ - ((ASID_RESERVED_3) << 8) + \ - ((XCHAL_MMU_ASID_KERNEL)) ) - /* * NO_CONTEXT is the invalid ASID value that we don't ever assign to - * any user or kernel context. NO_CONTEXT is a better mnemonic than - * XCHAL_MMU_ASID_INVALID, so we use it in code instead. - */ - -#define NO_CONTEXT XCHAL_MMU_ASID_INVALID - -#if (KERNEL_RING != 0) -# error The KERNEL_RING really should be zero. -#endif - -#if (USER_RING >= XCHAL_MMU_RINGS) -# error USER_RING cannot be greater than the highest numbered ring. 
-#endif - -#if (USER_RING == KERNEL_RING) -# error The user and kernel rings really should not be equal. -#endif - -#if (USER_RING == 1) -#define ASID_INSERT(x) ( ((ASID_RESERVED_1) << 24) + \ - ((ASID_RESERVED_2) << 16) + \ - (((x) & (ASID_MASK)) << 8) + \ - ((XCHAL_MMU_ASID_KERNEL)) ) - -#elif (USER_RING == 2) -#define ASID_INSERT(x) ( ((ASID_RESERVED_1) << 24) + \ - (((x) & (ASID_MASK)) << 16) + \ - ((ASID_RESERVED_2) << 8) + \ - ((XCHAL_MMU_ASID_KERNEL)) ) - -#elif (USER_RING == 3) -#define ASID_INSERT(x) ( (((x) & (ASID_MASK)) << 24) + \ - ((ASID_RESERVED_1) << 16) + \ - ((ASID_RESERVED_2) << 8) + \ - ((XCHAL_MMU_ASID_KERNEL)) ) - -#else -#error Goofy value for USER_RING - -#endif /* USER_RING == 1 */ - - -/* - * All unused by hardware upper bits will be considered - * as a software asid extension. + * any user or kernel context. + * + * 0 invalid + * 1 kernel + * 2 reserved + * 3 reserved + * 4...255 available */ -#define ASID_VERSION_MASK ((unsigned long)~(ASID_MASK|(ASID_MASK-1))) -#define ASID_FIRST_VERSION \ - ((unsigned long)(~ASID_VERSION_MASK) + 1 + ASID_FIRST_NONRESERVED) +#define NO_CONTEXT 0 +#define ASID_USER_FIRST 4 +#define ASID_MASK ((1 << XCHAL_MMU_ASID_BITS) - 1) +#define ASID_INSERT(x) (0x03020001 | (((x) & ASID_MASK) << 8)) static inline void set_rasid_register (unsigned long val) { @@ -207,67 +52,28 @@ static inline void set_rasid_register (unsigned long val) static inline unsigned long get_rasid_register (void) { unsigned long tmp; - __asm__ __volatile__ (" rsr %0, "__stringify(RASID)"\n\t" : "=a" (tmp)); + __asm__ __volatile__ (" rsr %0,"__stringify(RASID)"\n\t" : "=a" (tmp)); return tmp; } - -#if ((XCHAL_MMU_ASID_INVALID == 0) && (XCHAL_MMU_ASID_KERNEL == 1)) - static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long asid) +__get_new_mmu_context(struct mm_struct *mm) { extern void flush_tlb_all(void); - if (! ((asid += ASID_INC) & ASID_MASK) ) { + if (! (++asid_cache & ASID_MASK) ) { flush_tlb_all(); /* start new asid cycle */ - if (!asid) /* fix version if needed */ - asid = ASID_FIRST_VERSION - ASID_FIRST_NONRESERVED; - asid += ASID_FIRST_NONRESERVED; + asid_cache += ASID_USER_FIRST; } - mm->context = asid_cache = asid; -} - -#else -#warning ASID_{INVALID,KERNEL} values impose non-optimal get_new_mmu_context implementation - -/* XCHAL_MMU_ASID_INVALID == 0 and XCHAL_MMU_ASID_KERNEL ==1 are - really the best, but if you insist... */ - -static inline int validate_asid (unsigned long asid) -{ - switch (asid) { - case XCHAL_MMU_ASID_INVALID: - case XCHAL_MMU_ASID_KERNEL: - case ASID_RESERVED_1: - case ASID_RESERVED_2: - case ASID_RESERVED_3: - return 0; /* can't use these values as ASIDs */ - } - return 1; /* valid */ + mm->context = asid_cache; } static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long asid) +__load_mmu_context(struct mm_struct *mm) { - extern void flush_tlb_all(void); - while (1) { - asid += ASID_INC; - if ( ! (asid & ASID_MASK) ) { - flush_tlb_all(); /* start new asid cycle */ - if (!asid) /* fix version if needed */ - asid = ASID_FIRST_VERSION - ASID_FIRST_NONRESERVED; - asid += ASID_FIRST_NONRESERVED; - break; /* no need to validate here */ - } - if (validate_asid (asid & ASID_MASK)) - break; - } - mm->context = asid_cache = asid; + set_rasid_register(ASID_INSERT(mm->context)); + invalidate_page_directory(); } -#endif - - /* * Initialize the context related info for a new mm_struct * instance. 
@@ -280,6 +86,20 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } +/* + * After we have set current->mm to a new value, this activates + * the context for the new mm so we see the new mappings. + */ +static inline void +activate_mm(struct mm_struct *prev, struct mm_struct *next) +{ + /* Unconditionally get a new ASID. */ + + __get_new_mmu_context(next); + __load_mmu_context(next); +} + + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -287,11 +107,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Check if our ASID is of an older version and thus invalid */ - if ((next->context ^ asid) & ASID_VERSION_MASK) - get_new_mmu_context(next, asid); + if (next->context == NO_CONTEXT || ((next->context^asid) & ~ASID_MASK)) + __get_new_mmu_context(next); - set_rasid_register (ASID_INSERT(next->context)); - invalidate_page_directory(); + __load_mmu_context(next); } #define deactivate_mm(tsk, mm) do { } while(0) @@ -302,20 +121,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void destroy_context(struct mm_struct *mm) { - /* Nothing to do. */ -} - -/* - * After we have set current->mm to a new value, this activates - * the context for the new mm so we see the new mappings. - */ -static inline void -activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ - /* Unconditionally get a new ASID. */ - - get_new_mmu_context(next, asid_cache); - set_rasid_register (ASID_INSERT(next->context)); invalidate_page_directory(); } diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h index 40f4c6c3f58..c631d006194 100644 --- a/include/asm-xtensa/page.h +++ b/include/asm-xtensa/page.h @@ -15,18 +15,24 @@ #include <asm/processor.h> +#define XCHAL_KSEG_CACHED_VADDR 0xd0000000 +#define XCHAL_KSEG_BYPASS_VADDR 0xd8000000 +#define XCHAL_KSEG_PADDR 0x00000000 +#define XCHAL_KSEG_SIZE 0x08000000 + /* * PAGE_SHIFT determines the page size * PAGE_ALIGN(x) aligns the pointer to the (next) page boundary */ -#define PAGE_SHIFT XCHAL_MMU_MIN_PTE_PAGE_SIZE +#define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE - 1) & PAGE_MASK) -#define DCACHE_WAY_SIZE (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS) #define PAGE_OFFSET XCHAL_KSEG_CACHED_VADDR +#define MAX_MEM_PFN XCHAL_KSEG_SIZE +#define PGTABLE_START 0x80000000 #ifdef __ASSEMBLY__ diff --git a/include/asm-xtensa/param.h b/include/asm-xtensa/param.h index c0eec8260b0..6f281392e3f 100644 --- a/include/asm-xtensa/param.h +++ b/include/asm-xtensa/param.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_PARAM_H #define _XTENSA_PARAM_H -#include <xtensa/config/core.h> +#include <asm/variant/core.h> #ifdef __KERNEL__ # define HZ 100 /* internal timer frequency */ diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index b4318934b10..2d4b5db6ea6 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -14,45 +14,6 @@ #include <asm-generic/pgtable-nopmd.h> #include <asm/page.h> -/* Assertions. */ - -#ifdef CONFIG_MMU - - -#if (XCHAL_MMU_RINGS < 2) -# error Linux build assumes at least 2 ring levels. -#endif - -#if (XCHAL_MMU_CA_BITS != 4) -# error We assume exactly four bits for CA. -#endif - -#if (XCHAL_MMU_SR_BITS != 0) -# error We have no room for SR bits. -#endif - -/* - * Use the first min-wired way for mapping page-table pages. - * Page coloring requires a second min-wired way. 
- */ - -#if (XCHAL_DTLB_MINWIRED_SETS == 0) -# error Need a min-wired way for mapping page-table pages -#endif - -#define DTLB_WAY_PGTABLE XCHAL_DTLB_SET(XCHAL_DTLB_MINWIRED_SET0, WAY) - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK -# if XCHAL_DTLB_SET(XCHAL_DTLB_MINWIRED_SET0, WAYS) >= 2 -# define DTLB_WAY_DCACHE_ALIAS0 (DTLB_WAY_PGTABLE + 1) -# define DTLB_WAY_DCACHE_ALIAS1 (DTLB_WAY_PGTABLE + 2) -# else -# error Page coloring requires its own wired dtlb way! -# endif -#endif - -#endif /* CONFIG_MMU */ - /* * We only use two ring levels, user and kernel space. */ @@ -97,7 +58,7 @@ #define PGD_ORDER 0 #define PMD_ORDER 0 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS XCHAL_SEG_MAPPABLE_VADDR +#define FIRST_USER_ADDRESS 0 #define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT) /* virtual memory area. We keep a distance to other memory regions to be diff --git a/include/asm-xtensa/platform-iss/hardware.h b/include/asm-xtensa/platform-iss/hardware.h index 22240f00180..6930c12adc1 100644 --- a/include/asm-xtensa/platform-iss/hardware.h +++ b/include/asm-xtensa/platform-iss/hardware.h @@ -12,18 +12,18 @@ * This file contains the default configuration of ISS. */ -#ifndef __ASM_XTENSA_ISS_HARDWARE -#define __ASM_XTENSA_ISS_HARDWARE +#ifndef _XTENSA_PLATFORM_ISS_HARDWARE_H +#define _XTENSA_PLATFORM_ISS_HARDWARE_H /* * Memory configuration. */ -#define PLATFORM_DEFAULT_MEM_START XSHAL_RAM_PADDR -#define PLATFORM_DEFAULT_MEM_SIZE XSHAL_RAM_VSIZE +#define PLATFORM_DEFAULT_MEM_START 0x00000000 +#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000 /* * Interrupt configuration. */ -#endif /* __ASM_XTENSA_ISS_HARDWARE */ +#endif /* _XTENSA_PLATFORM_ISS_HARDWARE_H */ diff --git a/include/asm-xtensa/platform-iss/simcall.h b/include/asm-xtensa/platform-iss/simcall.h new file mode 100644 index 00000000000..6acb572759a --- /dev/null +++ b/include/asm-xtensa/platform-iss/simcall.h @@ -0,0 +1,62 @@ +/* + * include/asm-xtensa/platform-iss/simcall.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001 Tensilica Inc. + */ + +#ifndef _XTENSA_PLATFORM_ISS_SIMCALL_H +#define _XTENSA_PLATFORM_ISS_SIMCALL_H + + +/* + * System call like services offered by the simulator host. 
+ */ + +#define SYS_nop 0 /* unused */ +#define SYS_exit 1 /*x*/ +#define SYS_fork 2 +#define SYS_read 3 /*x*/ +#define SYS_write 4 /*x*/ +#define SYS_open 5 /*x*/ +#define SYS_close 6 /*x*/ +#define SYS_rename 7 /*x 38 - waitpid */ +#define SYS_creat 8 /*x*/ +#define SYS_link 9 /*x (not implemented on WIN32) */ +#define SYS_unlink 10 /*x*/ +#define SYS_execv 11 /* n/a - execve */ +#define SYS_execve 12 /* 11 - chdir */ +#define SYS_pipe 13 /* 42 - time */ +#define SYS_stat 14 /* 106 - mknod */ +#define SYS_chmod 15 +#define SYS_chown 16 /* 202 - lchown */ +#define SYS_utime 17 /* 30 - break */ +#define SYS_wait 18 /* n/a - oldstat */ +#define SYS_lseek 19 /*x*/ +#define SYS_getpid 20 +#define SYS_isatty 21 /* n/a - mount */ +#define SYS_fstat 22 /* 108 - oldumount */ +#define SYS_time 23 /* 13 - setuid */ +#define SYS_gettimeofday 24 /*x 78 - getuid (not implemented on WIN32) */ +#define SYS_times 25 /*X 43 - stime (Xtensa-specific implementation) */ +#define SYS_socket 26 +#define SYS_sendto 27 +#define SYS_recvfrom 28 +#define SYS_select_one 29 /* not a compatible select, one file descriptor at a time */ +#define SYS_bind 30 +#define SYS_ioctl 31 + +/* + * SYS_select_one specifiers + */ + +#define XTISS_SELECT_ONE_READ 1 +#define XTISS_SELECT_ONE_WRITE 2 +#define XTISS_SELECT_ONE_EXCEPT 3 + + +#endif /* _XTENSA_PLATFORM_ISS_SIMCALL_H */ + diff --git a/include/asm-xtensa/posix_types.h b/include/asm-xtensa/posix_types.h index 2c816b0e776..3470b44c12c 100644 --- a/include/asm-xtensa/posix_types.h +++ b/include/asm-xtensa/posix_types.h @@ -21,7 +21,7 @@ typedef unsigned long __kernel_ino_t; typedef unsigned int __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; +typedef unsigned long __kernel_nlink_t; typedef long __kernel_off_t; typedef int __kernel_pid_t; typedef unsigned short __kernel_ipc_pid_t; diff --git a/include/asm-xtensa/processor.h b/include/asm-xtensa/processor.h index 8b96e77c9d8..4feb9f7f35a 100644 --- a/include/asm-xtensa/processor.h +++ b/include/asm-xtensa/processor.h @@ -11,24 +11,18 @@ #ifndef _XTENSA_PROCESSOR_H #define _XTENSA_PROCESSOR_H -#ifdef __ASSEMBLY__ -#define _ASMLANGUAGE -#endif - -#include <xtensa/config/core.h> -#include <xtensa/config/specreg.h> -#include <xtensa/config/tie.h> -#include <xtensa/config/system.h> +#include <asm/variant/core.h> +#include <asm/coprocessor.h> #include <linux/compiler.h> #include <asm/ptrace.h> #include <asm/types.h> -#include <asm/coprocessor.h> +#include <asm/regs.h> /* Assertions. */ #if (XCHAL_HAVE_WINDOWED != 1) -#error Linux requires the Xtensa Windowed Registers Option. +# error Linux requires the Xtensa Windowed Registers Option. #endif /* @@ -145,11 +139,11 @@ struct thread_struct { * Note: We set-up ps as if we did a call4 to the new pc. * set_thread_state in signal.c depends on it. */ -#define USER_PS_VALUE ( (1 << XCHAL_PS_WOE_SHIFT) + \ - (1 << XCHAL_PS_CALLINC_SHIFT) + \ - (USER_RING << XCHAL_PS_RING_SHIFT) + \ - (1 << XCHAL_PS_PROGSTACK_SHIFT) + \ - (1 << XCHAL_PS_EXCM_SHIFT) ) +#define USER_PS_VALUE ((1 << PS_WOE_BIT) | \ - (1 << PS_CALLINC_SHIFT) | \ - (USER_RING << PS_RING_SHIFT) | \ - (1 << PS_UM_BIT) | \ - (1 << PS_EXCM_BIT)) /* Clearing a0 terminates the backtrace. 
*/ #define start_thread(regs, new_pc, new_sp) \ diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h index a5ac71a5205..1b7fe363fad 100644 --- a/include/asm-xtensa/ptrace.h +++ b/include/asm-xtensa/ptrace.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_PTRACE_H #define _XTENSA_PTRACE_H -#include <xtensa/config/core.h> +#include <asm/variant/core.h> /* * Kernel stack diff --git a/include/asm-xtensa/regs.h b/include/asm-xtensa/regs.h new file mode 100644 index 00000000000..c913d259faa --- /dev/null +++ b/include/asm-xtensa/regs.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2006 Tensilica, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, + * USA. + */ + +#ifndef _XTENSA_REGS_H +#define _XTENSA_REGS_H + +/* Special registers. */ + +#define LBEG 0 +#define LEND 1 +#define LCOUNT 2 +#define SAR 3 +#define BR 4 +#define SCOMPARE1 12 +#define ACCHI 16 +#define ACCLO 17 +#define MR 32 +#define WINDOWBASE 72 +#define WINDOWSTART 73 +#define PTEVADDR 83 +#define RASID 90 +#define ITLBCFG 91 +#define DTLBCFG 92 +#define IBREAKENABLE 96 +#define DDR 104 +#define IBREAKA 128 +#define DBREAKA 144 +#define DBREAKC 160 +#define EPC 176 +#define EPC_1 177 +#define DEPC 192 +#define EPS 192 +#define EPS_1 193 +#define EXCSAVE 208 +#define EXCSAVE_1 209 +#define INTERRUPT 226 +#define INTENABLE 228 +#define PS 230 +#define THREADPTR 231 +#define EXCCAUSE 232 +#define DEBUGCAUSE 233 +#define CCOUNT 234 +#define PRID 235 +#define ICOUNT 236 +#define ICOUNTLEVEL 237 +#define EXCVADDR 238 +#define CCOMPARE 240 +#define MISC 244 + +/* Special names for read-only and write-only interrupt registers. 
*/ + +#define INTREAD 226 +#define INTSET 226 +#define INTCLEAR 227 + +/* EXCCAUSE register fields */ + +#define EXCCAUSE_EXCCAUSE_SHIFT 0 +#define EXCCAUSE_EXCCAUSE_MASK 0x3F + +#define EXCCAUSE_ILLEGAL_INSTRUCTION 0 +#define EXCCAUSE_SYSTEM_CALL 1 +#define EXCCAUSE_INSTRUCTION_FETCH_ERROR 2 +#define EXCCAUSE_LOAD_STORE_ERROR 3 +#define EXCCAUSE_LEVEL1_INTERRUPT 4 +#define EXCCAUSE_ALLOCA 5 +#define EXCCAUSE_INTEGER_DIVIDE_BY_ZERO 6 +#define EXCCAUSE_SPECULATION 7 +#define EXCCAUSE_PRIVILEGED 8 +#define EXCCAUSE_UNALIGNED 9 +#define EXCCAUSE_ITLB_MISS 16 +#define EXCCAUSE_ITLB_MULTIHIT 17 +#define EXCCAUSE_ITLB_PRIVILEGE 18 +#define EXCCAUSE_ITLB_SIZE_RESTRICTION 19 +#define EXCCAUSE_FETCH_CACHE_ATTRIBUTE 20 +#define EXCCAUSE_DTLB_MISS 24 +#define EXCCAUSE_DTLB_MULTIHIT 25 +#define EXCCAUSE_DTLB_PRIVILEGE 26 +#define EXCCAUSE_DTLB_SIZE_RESTRICTION 27 +#define EXCCAUSE_LOAD_CACHE_ATTRIBUTE 28 +#define EXCCAUSE_STORE_CACHE_ATTRIBUTE 29 +#define EXCCAUSE_FLOATING_POINT 40 + +/* PS register fields. */ + +#define PS_WOE_BIT 18 +#define PS_CALLINC_SHIFT 16 +#define PS_CALLINC_MASK 0x00030000 +#define PS_OWB_SHIFT 8 +#define PS_OWB_MASK 0x00000F00 +#define PS_RING_SHIFT 6 +#define PS_RING_MASK 0x000000C0 +#define PS_UM_BIT 5 +#define PS_EXCM_BIT 4 +#define PS_INTLEVEL_SHIFT 0 +#define PS_INTLEVEL_MASK 0x0000000F + +/* DBREAKCn register fields. */ + +#define DBREAKC_MASK_BIT 0 +#define DBREAKC_MASK_MASK 0x0000003F +#define DBREAKC_LOAD_BIT 30 +#define DBREAKC_LOAD_MASK 0x40000000 +#define DBREAKC_STOR_BIT 31 +#define DBREAKC_STOR_MASK 0x80000000 + +/* DEBUGCAUSE register fields. */ + +#define DEBUGCAUSE_DEBUGINT_BIT 5 /* External debug interrupt */ +#define DEBUGCAUSE_BREAKN_BIT 4 /* BREAK.N instruction */ +#define DEBUGCAUSE_BREAK_BIT 3 /* BREAK instruction */ +#define DEBUGCAUSE_DBREAK_BIT 2 /* DBREAK match */ +#define DEBUGCAUSE_IBREAK_BIT 1 /* IBREAK match */ +#define DEBUGCAUSE_ICOUNT_BIT 0 /* ICOUNT would incr. to zero */ + +#endif /* _XTENSA_REGS_H */ + diff --git a/include/asm-xtensa/sembuf.h b/include/asm-xtensa/sembuf.h index 2d26c47666f..c15870493b3 100644 --- a/include/asm-xtensa/sembuf.h +++ b/include/asm-xtensa/sembuf.h @@ -25,7 +25,7 @@ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ -#if XCHAL_HAVE_LE +#ifdef __XTENSA_EL__ __kernel_time_t sem_otime; /* last semop time */ unsigned long __unused1; __kernel_time_t sem_ctime; /* last change time */ diff --git a/include/asm-xtensa/shmbuf.h b/include/asm-xtensa/shmbuf.h index a30b81a4b93..ad4b0121782 100644 --- a/include/asm-xtensa/shmbuf.h +++ b/include/asm-xtensa/shmbuf.h @@ -19,6 +19,7 @@ #ifndef _XTENSA_SHMBUF_H #define _XTENSA_SHMBUF_H +#if defined (__XTENSA_EL__) struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ @@ -34,6 +35,26 @@ struct shmid64_ds { unsigned long __unused4; unsigned long __unused5; }; +#elif defined (__XTENSA_EB__) +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + unsigned long __unused1; + __kernel_time_t shm_dtime; /* last detach time */ + unsigned long __unused2; + __kernel_time_t shm_ctime; /* last change time */ + unsigned long __unused3; + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. 
of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; +#else +# error endian order not defined +#endif + struct shminfo64 { unsigned long shmmax; diff --git a/include/asm-xtensa/stat.h b/include/asm-xtensa/stat.h index 2f4662ff6c3..149f4bce092 100644 --- a/include/asm-xtensa/stat.h +++ b/include/asm-xtensa/stat.h @@ -13,93 +13,57 @@ #include <linux/types.h> -struct __old_kernel_stat { - unsigned short st_dev; - unsigned short st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned long st_size; - unsigned long st_atime; - unsigned long st_mtime; - unsigned long st_ctime; -}; - #define STAT_HAVE_NSEC 1 struct stat { - unsigned short st_dev; - unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused4; - unsigned long __unused5; + unsigned long st_dev; + ino_t st_ino; + mode_t st_mode; + nlink_t st_nlink; + uid_t st_uid; + gid_t st_gid; + unsigned int st_rdev; + off_t st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; }; -/* This matches struct stat64 in glibc-2.2.3. */ +/* This matches struct stat64 in glibc-2.3 */ struct stat64 { -#ifdef __XTENSA_EL__ - unsigned short st_dev; /* Device */ - unsigned char __pad0[10]; -#else - unsigned char __pad0[6]; - unsigned short st_dev; - unsigned char __pad1[2]; -#endif - -#define STAT64_HAS_BROKEN_ST_INO 1 - unsigned long __st_ino; /* 32bit file serial number. */ - + unsigned long long st_dev; /* Device */ + unsigned long long st_ino; /* File serial number */ unsigned int st_mode; /* File mode. */ unsigned int st_nlink; /* Link count. */ unsigned int st_uid; /* User ID of the file's owner. */ unsigned int st_gid; /* Group ID of the file's group. */ - -#ifdef __XTENSA_EL__ - unsigned short st_rdev; /* Device number, if device. */ - unsigned char __pad3[10]; -#else - unsigned char __pad2[6]; - unsigned short st_rdev; - unsigned char __pad3[2]; -#endif - - long long int st_size; /* Size of file, in bytes. */ - long int st_blksize; /* Optimal block size for I/O. */ - -#ifdef __XTENSA_EL__ - unsigned long st_blocks; /* Number 512-byte blocks allocated. */ - unsigned long __pad4; + unsigned long long st_rdev; /* Device number, if device. */ + long long st_size; /* Size of file, in bytes. */ + long st_blksize; /* Optimal block size for I/O. */ + unsigned long __unused2; +#ifdef __XTENSA_EB__ + unsigned long __unused3; + long st_blocks; /* Number 512-byte blocks allocated. */ #else - unsigned long __pad4; - unsigned long st_blocks; + long st_blocks; /* Number 512-byte blocks allocated. */ + unsigned long __unused3; #endif - - unsigned long __pad5; - long int st_atime; /* Time of last access. */ - unsigned long st_atime_nsec; - long int st_mtime; /* Time of last modification. */ - unsigned long st_mtime_nsec; - long int st_ctime; /* Time of last status change. 
*/ - unsigned long st_ctime_nsec; - unsigned long long int st_ino; /* File serial number. */ + long st_atime; /* Time of last access. */ + unsigned long st_atime_nsec; + long st_mtime; /* Time of last modification. */ + unsigned long st_mtime_nsec; + long st_ctime; /* Time of last status change. */ + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; }; #endif /* _XTENSA_STAT_H */ diff --git a/include/asm-xtensa/syscall.h b/include/asm-xtensa/syscall.h new file mode 100644 index 00000000000..6cb0d42f11c --- /dev/null +++ b/include/asm-xtensa/syscall.h @@ -0,0 +1,20 @@ +struct pt_regs; +struct sigaction; +asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*); +asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*); +asmlinkage long xtensa_pipe(int __user *); +asmlinkage long xtensa_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +asmlinkage long xtensa_ptrace(long, long, long, long); +asmlinkage long xtensa_sigreturn(struct pt_regs*); +asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); +asmlinkage long xtensa_sigsuspend(struct pt_regs*); +asmlinkage long xtensa_rt_sigsuspend(struct pt_regs*); +asmlinkage long xtensa_sigaction(int, const struct old_sigaction*, + struct old_sigaction*); +asmlinkage long xtensa_sigaltstack(struct pt_regs *regs); +asmlinkage long sys_rt_sigaction(int, + const struct sigaction __user *, + struct sigaction __user *, + size_t); +asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg); diff --git a/include/asm-xtensa/system.h b/include/asm-xtensa/system.h index 932bda92a21..4aaed7fe6cf 100644 --- a/include/asm-xtensa/system.h +++ b/include/asm-xtensa/system.h @@ -213,7 +213,7 @@ static inline void spill_registers(void) unsigned int a0, ps; __asm__ __volatile__ ( - "movi a14," __stringify (PS_EXCM_MASK) " | 1\n\t" + "movi a14," __stringify (PS_EXCM_BIT) " | 1\n\t" "mov a12, a0\n\t" "rsr a13," __stringify(SAR) "\n\t" "xsr a14," __stringify(PS) "\n\t" diff --git a/include/asm-xtensa/timex.h b/include/asm-xtensa/timex.h index c7b705e6665..28c7985a400 100644 --- a/include/asm-xtensa/timex.h +++ b/include/asm-xtensa/timex.h @@ -16,17 +16,22 @@ #include <asm/processor.h> #include <linux/stringify.h> -#if XCHAL_INT_LEVEL(XCHAL_TIMER0_INTERRUPT) == 1 +#define _INTLEVEL(x) XCHAL_INT ## x ## _LEVEL +#define INTLEVEL(x) _INTLEVEL(x) + +#if INTLEVEL(XCHAL_TIMER0_INTERRUPT) == 1 # define LINUX_TIMER 0 -#elif XCHAL_INT_LEVEL(XCHAL_TIMER1_INTERRUPT) == 1 +# define LINUX_TIMER_INT XCHAL_TIMER0_INTERRUPT +#elif INTLEVEL(XCHAL_TIMER1_INTERRUPT) == 1 # define LINUX_TIMER 1 -#elif XCHAL_INT_LEVEL(XCHAL_TIMER2_INTERRUPT) == 1 +# define LINUX_TIMER_INT XCHAL_TIMER1_INTERRUPT +#elif INTLEVEL(XCHAL_TIMER2_INTERRUPT) == 1 # define LINUX_TIMER 2 +# define LINUX_TIMER_INT XCHAL_TIMER2_INTERRUPT #else # error "Bad timer number for Linux configurations!" 
#endif -#define LINUX_TIMER_INT XCHAL_TIMER_INTERRUPT(LINUX_TIMER) #define LINUX_TIMER_MASK (1L << LINUX_TIMER_INT) #define CLOCK_TICK_RATE 1193180 /* (everyone is using this value) */ @@ -60,8 +65,8 @@ extern cycles_t cacheflush_time; #define WSR_CCOUNT(r) __asm__("wsr %0,"__stringify(CCOUNT) :: "a" (r)) #define RSR_CCOUNT(r) __asm__("rsr %0,"__stringify(CCOUNT) : "=a" (r)) -#define WSR_CCOMPARE(x,r) __asm__("wsr %0,"__stringify(CCOMPARE_0)"+"__stringify(x) :: "a"(r)) -#define RSR_CCOMPARE(x,r) __asm__("rsr %0,"__stringify(CCOMPARE_0)"+"__stringify(x) : "=a"(r)) +#define WSR_CCOMPARE(x,r) __asm__("wsr %0,"__stringify(CCOMPARE)"+"__stringify(x) :: "a"(r)) +#define RSR_CCOMPARE(x,r) __asm__("rsr %0,"__stringify(CCOMPARE)"+"__stringify(x) : "=a"(r)) static inline unsigned long get_ccount (void) { diff --git a/include/asm-xtensa/tlbflush.h b/include/asm-xtensa/tlbflush.h index 43f6ec859af..7c637b3c352 100644 --- a/include/asm-xtensa/tlbflush.h +++ b/include/asm-xtensa/tlbflush.h @@ -11,12 +11,20 @@ #ifndef _XTENSA_TLBFLUSH_H #define _XTENSA_TLBFLUSH_H -#define DEBUG_TLB - #ifdef __KERNEL__ -#include <asm/processor.h> #include <linux/stringify.h> +#include <asm/processor.h> + +#define DTLB_WAY_PGD 7 + +#define ITLB_ARF_WAYS 4 +#define DTLB_ARF_WAYS 4 + +#define ITLB_HIT_BIT 3 +#define DTLB_HIT_BIT 4 + +#ifndef __ASSEMBLY__ /* TLB flushing: * @@ -46,11 +54,6 @@ static inline void flush_tlb_pgtables(struct mm_struct *mm, /* TLB operations. */ -#define ITLB_WAYS_LOG2 XCHAL_ITLB_WAY_BITS -#define DTLB_WAYS_LOG2 XCHAL_DTLB_WAY_BITS -#define ITLB_PROBE_SUCCESS (1 << ITLB_WAYS_LOG2) -#define DTLB_PROBE_SUCCESS (1 << DTLB_WAYS_LOG2) - static inline unsigned long itlb_probe(unsigned long addr) { unsigned long tmp; @@ -131,29 +134,30 @@ static inline void write_itlb_entry (pte_t entry, int way) static inline void invalidate_page_directory (void) { - invalidate_dtlb_entry (DTLB_WAY_PGTABLE); + invalidate_dtlb_entry (DTLB_WAY_PGD); + invalidate_dtlb_entry (DTLB_WAY_PGD+1); + invalidate_dtlb_entry (DTLB_WAY_PGD+2); } static inline void invalidate_itlb_mapping (unsigned address) { unsigned long tlb_entry; - while ((tlb_entry = itlb_probe (address)) & ITLB_PROBE_SUCCESS) - invalidate_itlb_entry (tlb_entry); + if (((tlb_entry = itlb_probe(address)) & (1 << ITLB_HIT_BIT)) != 0) + invalidate_itlb_entry(tlb_entry); } static inline void invalidate_dtlb_mapping (unsigned address) { unsigned long tlb_entry; - while ((tlb_entry = dtlb_probe (address)) & DTLB_PROBE_SUCCESS) - invalidate_dtlb_entry (tlb_entry); + if (((tlb_entry = dtlb_probe(address)) & (1 << DTLB_HIT_BIT)) != 0) + invalidate_dtlb_entry(tlb_entry); } #define check_pgt_cache() do { } while (0) -#ifdef DEBUG_TLB - -/* DO NOT USE THESE FUNCTIONS. These instructions aren't part of the Xtensa +/* + * DO NOT USE THESE FUNCTIONS. These instructions aren't part of the Xtensa * ISA and exist only for test purposes.. * You may find it helpful for MMU debugging, however. 
* @@ -193,8 +197,6 @@ static inline unsigned long read_itlb_translation (int way) return tmp; } -#endif /* DEBUG_TLB */ - - +#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* _XTENSA_PGALLOC_H */ +#endif /* _XTENSA_TLBFLUSH_H */ diff --git a/include/asm-xtensa/unistd.h b/include/asm-xtensa/unistd.h index 2e1a1b997e7..8a7fb6964ce 100644 --- a/include/asm-xtensa/unistd.h +++ b/include/asm-xtensa/unistd.h @@ -11,212 +11,593 @@ #ifndef _XTENSA_UNISTD_H #define _XTENSA_UNISTD_H -#define __NR_spill 0 -#define __NR_exit 1 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_ptrace 26 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define 
__NR_cacheflush 147 -#define __NR_cachectl 148 -#define __NR_sysxtensa 149 -#define __NR_sysdummy 150 -#define __NR_getsid 151 -#define __NR_fdatasync 152 -#define __NR__sysctl 153 -#define __NR_mlock 154 -#define __NR_munlock 155 -#define __NR_mlockall 156 -#define __NR_munlockall 157 -#define __NR_sched_setparam 158 -#define __NR_sched_getparam 159 -#define __NR_sched_setscheduler 160 -#define __NR_sched_getscheduler 161 -#define __NR_sched_yield 162 -#define __NR_sched_get_priority_max 163 -#define __NR_sched_get_priority_min 164 -#define __NR_sched_rr_get_interval 165 -#define __NR_nanosleep 166 -#define __NR_mremap 167 -#define __NR_accept 168 -#define __NR_bind 169 -#define __NR_connect 170 -#define __NR_getpeername 171 -#define __NR_getsockname 172 -#define __NR_getsockopt 173 -#define __NR_listen 174 -#define __NR_recv 175 -#define __NR_recvfrom 176 -#define __NR_recvmsg 177 -#define __NR_send 178 -#define __NR_sendmsg 179 -#define __NR_sendto 180 -#define __NR_setsockopt 181 -#define __NR_shutdown 182 -#define __NR_socket 183 -#define __NR_socketpair 184 -#define __NR_setresuid 185 -#define __NR_getresuid 186 -#define __NR_query_module 187 -#define __NR_poll 188 -#define __NR_nfsservctl 189 -#define __NR_setresgid 190 -#define __NR_getresgid 191 -#define __NR_prctl 192 -#define __NR_rt_sigreturn 193 -#define __NR_rt_sigaction 194 -#define __NR_rt_sigprocmask 195 -#define __NR_rt_sigpending 196 -#define __NR_rt_sigtimedwait 197 -#define __NR_rt_sigqueueinfo 198 -#define __NR_rt_sigsuspend 199 -#define __NR_pread 200 -#define __NR_pwrite 201 -#define __NR_chown 202 -#define __NR_getcwd 203 -#define __NR_capget 204 -#define __NR_capset 205 -#define __NR_sigaltstack 206 -#define __NR_sendfile 207 -#define __NR_mmap2 210 -#define __NR_truncate64 211 -#define __NR_ftruncate64 212 -#define __NR_stat64 213 -#define __NR_lstat64 214 -#define __NR_fstat64 215 -#define __NR_pivot_root 216 -#define __NR_mincore 217 -#define __NR_madvise 218 -#define __NR_getdents64 219 - -/* Keep this last; should always equal the last valid call number. 
*/ -#define __NR_Linux_syscalls 220 - -/* user-visible error numbers are in the range -1 - -125: see - * <asm-xtensa/errno.h> */ - -#define SYSXTENSA_RESERVED 0 /* don't use this */ -#define SYSXTENSA_ATOMIC_SET 1 /* set variable */ -#define SYSXTENSA_ATOMIC_EXG_ADD 2 /* exchange memory and add */ -#define SYSXTENSA_ATOMIC_ADD 3 /* add to memory */ -#define SYSXTENSA_ATOMIC_CMP_SWP 4 /* compare and swap */ - -#define SYSXTENSA_COUNT 5 /* count of syscall0 functions*/ +#ifndef __SYSCALL +# define __SYSCALL(nr,func,nargs) +#endif + +#define __NR_spill 0 +__SYSCALL( 0, sys_ni_syscall, 0) +#define __NR_xtensa 1 +__SYSCALL( 1, sys_ni_syscall, 0) +#define __NR_available4 2 +__SYSCALL( 2, sys_ni_syscall, 0) +#define __NR_available5 3 +__SYSCALL( 3, sys_ni_syscall, 0) +#define __NR_available6 4 +__SYSCALL( 4, sys_ni_syscall, 0) +#define __NR_available7 5 +__SYSCALL( 5, sys_ni_syscall, 0) +#define __NR_available8 6 +__SYSCALL( 6, sys_ni_syscall, 0) +#define __NR_available9 7 +__SYSCALL( 7, sys_ni_syscall, 0) + +/* File Operations */ + +#define __NR_open 8 +__SYSCALL( 8, sys_open, 3) +#define __NR_close 9 +__SYSCALL( 9, sys_close, 1) +#define __NR_dup 10 +__SYSCALL( 10, sys_dup, 1) +#define __NR_dup2 11 +__SYSCALL( 11, sys_dup2, 2) +#define __NR_read 12 +__SYSCALL( 12, sys_read, 3) +#define __NR_write 13 +__SYSCALL( 13, sys_write, 3) +#define __NR_select 14 +__SYSCALL( 14, sys_select, 5) +#define __NR_lseek 15 +__SYSCALL( 15, sys_lseek, 3) +#define __NR_poll 16 +__SYSCALL( 16, sys_poll, 3) +#define __NR__llseek 17 +__SYSCALL( 17, sys_llseek, 5) +#define __NR_epoll_wait 18 +__SYSCALL( 18, sys_epoll_wait, 4) +#define __NR_epoll_ctl 19 +__SYSCALL( 19, sys_epoll_ctl, 4) +#define __NR_epoll_create 20 +__SYSCALL( 20, sys_epoll_create, 1) +#define __NR_creat 21 +__SYSCALL( 21, sys_creat, 2) +#define __NR_truncate 22 +__SYSCALL( 22, sys_truncate, 2) +#define __NR_ftruncate 23 +__SYSCALL( 23, sys_ftruncate, 2) +#define __NR_readv 24 +__SYSCALL( 24, sys_readv, 3) +#define __NR_writev 25 +__SYSCALL( 25, sys_writev, 3) +#define __NR_fsync 26 +__SYSCALL( 26, sys_fsync, 1) +#define __NR_fdatasync 27 +__SYSCALL( 27, sys_fdatasync, 1) +#define __NR_truncate64 28 +__SYSCALL( 28, sys_truncate64, 2) +#define __NR_ftruncate64 29 +__SYSCALL( 29, sys_ftruncate64, 2) +#define __NR_pread64 30 +__SYSCALL( 30, sys_pread64, 6) +#define __NR_pwrite64 31 +__SYSCALL( 31, sys_pwrite64, 6) + +#define __NR_link 32 +__SYSCALL( 32, sys_link, 2) +#define __NR_rename 33 +__SYSCALL( 33, sys_rename, 2) +#define __NR_symlink 34 +__SYSCALL( 34, sys_symlink, 2) +#define __NR_readlink 35 +__SYSCALL( 35, sys_readlink, 3) +#define __NR_mknod 36 +__SYSCALL( 36, sys_mknod, 3) +#define __NR_pipe 37 +__SYSCALL( 37, xtensa_pipe, 1) +#define __NR_unlink 38 +__SYSCALL( 38, sys_unlink, 1) +#define __NR_rmdir 39 +__SYSCALL( 39, sys_rmdir, 1) + +#define __NR_mkdir 40 +__SYSCALL( 40, sys_mkdir, 2) +#define __NR_chdir 41 +__SYSCALL( 41, sys_chdir, 1) +#define __NR_fchdir 42 +__SYSCALL( 42, sys_fchdir, 1) +#define __NR_getcwd 43 +__SYSCALL( 43, sys_getcwd, 2) + +#define __NR_chmod 44 +__SYSCALL( 44, sys_chmod, 2) +#define __NR_chown 45 +__SYSCALL( 45, sys_chown, 3) +#define __NR_stat 46 +__SYSCALL( 46, sys_newstat, 2) +#define __NR_stat64 47 +__SYSCALL( 47, sys_stat64, 2) + +#define __NR_lchown 48 +__SYSCALL( 48, sys_lchown, 3) +#define __NR_lstat 49 +__SYSCALL( 49, sys_newlstat, 2) +#define __NR_lstat64 50 +__SYSCALL( 50, sys_lstat64, 2) +#define __NR_available51 51 +__SYSCALL( 51, sys_ni_syscall, 0) + +#define __NR_fchmod 52 +__SYSCALL( 52, sys_fchmod, 
2) +#define __NR_fchown 53 +__SYSCALL( 53, sys_fchown, 3) +#define __NR_fstat 54 +__SYSCALL( 54, sys_newfstat, 2) +#define __NR_fstat64 55 +__SYSCALL( 55, sys_fstat64, 2) + +#define __NR_flock 56 +__SYSCALL( 56, sys_flock, 2) +#define __NR_access 57 +__SYSCALL( 57, sys_access, 2) +#define __NR_umask 58 +__SYSCALL( 58, sys_umask, 1) +#define __NR_getdents 59 +__SYSCALL( 59, sys_getdents, 3) +#define __NR_getdents64 60 +__SYSCALL( 60, sys_getdents64, 3) +#define __NR_fcntl64 61 +__SYSCALL( 61, sys_fcntl64, 3) +#define __NR_available62 62 +__SYSCALL( 62, sys_ni_syscall, 0) +#define __NR_fadvise64_64 63 +__SYSCALL( 63, sys_fadvise64_64, 6) +#define __NR_utime 64 /* glibc 2.3.3 ?? */ +__SYSCALL( 64, sys_utime, 2) +#define __NR_utimes 65 +__SYSCALL( 65, sys_utimes, 2) +#define __NR_ioctl 66 +__SYSCALL( 66, sys_ioctl, 3) +#define __NR_fcntl 67 +__SYSCALL( 67, sys_fcntl, 3) + +#define __NR_setxattr 68 +__SYSCALL( 68, sys_setxattr, 5) +#define __NR_getxattr 69 +__SYSCALL( 69, sys_getxattr, 4) +#define __NR_listxattr 70 +__SYSCALL( 70, sys_listxattr, 3) +#define __NR_removexattr 71 +__SYSCALL( 71, sys_removexattr, 2) +#define __NR_lsetxattr 72 +__SYSCALL( 72, sys_lsetxattr, 5) +#define __NR_lgetxattr 73 +__SYSCALL( 73, sys_lgetxattr, 4) +#define __NR_llistxattr 74 +__SYSCALL( 74, sys_llistxattr, 3) +#define __NR_lremovexattr 75 +__SYSCALL( 75, sys_lremovexattr, 2) +#define __NR_fsetxattr 76 +__SYSCALL( 76, sys_fsetxattr, 5) +#define __NR_fgetxattr 77 +__SYSCALL( 77, sys_fgetxattr, 4) +#define __NR_flistxattr 78 +__SYSCALL( 78, sys_flistxattr, 3) +#define __NR_fremovexattr 79 +__SYSCALL( 79, sys_fremovexattr, 2) + +/* File Map / Shared Memory Operations */ + +#define __NR_mmap2 80 +__SYSCALL( 80, xtensa_mmap2, 6) +#define __NR_munmap 81 +__SYSCALL( 81, sys_munmap, 2) +#define __NR_mprotect 82 +__SYSCALL( 82, sys_mprotect, 3) +#define __NR_brk 83 +__SYSCALL( 83, sys_brk, 1) +#define __NR_mlock 84 +__SYSCALL( 84, sys_mlock, 2) +#define __NR_munlock 85 +__SYSCALL( 85, sys_munlock, 2) +#define __NR_mlockall 86 +__SYSCALL( 86, sys_mlockall, 1) +#define __NR_munlockall 87 +__SYSCALL( 87, sys_munlockall, 0) +#define __NR_mremap 88 +__SYSCALL( 88, sys_mremap, 4) +#define __NR_msync 89 +__SYSCALL( 89, sys_msync, 3) +#define __NR_mincore 90 +__SYSCALL( 90, sys_mincore, 3) +#define __NR_madvise 91 +__SYSCALL( 91, sys_madvise, 3) +#define __NR_shmget 92 +__SYSCALL( 92, sys_shmget, 4) +#define __NR_shmat 93 +__SYSCALL( 93, xtensa_shmat, 4) +#define __NR_shmctl 94 +__SYSCALL( 94, sys_shmctl, 4) +#define __NR_shmdt 95 +__SYSCALL( 95, sys_shmdt, 4) + +/* Socket Operations */ + +#define __NR_socket 96 +__SYSCALL( 96, sys_socket, 3) +#define __NR_setsockopt 97 +__SYSCALL( 97, sys_setsockopt, 5) +#define __NR_getsockopt 98 +__SYSCALL( 98, sys_getsockopt, 5) +#define __NR_shutdown 99 +__SYSCALL( 99, sys_shutdown, 2) + +#define __NR_bind 100 +__SYSCALL(100, sys_bind, 3) +#define __NR_connect 101 +__SYSCALL(101, sys_connect, 3) +#define __NR_listen 102 +__SYSCALL(102, sys_listen, 2) +#define __NR_accept 103 +__SYSCALL(103, sys_accept, 3) + +#define __NR_getsockname 104 +__SYSCALL(104, sys_getsockname, 3) +#define __NR_getpeername 105 +__SYSCALL(105, sys_getpeername, 3) +#define __NR_sendmsg 106 +__SYSCALL(106, sys_sendmsg, 3) +#define __NR_recvmsg 107 +__SYSCALL(107, sys_recvmsg, 3) +#define __NR_send 108 +__SYSCALL(108, sys_send, 4) +#define __NR_recv 109 +__SYSCALL(109, sys_recv, 4) +#define __NR_sendto 110 +__SYSCALL(110, sys_sendto, 6) +#define __NR_recvfrom 111 +__SYSCALL(111, sys_recvfrom, 6) + +#define 
__NR_socketpair 112 +__SYSCALL(112, sys_socketpair, 4) +#define __NR_sendfile 113 +__SYSCALL(113, sys_sendfile, 4) +#define __NR_sendfile64 114 +__SYSCALL(114, sys_sendfile64, 4) +#define __NR_available115 115 +__SYSCALL(115, sys_ni_syscall, 0) + +/* Process Operations */ + +#define __NR_clone 116 +__SYSCALL(116, xtensa_clone, 5) +#define __NR_execve 117 +__SYSCALL(117, xtensa_execve, 3) +#define __NR_exit 118 +__SYSCALL(118, sys_exit, 1) +#define __NR_exit_group 119 +__SYSCALL(119, sys_exit_group, 1) +#define __NR_getpid 120 +__SYSCALL(120, sys_getpid, 0) +#define __NR_wait4 121 +__SYSCALL(121, sys_wait4, 4) +#define __NR_waitid 122 +__SYSCALL(122, sys_waitid, 5) +#define __NR_kill 123 +__SYSCALL(123, sys_kill, 2) +#define __NR_tkill 124 +__SYSCALL(124, sys_tkill, 2) +#define __NR_tgkill 125 +__SYSCALL(125, sys_tgkill, 3) +#define __NR_set_tid_address 126 +__SYSCALL(126, sys_set_tid_address, 1) +#define __NR_gettid 127 +__SYSCALL(127, sys_gettid, 0) +#define __NR_setsid 128 +__SYSCALL(128, sys_setsid, 0) +#define __NR_getsid 129 +__SYSCALL(129, sys_getsid, 1) +#define __NR_prctl 130 +__SYSCALL(130, sys_prctl, 5) +#define __NR_personality 131 +__SYSCALL(131, sys_personality, 1) +#define __NR_getpriority 132 +__SYSCALL(132, sys_getpriority, 2) +#define __NR_setpriority 133 +__SYSCALL(133, sys_setpriority, 3) +#define __NR_setitimer 134 +__SYSCALL(134, sys_setitimer, 3) +#define __NR_getitimer 135 +__SYSCALL(135, sys_getitimer, 2) +#define __NR_setuid 136 +__SYSCALL(136, sys_setuid, 1) +#define __NR_getuid 137 +__SYSCALL(137, sys_getuid, 0) +#define __NR_setgid 138 +__SYSCALL(138, sys_setgid, 1) +#define __NR_getgid 139 +__SYSCALL(139, sys_getgid, 0) +#define __NR_geteuid 140 +__SYSCALL(140, sys_geteuid, 0) +#define __NR_getegid 141 +__SYSCALL(141, sys_getegid, 0) +#define __NR_setreuid 142 +__SYSCALL(142, sys_setreuid, 2) +#define __NR_setregid 143 +__SYSCALL(143, sys_setregid, 2) +#define __NR_setresuid 144 +__SYSCALL(144, sys_setresuid, 3) +#define __NR_getresuid 145 +__SYSCALL(145, sys_getresuid, 3) +#define __NR_setresgid 146 +__SYSCALL(146, sys_setresgid, 3) +#define __NR_getresgid 147 +__SYSCALL(147, sys_getresgid, 3) +#define __NR_setpgid 148 +__SYSCALL(148, sys_setpgid, 2) +#define __NR_getpgid 149 +__SYSCALL(149, sys_getpgid, 1) +#define __NR_getppid 150 +__SYSCALL(150, sys_getppid, 0) +#define __NR_available151 151 +__SYSCALL(151, sys_ni_syscall, 0) + +#define __NR_reserved152 152 /* set_thread_area */ +__SYSCALL(152, sys_ni_syscall, 0) +#define __NR_reserved153 153 /* get_thread_area */ +__SYSCALL(153, sys_ni_syscall, 0) +#define __NR_times 154 +__SYSCALL(154, sys_times, 1) +#define __NR_acct 155 +__SYSCALL(155, sys_acct, 1) +#define __NR_sched_setaffinity 156 +__SYSCALL(156, sys_sched_setaffinity, 3) +#define __NR_sched_getaffinity 157 +__SYSCALL(157, sys_sched_getaffinity, 3) +#define __NR_capget 158 +__SYSCALL(158, sys_capget, 2) +#define __NR_capset 159 +__SYSCALL(159, sys_capset, 2) +#define __NR_ptrace 160 +__SYSCALL(160, sys_ptrace, 4) +#define __NR_semtimedop 161 +__SYSCALL(161, sys_semtimedop, 5) +#define __NR_semget 162 +__SYSCALL(162, sys_semget, 4) +#define __NR_semop 163 +__SYSCALL(163, sys_semop, 4) +#define __NR_semctl 164 +__SYSCALL(164, sys_semctl, 4) +#define __NR_available165 165 +__SYSCALL(165, sys_ni_syscall, 0) +#define __NR_msgget 166 +__SYSCALL(166, sys_msgget, 4) +#define __NR_msgsnd 167 +__SYSCALL(167, sys_msgsnd, 4) +#define __NR_msgrcv 168 +__SYSCALL(168, sys_msgrcv, 4) +#define __NR_msgctl 169 +__SYSCALL(169, sys_msgctl, 4) +#define __NR_available170 
170 +__SYSCALL(170, sys_ni_syscall, 0) +#define __NR_available171 171 +__SYSCALL(171, sys_ni_syscall, 0) + +/* File System */ + +#define __NR_mount 172 +__SYSCALL(172, sys_mount, 5) +#define __NR_swapon 173 +__SYSCALL(173, sys_swapon, 2) +#define __NR_chroot 174 +__SYSCALL(174, sys_chroot, 1) +#define __NR_pivot_root 175 +__SYSCALL(175, sys_pivot_root, 2) +#define __NR_umount 176 +__SYSCALL(176, sys_umount, 2) +#define __NR_swapoff 177 +__SYSCALL(177, sys_swapoff, 1) +#define __NR_sync 178 +__SYSCALL(178, sys_sync, 0) +#define __NR_available179 179 +__SYSCALL(179, sys_ni_syscall, 0) +#define __NR_setfsuid 180 +__SYSCALL(180, sys_setfsuid, 1) +#define __NR_setfsgid 181 +__SYSCALL(181, sys_setfsgid, 1) +#define __NR_sysfs 182 +__SYSCALL(182, sys_sysfs, 3) +#define __NR_ustat 183 +__SYSCALL(183, sys_ustat, 2) +#define __NR_statfs 184 +__SYSCALL(184, sys_statfs, 2) +#define __NR_fstatfs 185 +__SYSCALL(185, sys_fstatfs, 2) +#define __NR_statfs64 186 +__SYSCALL(186, sys_statfs64, 3) +#define __NR_fstatfs64 187 +__SYSCALL(187, sys_fstatfs64, 3) + +/* System */ + +#define __NR_setrlimit 188 +__SYSCALL(188, sys_setrlimit, 2) +#define __NR_getrlimit 189 +__SYSCALL(189, sys_getrlimit, 2) +#define __NR_getrusage 190 +__SYSCALL(190, sys_getrusage, 2) +#define __NR_futex 191 +__SYSCALL(191, sys_futex, 5) +#define __NR_gettimeofday 192 +__SYSCALL(192, sys_gettimeofday, 2) +#define __NR_settimeofday 193 +__SYSCALL(193, sys_settimeofday, 2) +#define __NR_adjtimex 194 +__SYSCALL(194, sys_adjtimex, 1) +#define __NR_nanosleep 195 +__SYSCALL(195, sys_nanosleep, 2) +#define __NR_getgroups 196 +__SYSCALL(196, sys_getgroups, 2) +#define __NR_setgroups 197 +__SYSCALL(197, sys_setgroups, 2) +#define __NR_sethostname 198 +__SYSCALL(198, sys_sethostname, 2) +#define __NR_setdomainname 199 +__SYSCALL(199, sys_setdomainname, 2) +#define __NR_syslog 200 +__SYSCALL(200, sys_syslog, 3) +#define __NR_vhangup 201 +__SYSCALL(201, sys_vhangup, 0) +#define __NR_uselib 202 +__SYSCALL(202, sys_uselib, 1) +#define __NR_reboot 203 +__SYSCALL(203, sys_reboot, 3) +#define __NR_quotactl 204 +__SYSCALL(204, sys_quotactl, 4) +#define __NR_nfsservctl 205 +__SYSCALL(205, sys_nfsservctl, 3) +#define __NR__sysctl 206 +__SYSCALL(206, sys_sysctl, 1) +#define __NR_bdflush 207 +__SYSCALL(207, sys_bdflush, 2) +#define __NR_uname 208 +__SYSCALL(208, sys_newuname, 1) +#define __NR_sysinfo 209 +__SYSCALL(209, sys_sysinfo, 1) +#define __NR_init_module 210 +__SYSCALL(210, sys_init_module, 2) +#define __NR_delete_module 211 +__SYSCALL(211, sys_delete_module, 1) + +#define __NR_sched_setparam 212 +__SYSCALL(212, sys_sched_setparam, 2) +#define __NR_sched_getparam 213 +__SYSCALL(213, sys_sched_getparam, 2) +#define __NR_sched_setscheduler 214 +__SYSCALL(214, sys_sched_setscheduler, 3) +#define __NR_sched_getscheduler 215 +__SYSCALL(215, sys_sched_getscheduler, 1) +#define __NR_sched_get_priority_max 216 +__SYSCALL(216, sys_sched_get_priority_max, 1) +#define __NR_sched_get_priority_min 217 +__SYSCALL(217, sys_sched_get_priority_min, 1) +#define __NR_sched_rr_get_interval 218 +__SYSCALL(218, sys_sched_rr_get_interval, 2) +#define __NR_sched_yield 219 +__SYSCALL(219, sys_sched_yield, 0) +#define __NR_sigreturn 222 +__SYSCALL(222, xtensa_sigreturn, 0) + +/* Signal Handling */ + +#define __NR_restart_syscall 223 +__SYSCALL(223, sys_restart_syscall, 0) +#define __NR_sigaltstack 224 +__SYSCALL(224, xtensa_sigaltstack, 2) +#define __NR_rt_sigreturn 225 +__SYSCALL(225, xtensa_rt_sigreturn, 1) +#define __NR_rt_sigaction 226 +__SYSCALL(226, sys_rt_sigaction, 4) 
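[Editorial note, not part of the patch: each entry in this table, both above and in the remaining entries below, comes as a pair. A __NR_<name> constant fixes the number seen by user space, and the adjacent __SYSCALL(nr, func, nargs) invocation expands to nothing by default, thanks to the "#ifndef __SYSCALL" fallback at the top of the table, but can be redefined by whoever includes the header. A minimal sketch of the usual consumer of such a header is shown here; the syscall_t typedef, the table name, and the include path are illustrative assumptions, not taken from this patch.]

	/* Sketch only: build a dispatch table by re-expanding the list above. */
	typedef long (*syscall_t)(long, long, long, long, long, long);

	extern long sys_ni_syscall(void);	/* real prototypes come from other headers */

	#undef __SYSCALL
	#define __SYSCALL(nr, func, nargs)	[nr] = (syscall_t) &func,

	syscall_t sys_call_table[__NR_syscall_count] = {
		/* default every slot, then let the header's entries override */
		[0 ... __NR_syscall_count - 1] = (syscall_t) &sys_ni_syscall,
	#include <asm/unistd.h>		/* assumed include path for the table above */
	};

[With that in place, the same header serves user space (numbers only) and the kernel's dispatch code (one table entry per __SYSCALL line), so the two cannot drift apart.]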
+#define __NR_rt_sigprocmask 227 +__SYSCALL(227, sys_rt_sigprocmask, 4) +#define __NR_rt_sigpending 228 +__SYSCALL(228, sys_rt_sigpending, 2) +#define __NR_rt_sigtimedwait 229 +__SYSCALL(229, sys_rt_sigtimedwait, 4) +#define __NR_rt_sigqueueinfo 230 +__SYSCALL(230, sys_rt_sigqueueinfo, 3) +#define __NR_rt_sigsuspend 231 +__SYSCALL(231, xtensa_rt_sigsuspend, 2) + +/* Message */ + +#define __NR_mq_open 232 +__SYSCALL(232, sys_mq_open, 4) +#define __NR_mq_unlink 233 +__SYSCALL(233, sys_mq_unlink, 1) +#define __NR_mq_timedsend 234 +__SYSCALL(234, sys_mq_timedsend, 5) +#define __NR_mq_timedreceive 235 +__SYSCALL(235, sys_mq_timedreceive, 5) +#define __NR_mq_notify 236 +__SYSCALL(236, sys_mq_notify, 2) +#define __NR_mq_getsetattr 237 +__SYSCALL(237, sys_mq_getsetattr, 3) +#define __NR_available238 238 +__SYSCALL(238, sys_ni_syscall, 0) + +/* IO */ + +#define __NR_io_setup 239 +__SYSCALL(239, sys_io_setup, 2) +#define __NR_io_destroy 240 +__SYSCALL(240, sys_io_destroy, 1) +#define __NR_io_submit 241 +__SYSCALL(241, sys_io_submit, 3) +#define __NR_io_getevents 242 +__SYSCALL(242, sys_io_getevents, 5) +#define __NR_io_cancel 243 +__SYSCALL(243, sys_io_cancel, 3) +#define __NR_clock_settime 244 +__SYSCALL(244, sys_clock_settime, 2) +#define __NR_clock_gettime 245 +__SYSCALL(245, sys_clock_gettime, 2) +#define __NR_clock_getres 246 +__SYSCALL(246, sys_clock_getres, 2) +#define __NR_clock_nanosleep 247 +__SYSCALL(247, sys_clock_nanosleep, 4) + +/* Timer */ + +#define __NR_timer_create 248 +__SYSCALL(248, sys_timer_create, 3) +#define __NR_timer_delete 249 +__SYSCALL(249, sys_timer_delete, 1) +#define __NR_timer_settime 250 +__SYSCALL(250, sys_timer_settime, 4) +#define __NR_timer_gettime 251 +__SYSCALL(251, sys_timer_gettime, 2) +#define __NR_timer_getoverrun 252 +__SYSCALL(252, sys_timer_getoverrun, 1) + +/* System */ + +#define __NR_reserved244 253 +__SYSCALL(253, sys_ni_syscall, 0) +#define __NR_lookup_dcookie 254 +__SYSCALL(254, sys_lookup_dcookie, 4) +#define __NR_available255 255 +__SYSCALL(255, sys_ni_syscall, 0) +#define __NR_add_key 256 +__SYSCALL(256, sys_add_key, 5) +#define __NR_request_key 257 +__SYSCALL(257, sys_request_key, 5) +#define __NR_keyctl 258 +__SYSCALL(258, sys_keyctl, 5) +#define __NR_available259 259 +__SYSCALL(259, sys_ni_syscall, 0) + +#define __NR_syscall_count 261 + +/* + * sysxtensa syscall handler + * + * int sysxtensa (SYS_XTENSA_ATOMIC_SET, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_ADD, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_EXG_ADD, ptr, val, unused); + * int sysxtensa (SYS_XTENSA_ATOMIC_CMP_SWP, ptr, oldval, newval); + * a2 a6 a3 a4 a5 + */ + +#define SYS_XTENSA_RESERVED 0 /* don't use this */ +#define SYS_XTENSA_ATOMIC_SET 1 /* set variable */ +#define SYS_XTENSA_ATOMIC_EXG_ADD 2 /* exchange memory and add */ +#define SYS_XTENSA_ATOMIC_ADD 3 /* add to memory */ +#define SYS_XTENSA_ATOMIC_CMP_SWP 4 /* compare and swap */ + +#define SYS_XTENSA_COUNT 5 /* count */ + +#ifdef __KERNEL__ /* * "Conditional" syscalls @@ -230,6 +611,9 @@ #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_RT_SIGACTION -#endif /* __KERNEL__ */ +#define __ARCH_WANT_SYS_RT_SIGSUSPEND + +#endif /* __KERNEL__ */ #endif /* _XTENSA_UNISTD_H */ + diff --git a/include/asm-xtensa/variant-fsf/core.h b/include/asm-xtensa/variant-fsf/core.h new file mode 100644 index 00000000000..2f337605c74 --- /dev/null +++ b/include/asm-xtensa/variant-fsf/core.h @@ -0,0 +1,359 @@ +/* + * Xtensa processor core configuration information. 
+ * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999-2006 Tensilica Inc. + */ + +#ifndef _XTENSA_CORE_H +#define _XTENSA_CORE_H + + +/**************************************************************************** + Parameters Useful for Any Code, USER or PRIVILEGED + ****************************************************************************/ + +/* + * Note: Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is + * configured, and a value of 0 otherwise. These macros are always defined. + */ + + +/*---------------------------------------------------------------------- + ISA + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_BE 1 /* big-endian byte ordering */ +#define XCHAL_HAVE_WINDOWED 1 /* windowed registers option */ +#define XCHAL_NUM_AREGS 64 /* num of physical addr regs */ +#define XCHAL_NUM_AREGS_LOG2 6 /* log2(XCHAL_NUM_AREGS) */ +#define XCHAL_MAX_INSTRUCTION_SIZE 3 /* max instr bytes (3..8) */ +#define XCHAL_HAVE_DEBUG 1 /* debug option */ +#define XCHAL_HAVE_DENSITY 1 /* 16-bit instructions */ +#define XCHAL_HAVE_LOOPS 1 /* zero-overhead loops */ +#define XCHAL_HAVE_NSA 1 /* NSA/NSAU instructions */ +#define XCHAL_HAVE_MINMAX 0 /* MIN/MAX instructions */ +#define XCHAL_HAVE_SEXT 0 /* SEXT instruction */ +#define XCHAL_HAVE_CLAMPS 0 /* CLAMPS instruction */ +#define XCHAL_HAVE_MUL16 0 /* MUL16S/MUL16U instructions */ +#define XCHAL_HAVE_MUL32 0 /* MULL instruction */ +#define XCHAL_HAVE_MUL32_HIGH 0 /* MULUH/MULSH instructions */ +#define XCHAL_HAVE_L32R 1 /* L32R instruction */ +#define XCHAL_HAVE_ABSOLUTE_LITERALS 1 /* non-PC-rel (extended) L32R */ +#define XCHAL_HAVE_CONST16 0 /* CONST16 instruction */ +#define XCHAL_HAVE_ADDX 1 /* ADDX#/SUBX# instructions */ +#define XCHAL_HAVE_WIDE_BRANCHES 0 /* B*.W18 or B*.W15 instr's */ +#define XCHAL_HAVE_PREDICTED_BRANCHES 0 /* B[EQ/EQZ/NE/NEZ]T instr's */ +#define XCHAL_HAVE_CALL4AND12 1 /* (obsolete option) */ +#define XCHAL_HAVE_ABS 1 /* ABS instruction */ +/*#define XCHAL_HAVE_POPC 0*/ /* POPC instruction */ +/*#define XCHAL_HAVE_CRC 0*/ /* CRC instruction */ +#define XCHAL_HAVE_RELEASE_SYNC 0 /* L32AI/S32RI instructions */ +#define XCHAL_HAVE_S32C1I 0 /* S32C1I instruction */ +#define XCHAL_HAVE_SPECULATION 0 /* speculation */ +#define XCHAL_HAVE_FULL_RESET 1 /* all regs/state reset */ +#define XCHAL_NUM_CONTEXTS 1 /* */ +#define XCHAL_NUM_MISC_REGS 2 /* num of scratch regs (0..4) */ +#define XCHAL_HAVE_TAP_MASTER 0 /* JTAG TAP control instr's */ +#define XCHAL_HAVE_PRID 1 /* processor ID register */ +#define XCHAL_HAVE_THREADPTR 1 /* THREADPTR register */ +#define XCHAL_HAVE_BOOLEANS 0 /* boolean registers */ +#define XCHAL_HAVE_CP 0 /* CPENABLE reg (coprocessor) */ +#define XCHAL_CP_MAXCFG 0 /* max allowed cp id plus one */ +#define XCHAL_HAVE_MAC16 0 /* MAC16 package */ +#define XCHAL_HAVE_VECTORFPU2005 0 /* vector floating-point pkg */ +#define XCHAL_HAVE_FP 0 /* floating point pkg */ +#define XCHAL_HAVE_VECTRA1 0 /* Vectra I pkg */ +#define XCHAL_HAVE_VECTRALX 0 /* Vectra LX pkg */ +#define XCHAL_HAVE_HIFI2 0 /* HiFi2 Audio Engine pkg */ + + +/*---------------------------------------------------------------------- + MISC + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_WRITEBUFFER_ENTRIES 4 /* size of write buffer */ +#define XCHAL_INST_FETCH_WIDTH 4 /* instr-fetch width 
in bytes */ +#define XCHAL_DATA_WIDTH 4 /* data width in bytes */ +/* In T1050, applies to selected core load and store instructions (see ISA): */ +#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* unaligned loads cause exc. */ +#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* unaligned stores cause exc.*/ + +#define XCHAL_CORE_ID "fsf" /* alphanum core name + (CoreID) set in the Xtensa + Processor Generator */ + +#define XCHAL_BUILD_UNIQUE_ID 0x00006700 /* 22-bit sw build ID */ + +/* + * These definitions describe the hardware targeted by this software. + */ +#define XCHAL_HW_CONFIGID0 0xC103C3FF /* ConfigID hi 32 bits*/ +#define XCHAL_HW_CONFIGID1 0x0C006700 /* ConfigID lo 32 bits*/ +#define XCHAL_HW_VERSION_NAME "LX2.0.0" /* full version name */ +#define XCHAL_HW_VERSION_MAJOR 2200 /* major ver# of targeted hw */ +#define XCHAL_HW_VERSION_MINOR 0 /* minor ver# of targeted hw */ +#define XTHAL_HW_REL_LX2 1 +#define XTHAL_HW_REL_LX2_0 1 +#define XTHAL_HW_REL_LX2_0_0 1 +#define XCHAL_HW_CONFIGID_RELIABLE 1 +/* If software targets a *range* of hardware versions, these are the bounds: */ +#define XCHAL_HW_MIN_VERSION_MAJOR 2200 /* major v of earliest tgt hw */ +#define XCHAL_HW_MIN_VERSION_MINOR 0 /* minor v of earliest tgt hw */ +#define XCHAL_HW_MAX_VERSION_MAJOR 2200 /* major v of latest tgt hw */ +#define XCHAL_HW_MAX_VERSION_MINOR 0 /* minor v of latest tgt hw */ + + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_ICACHE_LINESIZE 16 /* I-cache line size in bytes */ +#define XCHAL_DCACHE_LINESIZE 16 /* D-cache line size in bytes */ +#define XCHAL_ICACHE_LINEWIDTH 4 /* log2(I line size in bytes) */ +#define XCHAL_DCACHE_LINEWIDTH 4 /* log2(D line size in bytes) */ + +#define XCHAL_ICACHE_SIZE 8192 /* I-cache size in bytes or 0 */ +#define XCHAL_DCACHE_SIZE 8192 /* D-cache size in bytes or 0 */ + +#define XCHAL_DCACHE_IS_WRITEBACK 0 /* writeback feature */ + + + + +/**************************************************************************** + Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code + ****************************************************************************/ + + +#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_PIF 1 /* any outbound PIF present */ + +/* If present, cache size in bytes == (ways * 2^(linewidth + setwidth)). */ + +/* Number of cache sets in log2(lines per way): */ +#define XCHAL_ICACHE_SETWIDTH 8 +#define XCHAL_DCACHE_SETWIDTH 8 + +/* Cache set associativity (number of ways): */ +#define XCHAL_ICACHE_WAYS 2 +#define XCHAL_DCACHE_WAYS 2 + +/* Cache features: */ +#define XCHAL_ICACHE_LINE_LOCKABLE 0 +#define XCHAL_DCACHE_LINE_LOCKABLE 0 +#define XCHAL_ICACHE_ECC_PARITY 0 +#define XCHAL_DCACHE_ECC_PARITY 0 + +/* Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits): */ +#define XCHAL_CA_BITS 4 + + +/*---------------------------------------------------------------------- + INTERNAL I/D RAM/ROMs and XLMI + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_INSTROM 0 /* number of core instr. ROMs */ +#define XCHAL_NUM_INSTRAM 0 /* number of core instr. 
RAMs */ +#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs */ +#define XCHAL_NUM_DATARAM 0 /* number of core data RAMs */ +#define XCHAL_NUM_URAM 0 /* number of core unified RAMs*/ +#define XCHAL_NUM_XLMI 0 /* number of core XLMI ports */ + + +/*---------------------------------------------------------------------- + INTERRUPTS and TIMERS + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_INTERRUPTS 1 /* interrupt option */ +#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* med/high-pri. interrupts */ +#define XCHAL_HAVE_NMI 0 /* non-maskable interrupt */ +#define XCHAL_HAVE_CCOUNT 1 /* CCOUNT reg. (timer option) */ +#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */ +#define XCHAL_NUM_INTERRUPTS 17 /* number of interrupts */ +#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* ceil(log2(NUM_INTERRUPTS)) */ +#define XCHAL_NUM_EXTINTERRUPTS 10 /* num of external interrupts */ +#define XCHAL_NUM_INTLEVELS 4 /* number of interrupt levels + (not including level zero) */ +#define XCHAL_EXCM_LEVEL 1 /* level masked by PS.EXCM */ + /* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */ + +/* Masks of interrupts at each interrupt level: */ +#define XCHAL_INTLEVEL1_MASK 0x000064F9 +#define XCHAL_INTLEVEL2_MASK 0x00008902 +#define XCHAL_INTLEVEL3_MASK 0x00011204 +#define XCHAL_INTLEVEL4_MASK 0x00000000 +#define XCHAL_INTLEVEL5_MASK 0x00000000 +#define XCHAL_INTLEVEL6_MASK 0x00000000 +#define XCHAL_INTLEVEL7_MASK 0x00000000 + +/* Masks of interrupts at each range 1..n of interrupt levels: */ +#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x000064F9 +#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x0000EDFB +#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x0001FFFF +#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x0001FFFF +#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x0001FFFF +#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x0001FFFF +#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x0001FFFF + +/* Level of each interrupt: */ +#define XCHAL_INT0_LEVEL 1 +#define XCHAL_INT1_LEVEL 2 +#define XCHAL_INT2_LEVEL 3 +#define XCHAL_INT3_LEVEL 1 +#define XCHAL_INT4_LEVEL 1 +#define XCHAL_INT5_LEVEL 1 +#define XCHAL_INT6_LEVEL 1 +#define XCHAL_INT7_LEVEL 1 +#define XCHAL_INT8_LEVEL 2 +#define XCHAL_INT9_LEVEL 3 +#define XCHAL_INT10_LEVEL 1 +#define XCHAL_INT11_LEVEL 2 +#define XCHAL_INT12_LEVEL 3 +#define XCHAL_INT13_LEVEL 1 +#define XCHAL_INT14_LEVEL 1 +#define XCHAL_INT15_LEVEL 2 +#define XCHAL_INT16_LEVEL 3 +#define XCHAL_DEBUGLEVEL 4 /* debug interrupt level */ +#define XCHAL_HAVE_DEBUG_EXTERN_INT 0 /* OCD external db interrupt */ + +/* Type of each interrupt: */ +#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT6_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT7_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT10_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT11_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT12_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT13_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT14_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT15_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT16_TYPE XTHAL_INTTYPE_SOFTWARE + +/* Masks of interrupts for each type of interrupt: */ +#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xFFFE0000 
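[Editorial note, not part of the patch: the mask macros in this area are derived data rather than independent configuration. Bit i of XCHAL_INTLEVELn_MASK is set exactly when XCHAL_INTi_LEVEL is n, the _ANDBELOW variants accumulate levels 1..n, and the per-type masks, of which the unconfigured-interrupt mask covering bits 17..31 appears above and the rest follow below, partition the same 17 interrupts by trigger type. An illustrative compile-time cross-check of the first two relationships, valid anywhere after these definitions, could look like the following.]

	/* Illustrative consistency checks; not part of the patch. */
	#if XCHAL_INTLEVEL1_MASK != ((1 << 0) | (1 << 3) | (1 << 4) | (1 << 5) | \
				     (1 << 6) | (1 << 7) | (1 << 10) | (1 << 13) | (1 << 14))
	# error "XCHAL_INTLEVEL1_MASK disagrees with the XCHAL_INTn_LEVEL assignments"
	#endif

	#if XCHAL_INTLEVEL3_ANDBELOW_MASK != \
		(XCHAL_INTLEVEL1_MASK | XCHAL_INTLEVEL2_MASK | XCHAL_INTLEVEL3_MASK)
	# error "cumulative level-3 mask is not the OR of the level 1..3 masks"
	#endif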
+#define XCHAL_INTTYPE_MASK_SOFTWARE 0x0001E000 +#define XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x00000380 +#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0000007F +#define XCHAL_INTTYPE_MASK_TIMER 0x00001C00 +#define XCHAL_INTTYPE_MASK_NMI 0x00000000 +#define XCHAL_INTTYPE_MASK_WRITE_ERROR 0x00000000 + +/* Interrupt numbers assigned to specific interrupt sources: */ +#define XCHAL_TIMER0_INTERRUPT 10 /* CCOMPARE0 */ +#define XCHAL_TIMER1_INTERRUPT 11 /* CCOMPARE1 */ +#define XCHAL_TIMER2_INTERRUPT 12 /* CCOMPARE2 */ +#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED + +/* Interrupt numbers for levels at which only one interrupt is configured: */ +/* (There are many interrupts each at level(s) 1, 2, 3.) */ + + +/* + * External interrupt vectors/levels. + * These macros describe how Xtensa processor interrupt numbers + * (as numbered internally, eg. in INTERRUPT and INTENABLE registers) + * map to external BInterrupt<n> pins, for those interrupts + * configured as external (level-triggered, edge-triggered, or NMI). + * See the Xtensa processor databook for more details. + */ + +/* Core interrupt numbers mapped to each EXTERNAL interrupt number: */ +#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */ +#define XCHAL_EXTINT1_NUM 1 /* (intlevel 2) */ +#define XCHAL_EXTINT2_NUM 2 /* (intlevel 3) */ +#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */ +#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */ +#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */ +#define XCHAL_EXTINT6_NUM 6 /* (intlevel 1) */ +#define XCHAL_EXTINT7_NUM 7 /* (intlevel 1) */ +#define XCHAL_EXTINT8_NUM 8 /* (intlevel 2) */ +#define XCHAL_EXTINT9_NUM 9 /* (intlevel 3) */ + + +/*---------------------------------------------------------------------- + EXCEPTIONS and VECTORS + ----------------------------------------------------------------------*/ + +#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture + number: 1 == XEA1 (old) + 2 == XEA2 (new) + 0 == XEAX (extern) */ +#define XCHAL_HAVE_XEA1 0 /* Exception Architecture 1 */ +#define XCHAL_HAVE_XEA2 1 /* Exception Architecture 2 */ +#define XCHAL_HAVE_XEAX 0 /* External Exception Arch. 
*/ +#define XCHAL_HAVE_EXCEPTIONS 1 /* exception option */ +#define XCHAL_HAVE_MEM_ECC_PARITY 0 /* local memory ECC/parity */ + +#define XCHAL_RESET_VECTOR_VADDR 0xFE000020 +#define XCHAL_RESET_VECTOR_PADDR 0xFE000020 +#define XCHAL_USER_VECTOR_VADDR 0xD0000220 +#define XCHAL_USER_VECTOR_PADDR 0x00000220 +#define XCHAL_KERNEL_VECTOR_VADDR 0xD0000200 +#define XCHAL_KERNEL_VECTOR_PADDR 0x00000200 +#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0xD0000290 +#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x00000290 +#define XCHAL_WINDOW_VECTORS_VADDR 0xD0000000 +#define XCHAL_WINDOW_VECTORS_PADDR 0x00000000 +#define XCHAL_INTLEVEL2_VECTOR_VADDR 0xD0000240 +#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x00000240 +#define XCHAL_INTLEVEL3_VECTOR_VADDR 0xD0000250 +#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x00000250 +#define XCHAL_INTLEVEL4_VECTOR_VADDR 0xFE000520 +#define XCHAL_INTLEVEL4_VECTOR_PADDR 0xFE000520 +#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL4_VECTOR_VADDR +#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL4_VECTOR_PADDR + + +/*---------------------------------------------------------------------- + DEBUG + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_OCD 1 /* OnChipDebug option */ +#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */ +#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */ +#define XCHAL_HAVE_OCD_DIR_ARRAY 1 /* faster OCD option */ + + +/*---------------------------------------------------------------------- + MMU + ----------------------------------------------------------------------*/ + +/* See <xtensa/config/core-matmap.h> header file for more details. */ + +#define XCHAL_HAVE_TLBS 1 /* inverse of HAVE_CACHEATTR */ +#define XCHAL_HAVE_SPANNING_WAY 0 /* one way maps I+D 4GB vaddr */ +#define XCHAL_HAVE_IDENTITY_MAP 0 /* vaddr == paddr always */ +#define XCHAL_HAVE_CACHEATTR 0 /* CACHEATTR register present */ +#define XCHAL_HAVE_MIMIC_CACHEATTR 0 /* region protection */ +#define XCHAL_HAVE_XLT_CACHEATTR 0 /* region prot. w/translation */ +#define XCHAL_HAVE_PTP_MMU 1 /* full MMU (with page table + [autorefill] and protection) + usable for an MMU-based OS */ +/* If none of the above last 4 are set, it's a custom TLB configuration. */ +#define XCHAL_ITLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */ +#define XCHAL_DTLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */ + +#define XCHAL_MMU_ASID_BITS 8 /* number of bits in ASIDs */ +#define XCHAL_MMU_RINGS 4 /* number of rings (1..4) */ +#define XCHAL_MMU_RING_BITS 2 /* num of bits in RING field */ + +#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ + + +#endif /* _XTENSA_CORE_CONFIGURATION_H */ + diff --git a/include/asm-xtensa/variant-fsf/tie.h b/include/asm-xtensa/variant-fsf/tie.h new file mode 100644 index 00000000000..a73c7166491 --- /dev/null +++ b/include/asm-xtensa/variant-fsf/tie.h @@ -0,0 +1,22 @@ +/* + * Xtensa processor core configuration information. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999-2006 Tensilica Inc. 
+ */ + +#ifndef XTENSA_TIE_H +#define XTENSA_TIE_H + +/*---------------------------------------------------------------------- + COPROCESSORS and EXTRA STATE + ----------------------------------------------------------------------*/ + +#define XCHAL_CP_NUM 0 /* number of coprocessors */ +#define XCHAL_CP_MASK 0x00 + +#endif /*XTENSA_CONFIG_TIE_H*/ + diff --git a/include/asm-xtensa/xtensa/cacheasm.h b/include/asm-xtensa/xtensa/cacheasm.h deleted file mode 100644 index 0cdbb0bf180..00000000000 --- a/include/asm-xtensa/xtensa/cacheasm.h +++ /dev/null @@ -1,708 +0,0 @@ -#ifndef XTENSA_CACHEASM_H -#define XTENSA_CACHEASM_H - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/cacheasm.h -- assembler-specific cache - * related definitions that depend on CORE configuration. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/coreasm.h> - - -/* - * This header file defines assembler macros of the form: - * <x>cache_<func> - * where <x> is 'i' or 'd' for instruction and data caches, - * and <func> indicates the function of the macro. - * - * The following functions <func> are defined, - * and apply only to the specified cache (I or D): - * - * reset - * Resets the cache. - * - * sync - * Makes sure any previous cache instructions have been completed; - * ie. makes sure any previous cache control operations - * have had full effect and been synchronized to memory. - * Eg. any invalidate completed [so as not to generate a hit], - * any writebacks or other pipelined writes written to memory, etc. - * - * invalidate_line (single cache line) - * invalidate_region (specified memory range) - * invalidate_all (entire cache) - * Invalidates all cache entries that cache - * data from the specified memory range. - * NOTE: locked entries are not invalidated. - * - * writeback_line (single cache line) - * writeback_region (specified memory range) - * writeback_all (entire cache) - * Writes back to memory all dirty cache entries - * that cache data from the specified memory range, - * and marks these entries as clean. - * NOTE: on some future implementations, this might - * also invalidate. - * NOTE: locked entries are written back, but never invalidated. - * NOTE: instruction caches never implement writeback. - * - * writeback_inv_line (single cache line) - * writeback_inv_region (specified memory range) - * writeback_inv_all (entire cache) - * Writes back to memory all dirty cache entries - * that cache data from the specified memory range, - * and invalidates these entries (including all clean - * cache entries that cache data from that range). - * NOTE: locked entries are written back but not invalidated. - * NOTE: instruction caches never implement writeback. - * - * lock_line (single cache line) - * lock_region (specified memory range) - * Prefetch and lock the specified memory range into cache. - * NOTE: if any part of the specified memory range cannot - * be locked, a ??? exception occurs. These macros don't - * do anything special (yet anyway) to handle this situation. - * - * unlock_line (single cache line) - * unlock_region (specified memory range) - * unlock_all (entire cache) - * Unlock cache entries that cache the specified memory range. - * Entries not already locked are unaffected. 
- */ - - - -/*************************** GENERIC -- ALL CACHES ***************************/ - - -/* - * The following macros assume the following cache size/parameter limits - * in the current Xtensa core implementation: - * cache size: 1024 bytes minimum - * line size: 16 - 64 bytes - * way count: 1 - 4 - * - * Minimum entries per way (ie. per associativity) = 1024 / 64 / 4 = 4 - * Hence the assumption that each loop can execute four cache instructions. - * - * Correspondingly, the offset range of instructions is assumed able to cover - * four lines, ie. offsets {0,1,2,3} * line_size are assumed valid for - * both hit and indexed cache instructions. Ie. these offsets are all - * valid: 0, 16, 32, 48, 64, 96, 128, 192 (for line sizes 16, 32, 64). - * This is true of all original cache instructions - * (dhi, ihi, dhwb, dhwbi, dii, iii) which have offsets - * of 0 to 1020 in multiples of 4 (ie. 8 bits shifted by 2). - * This is also true of subsequent cache instructions - * (dhu, ihu, diu, iiu, diwb, diwbi, dpfl, ipfl) which have offsets - * of 0 to 240 in multiples of 16 (ie. 4 bits shifted by 4). - * - * (Maximum cache size, currently 32k, doesn't affect the following macros. - * Cache ways > MMU min page size cause aliasing but that's another matter.) - */ - - - -/* - * Macro to apply an 'indexed' cache instruction to the entire cache. - * - * Parameters: - * cainst instruction/ that takes an address register parameter - * and an offset parameter (in range 0 .. 3*linesize). - * size size of cache in bytes - * linesize size of cache line in bytes - * assoc_or1 number of associativities (ways/sets) in cache - * if all sets affected by cainst, - * or 1 if only one set (or not all sets) of the cache - * is affected by cainst (eg. DIWB or DIWBI [not yet ISA defined]). - * aa, ab unique address registers (temporaries) - */ - - .macro cache_index_all cainst, size, linesize, assoc_or1, aa, ab - - // Sanity-check on cache parameters: - .ifne (\size % (\linesize * \assoc_or1 * 4)) - .err // cache configuration outside expected/supported range! - .endif - - // \size byte cache, \linesize byte lines, \assoc_or1 way(s) affected by each \cainst. - movi \aa, (\size / (\linesize * \assoc_or1 * 4)) - // Possible improvement: need only loop if \aa > 1 ; - // however that particular condition is highly unlikely. - movi \ab, 0 // to iterate over cache - floop \aa, cachex\@ - \cainst \ab, 0*\linesize - \cainst \ab, 1*\linesize - \cainst \ab, 2*\linesize - \cainst \ab, 3*\linesize - addi \ab, \ab, 4*\linesize // move to next line - floopend \aa, cachex\@ - - .endm - - -/* - * Macro to apply a 'hit' cache instruction to a memory region, - * ie. to any cache entries that cache a specified portion (region) of memory. - * Takes care of the unaligned cases, ie. may apply to one - * more cache line than $asize / lineSize if $aaddr is not aligned. - * - * - * Parameters are: - * cainst instruction/macro that takes an address register parameter - * and an offset parameter (currently always zero) - * and generates a cache instruction (eg. "dhi", "dhwb", "ihi", etc.) - * linesize_log2 log2(size of cache line in bytes) - * addr register containing start address of region (clobbered) - * asize register containing size of the region in bytes (clobbered) - * askew unique register used as temporary - * - * !?!?! 
2DO: optimization: iterate max(cache_size and \asize) / linesize - */ - - .macro cache_hit_region cainst, linesize_log2, addr, asize, askew - - // Make \asize the number of iterations: - extui \askew, \addr, 0, \linesize_log2 // get unalignment amount of \addr - add \asize, \asize, \askew // ... and add it to \asize - addi \asize, \asize, (1 << \linesize_log2) - 1 // round up! - srli \asize, \asize, \linesize_log2 - - // Iterate over region: - floopnez \asize, cacheh\@ - \cainst \addr, 0 - addi \addr, \addr, (1 << \linesize_log2) // move to next line - floopend \asize, cacheh\@ - - .endm - - - - - -/*************************** INSTRUCTION CACHE ***************************/ - - -/* - * Reset/initialize the instruction cache by simply invalidating it: - * (need to unlock first also, if cache locking implemented): - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro icache_reset aa, ab - icache_unlock_all \aa, \ab - icache_invalidate_all \aa, \ab - .endm - - -/* - * Synchronize after an instruction cache operation, - * to be sure everything is in sync with memory as to be - * expected following any previous instruction cache control operations. - * - * Parameters are: - * ar an address register (temporary) (currently unused, but may be used in future) - */ - .macro icache_sync ar -#if XCHAL_ICACHE_SIZE > 0 - isync -#endif - .endm - - - -/* - * Invalidate a single line of the instruction cache. - * Parameters are: - * ar address register that contains (virtual) address to invalidate - * (may get clobbered in a future implementation, but not currently) - * offset (optional) offset to add to \ar to compute effective address to invalidate - * (note: some number of lsbits are ignored) - */ - .macro icache_invalidate_line ar, offset -#if XCHAL_ICACHE_SIZE > 0 - ihi \ar, \offset // invalidate icache line - /* - * NOTE: in some version of the silicon [!!!SHOULD HAVE BEEN DOCUMENTED!!!] - * 'ihi' doesn't work, so it had been replaced with 'iii' - * (which would just invalidate more than it should, - * which should be okay other than the performance hit - * because cache locking did not exist in that version, - * unless user somehow relies on something being cached). - * [WHAT VERSION IS IT!!?!? - * IS THERE ANY WAY TO TEST FOR THAT HERE, TO OUTPUT 'III' ONLY IF NEEDED!?!?]. - * - * iii \ar, \offset - */ - icache_sync \ar -#endif - .endm - - - - -/* - * Invalidate instruction cache entries that cache a specified portion of memory. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro icache_invalidate_region astart, asize, ac -#if XCHAL_ICACHE_SIZE > 0 - // Instruction cache region invalidation: - cache_hit_region ihi, XCHAL_ICACHE_LINEWIDTH, \astart, \asize, \ac - icache_sync \ac - // End of instruction cache region invalidation -#endif - .endm - - - -/* - * Invalidate entire instruction cache. - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro icache_invalidate_all aa, ab -#if XCHAL_ICACHE_SIZE > 0 - // Instruction cache invalidation: - cache_index_all iii, XCHAL_ICACHE_SIZE, XCHAL_ICACHE_LINESIZE, XCHAL_ICACHE_WAYS, \aa, \ab - icache_sync \aa - // End of instruction cache invalidation -#endif - .endm - - - -/* - * Lock (prefetch & lock) a single line of the instruction cache. 
- * - * Parameters are: - * ar address register that contains (virtual) address to lock - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to lock - * (note: some number of lsbits are ignored) - */ - .macro icache_lock_line ar, offset -#if XCHAL_ICACHE_SIZE > 0 && XCHAL_ICACHE_LINE_LOCKABLE - ipfl \ar, \offset /* prefetch and lock icache line */ - icache_sync \ar -#endif - .endm - - - -/* - * Lock (prefetch & lock) a specified portion of memory into the instruction cache. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro icache_lock_region astart, asize, ac -#if XCHAL_ICACHE_SIZE > 0 && XCHAL_ICACHE_LINE_LOCKABLE - // Instruction cache region lock: - cache_hit_region ipfl, XCHAL_ICACHE_LINEWIDTH, \astart, \asize, \ac - icache_sync \ac - // End of instruction cache region lock -#endif - .endm - - - -/* - * Unlock a single line of the instruction cache. - * - * Parameters are: - * ar address register that contains (virtual) address to unlock - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to unlock - * (note: some number of lsbits are ignored) - */ - .macro icache_unlock_line ar, offset -#if XCHAL_ICACHE_SIZE > 0 && XCHAL_ICACHE_LINE_LOCKABLE - ihu \ar, \offset /* unlock icache line */ - icache_sync \ar -#endif - .endm - - - -/* - * Unlock a specified portion of memory from the instruction cache. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro icache_unlock_region astart, asize, ac -#if XCHAL_ICACHE_SIZE > 0 && XCHAL_ICACHE_LINE_LOCKABLE - // Instruction cache region unlock: - cache_hit_region ihu, XCHAL_ICACHE_LINEWIDTH, \astart, \asize, \ac - icache_sync \ac - // End of instruction cache region unlock -#endif - .endm - - - -/* - * Unlock entire instruction cache. - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro icache_unlock_all aa, ab -#if XCHAL_ICACHE_SIZE > 0 && XCHAL_ICACHE_LINE_LOCKABLE - // Instruction cache unlock: - cache_index_all iiu, XCHAL_ICACHE_SIZE, XCHAL_ICACHE_LINESIZE, 1, \aa, \ab - icache_sync \aa - // End of instruction cache unlock -#endif - .endm - - - - - -/*************************** DATA CACHE ***************************/ - - - -/* - * Reset/initialize the data cache by simply invalidating it - * (need to unlock first also, if cache locking implemented): - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro dcache_reset aa, ab - dcache_unlock_all \aa, \ab - dcache_invalidate_all \aa, \ab - .endm - - - - -/* - * Synchronize after a data cache operation, - * to be sure everything is in sync with memory as to be - * expected following any previous data cache control operations. 
- * - * Parameters are: - * ar an address register (temporary) (currently unused, but may be used in future) - */ - .macro dcache_sync ar -#if XCHAL_DCACHE_SIZE > 0 - // This previous sequence errs on the conservative side (too much so); a DSYNC should be sufficient: - //memw // synchronize data cache changes relative to subsequent memory accesses - //isync // be conservative and ISYNC as well (just to be sure) - - dsync -#endif - .endm - - - -/* - * Synchronize after a data store operation, - * to be sure the stored data is completely off the processor - * (and assuming there is no buffering outside the processor, - * that the data is in memory). This may be required to - * ensure that the processor's write buffers are emptied. - * A MEMW followed by a read guarantees this, by definition. - * We also try to make sure the read itself completes. - * - * Parameters are: - * ar an address register (temporary) - */ - .macro write_sync ar - memw // ensure previous memory accesses are complete prior to subsequent memory accesses - l32i \ar, sp, 0 // completing this read ensures any previous write has completed, because of MEMW - //slot - add \ar, \ar, \ar // use the result of the read to help ensure the read completes (in future architectures) - .endm - - -/* - * Invalidate a single line of the data cache. - * Parameters are: - * ar address register that contains (virtual) address to invalidate - * (may get clobbered in a future implementation, but not currently) - * offset (optional) offset to add to \ar to compute effective address to invalidate - * (note: some number of lsbits are ignored) - */ - .macro dcache_invalidate_line ar, offset -#if XCHAL_DCACHE_SIZE > 0 - dhi \ar, \offset - dcache_sync \ar -#endif - .endm - - - - - -/* - * Invalidate data cache entries that cache a specified portion of memory. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro dcache_invalidate_region astart, asize, ac -#if XCHAL_DCACHE_SIZE > 0 - // Data cache region invalidation: - cache_hit_region dhi, XCHAL_DCACHE_LINEWIDTH, \astart, \asize, \ac - dcache_sync \ac - // End of data cache region invalidation -#endif - .endm - - - -#if 0 -/* - * This is a work-around for a bug in SiChip1 (???). - * There should be a proper mechanism for not outputting - * these instructions when not needed. - * To enable work-around, uncomment this and replace 'dii' - * with 'dii_s1' everywhere, eg. in dcache_invalidate_all - * macro below. - */ - .macro dii_s1 ar, offset - dii \ar, \offset - or \ar, \ar, \ar - or \ar, \ar, \ar - or \ar, \ar, \ar - or \ar, \ar, \ar - .endm -#endif - - -/* - * Invalidate entire data cache. - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro dcache_invalidate_all aa, ab -#if XCHAL_DCACHE_SIZE > 0 - // Data cache invalidation: - cache_index_all dii, XCHAL_DCACHE_SIZE, XCHAL_DCACHE_LINESIZE, XCHAL_DCACHE_WAYS, \aa, \ab - dcache_sync \aa - // End of data cache invalidation -#endif - .endm - - - -/* - * Writeback a single line of the data cache. 
- * Parameters are: - * ar address register that contains (virtual) address to writeback - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to writeback - * (note: some number of lsbits are ignored) - */ - .macro dcache_writeback_line ar, offset -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_IS_WRITEBACK - dhwb \ar, \offset - dcache_sync \ar -#endif - .endm - - - -/* - * Writeback dirty data cache entries that cache a specified portion of memory. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro dcache_writeback_region astart, asize, ac -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_IS_WRITEBACK - // Data cache region writeback: - cache_hit_region dhwb, XCHAL_DCACHE_LINEWIDTH, \astart, \asize, \ac - dcache_sync \ac - // End of data cache region writeback -#endif - .endm - - - -/* - * Writeback entire data cache. - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro dcache_writeback_all aa, ab -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_IS_WRITEBACK - // Data cache writeback: - cache_index_all diwb, XCHAL_DCACHE_SIZE, XCHAL_DCACHE_LINESIZE, 1, \aa, \ab - dcache_sync \aa - // End of data cache writeback -#endif - .endm - - - -/* - * Writeback and invalidate a single line of the data cache. - * Parameters are: - * ar address register that contains (virtual) address to writeback and invalidate - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to writeback and invalidate - * (note: some number of lsbits are ignored) - */ - .macro dcache_writeback_inv_line ar, offset -#if XCHAL_DCACHE_SIZE > 0 - dhwbi \ar, \offset /* writeback and invalidate dcache line */ - dcache_sync \ar -#endif - .endm - - - -/* - * Writeback and invalidate data cache entries that cache a specified portion of memory. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro dcache_writeback_inv_region astart, asize, ac -#if XCHAL_DCACHE_SIZE > 0 - // Data cache region writeback and invalidate: - cache_hit_region dhwbi, XCHAL_DCACHE_LINEWIDTH, \astart, \asize, \ac - dcache_sync \ac - // End of data cache region writeback and invalidate -#endif - .endm - - - -/* - * Writeback and invalidate entire data cache. - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro dcache_writeback_inv_all aa, ab -#if XCHAL_DCACHE_SIZE > 0 - // Data cache writeback and invalidate: -#if XCHAL_DCACHE_IS_WRITEBACK - cache_index_all diwbi, XCHAL_DCACHE_SIZE, XCHAL_DCACHE_LINESIZE, 1, \aa, \ab - dcache_sync \aa -#else /*writeback*/ - // Data cache does not support writeback, so just invalidate: */ - dcache_invalidate_all \aa, \ab -#endif /*writeback*/ - // End of data cache writeback and invalidate -#endif - .endm - - - - -/* - * Lock (prefetch & lock) a single line of the data cache. 
- * - * Parameters are: - * ar address register that contains (virtual) address to lock - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to lock - * (note: some number of lsbits are ignored) - */ - .macro dcache_lock_line ar, offset -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_LINE_LOCKABLE - dpfl \ar, \offset /* prefetch and lock dcache line */ - dcache_sync \ar -#endif - .endm - - - -/* - * Lock (prefetch & lock) a specified portion of memory into the data cache. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro dcache_lock_region astart, asize, ac -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_LINE_LOCKABLE - // Data cache region lock: - cache_hit_region dpfl, XCHAL_DCACHE_LINEWIDTH, \astart, \asize, \ac - dcache_sync \ac - // End of data cache region lock -#endif - .endm - - - -/* - * Unlock a single line of the data cache. - * - * Parameters are: - * ar address register that contains (virtual) address to unlock - * (may get clobbered in a future implementation, but not currently) - * offset offset to add to \ar to compute effective address to unlock - * (note: some number of lsbits are ignored) - */ - .macro dcache_unlock_line ar, offset -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_LINE_LOCKABLE - dhu \ar, \offset /* unlock dcache line */ - dcache_sync \ar -#endif - .endm - - - -/* - * Unlock a specified portion of memory from the data cache. - * Parameters are: - * astart start address (register gets clobbered) - * asize size of the region in bytes (register gets clobbered) - * ac unique register used as temporary - */ - .macro dcache_unlock_region astart, asize, ac -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_LINE_LOCKABLE - // Data cache region unlock: - cache_hit_region dhu, XCHAL_DCACHE_LINEWIDTH, \astart, \asize, \ac - dcache_sync \ac - // End of data cache region unlock -#endif - .endm - - - -/* - * Unlock entire data cache. - * - * Parameters: - * aa, ab unique address registers (temporaries) - */ - .macro dcache_unlock_all aa, ab -#if XCHAL_DCACHE_SIZE > 0 && XCHAL_DCACHE_LINE_LOCKABLE - // Data cache unlock: - cache_index_all diu, XCHAL_DCACHE_SIZE, XCHAL_DCACHE_LINESIZE, 1, \aa, \ab - dcache_sync \aa - // End of data cache unlock -#endif - .endm - - -#endif /*XTENSA_CACHEASM_H*/ - diff --git a/include/asm-xtensa/xtensa/cacheattrasm.h b/include/asm-xtensa/xtensa/cacheattrasm.h deleted file mode 100644 index 1c3e117b359..00000000000 --- a/include/asm-xtensa/xtensa/cacheattrasm.h +++ /dev/null @@ -1,432 +0,0 @@ -#ifndef XTENSA_CACHEATTRASM_H -#define XTENSA_CACHEATTRASM_H - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/cacheattrasm.h -- assembler-specific - * CACHEATTR register related definitions that depend on CORE - * configuration. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/coreasm.h> - - -/* - * This header file defines assembler macros of the form: - * <x>cacheattr_<func> - * where: - * <x> is 'i', 'd' or absent for instruction, data - * or both caches; and - * <func> indicates the function of the macro. 
- * - * The following functions are defined: - * - * icacheattr_get - * Reads I-cache CACHEATTR into a2 (clobbers a3-a5). - * - * dcacheattr_get - * Reads D-cache CACHEATTR into a2 (clobbers a3-a5). - * (Note: for configs with a real CACHEATTR register, the - * above two macros are identical.) - * - * cacheattr_set - * Writes both I-cache and D-cache CACHEATTRs from a2 (a3-a8 clobbered). - * Works even when changing one's own code's attributes. - * - * icacheattr_is_enabled label - * Branches to \label if I-cache appears to have been enabled - * (eg. if CACHEATTR contains a cache-enabled attribute). - * (clobbers a2-a5,SAR) - * - * dcacheattr_is_enabled label - * Branches to \label if D-cache appears to have been enabled - * (eg. if CACHEATTR contains a cache-enabled attribute). - * (clobbers a2-a5,SAR) - * - * cacheattr_is_enabled label - * Branches to \label if either I-cache or D-cache appears to have been enabled - * (eg. if CACHEATTR contains a cache-enabled attribute). - * (clobbers a2-a5,SAR) - * - * The following macros are only defined under certain conditions: - * - * icacheattr_set (if XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR) - * Writes I-cache CACHEATTR from a2 (a3-a8 clobbered). - * - * dcacheattr_set (if XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR) - * Writes D-cache CACHEATTR from a2 (a3-a8 clobbered). - */ - - - -/*************************** GENERIC -- ALL CACHES ***************************/ - -/* - * _cacheattr_get - * - * (Internal macro.) - * Returns value of CACHEATTR register (or closest equivalent) in a2. - * - * Entry: - * (none) - * Exit: - * a2 value read from CACHEATTR - * a3-a5 clobbered (temporaries) - */ - .macro _cacheattr_get tlb -#if XCHAL_HAVE_CACHEATTR - rsr a2, CACHEATTR -#elif XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - // We have a config that "mimics" CACHEATTR using a simplified - // "MMU" composed of a single statically-mapped way. - // DTLB and ITLB are independent, so there's no single - // cache attribute that can describe both. So for now - // just return the DTLB state. - movi a5, 0xE0000000 - movi a2, 0 - movi a3, 0 -1: add a3, a3, a5 // next segment - r&tlb&1 a4, a3 // get PPN+CA of segment at 0xE0000000, 0xC0000000, ..., 0 - dsync // interlock??? - slli a2, a2, 4 - extui a4, a4, 0, 4 // extract CA - or a2, a2, a4 - bnez a3, 1b -#else - // This macro isn't applicable to arbitrary MMU configurations. - // Just return zero. - movi a2, 0 -#endif - .endm - - .macro icacheattr_get - _cacheattr_get itlb - .endm - - .macro dcacheattr_get - _cacheattr_get dtlb - .endm - - -#define XCHAL_CACHEATTR_ALL_BYPASS 0x22222222 /* default (powerup/reset) value of CACHEATTR, all BYPASS - mode (ie. disabled/bypassed caches) */ - -#if XCHAL_HAVE_CACHEATTR || XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - -#define XCHAL_FCA_ENAMASK 0x001A /* bitmap of fetch attributes that require enabled icache */ -#define XCHAL_LCA_ENAMASK 0x0003 /* bitmap of load attributes that require enabled dcache */ -#define XCHAL_SCA_ENAMASK 0x0003 /* bitmap of store attributes that require enabled dcache */ -#define XCHAL_LSCA_ENAMASK (XCHAL_LCA_ENAMASK|XCHAL_SCA_ENAMASK) /* l/s attrs requiring enabled dcache */ -#define XCHAL_ALLCA_ENAMASK (XCHAL_FCA_ENAMASK|XCHAL_LSCA_ENAMASK) /* all attrs requiring enabled caches */ - -/* - * _cacheattr_is_enabled - * - * (Internal macro.) - * Branches to \label if CACHEATTR in a2 indicates an enabled - * cache, using mask in a3. 
- * - * Parameters: - * label where to branch to if cache is enabled - * Entry: - * a2 contains CACHEATTR value used to determine whether - * caches are enabled - * a3 16-bit constant where each bit correspond to - * one of the 16 possible CA values (in a CACHEATTR mask); - * CA values that indicate the cache is enabled - * have their corresponding bit set in this mask - * (eg. use XCHAL_xCA_ENAMASK , above) - * Exit: - * a2,a4,a5 clobbered - * SAR clobbered - */ - .macro _cacheattr_is_enabled label - movi a4, 8 // loop 8 times -.Lcaife\@: - extui a5, a2, 0, 4 // get CA nibble - ssr a5 // index into mask according to CA... - srl a5, a3 // ...and get CA's mask bit in a5 bit 0 - bbsi.l a5, 0, \label // if CA indicates cache enabled, jump to label - srli a2, a2, 4 // next nibble - addi a4, a4, -1 - bnez a4, .Lcaife\@ // loop for each nibble - .endm - -#else /* XCHAL_HAVE_CACHEATTR || XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR */ - .macro _cacheattr_is_enabled label - j \label // macro not applicable, assume caches always enabled - .endm -#endif /* XCHAL_HAVE_CACHEATTR || XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR */ - - - -/* - * icacheattr_is_enabled - * - * Branches to \label if I-cache is enabled. - * - * Parameters: - * label where to branch to if icache is enabled - * Entry: - * (none) - * Exit: - * a2-a5, SAR clobbered (temporaries) - */ - .macro icacheattr_is_enabled label -#if XCHAL_HAVE_CACHEATTR || XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - icacheattr_get - movi a3, XCHAL_FCA_ENAMASK -#endif - _cacheattr_is_enabled \label - .endm - -/* - * dcacheattr_is_enabled - * - * Branches to \label if D-cache is enabled. - * - * Parameters: - * label where to branch to if dcache is enabled - * Entry: - * (none) - * Exit: - * a2-a5, SAR clobbered (temporaries) - */ - .macro dcacheattr_is_enabled label -#if XCHAL_HAVE_CACHEATTR || XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - dcacheattr_get - movi a3, XCHAL_LSCA_ENAMASK -#endif - _cacheattr_is_enabled \label - .endm - -/* - * cacheattr_is_enabled - * - * Branches to \label if either I-cache or D-cache is enabled. - * - * Parameters: - * label where to branch to if a cache is enabled - * Entry: - * (none) - * Exit: - * a2-a5, SAR clobbered (temporaries) - */ - .macro cacheattr_is_enabled label -#if XCHAL_HAVE_CACHEATTR - rsr a2, CACHEATTR - movi a3, XCHAL_ALLCA_ENAMASK -#elif XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - icacheattr_get - movi a3, XCHAL_FCA_ENAMASK - _cacheattr_is_enabled \label - dcacheattr_get - movi a3, XCHAL_LSCA_ENAMASK -#endif - _cacheattr_is_enabled \label - .endm - - - -/* - * The ISA does not have a defined way to change the - * instruction cache attributes of the running code, - * ie. of the memory area that encloses the current PC. - * However, each micro-architecture (or class of - * configurations within a micro-architecture) - * provides a way to deal with this issue. - * - * Here are a few macros used to implement the relevant - * approach taken. - */ - -#if XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - // We have a config that "mimics" CACHEATTR using a simplified - // "MMU" composed of a single statically-mapped way. - -/* - * icacheattr_set - * - * Entry: - * a2 cacheattr value to set - * Exit: - * a2 unchanged - * a3-a8 clobbered (temporaries) - */ - .macro icacheattr_set - - movi a5, 0xE0000000 // mask of upper 3 bits - movi a6, 3f // PC where ITLB is set - movi a3, 0 // start at region 0 (0 .. 
7) - and a6, a6, a5 // upper 3 bits of local PC area - mov a7, a2 // copy a2 so it doesn't get clobbered - j 3f - -# if XCHAL_HAVE_XLT_CACHEATTR - // Can do translations, use generic method: -1: sub a6, a3, a5 // address of some other segment - ritlb1 a8, a6 // save its PPN+CA - dsync // interlock?? - witlb a4, a6 // make it translate to this code area - movi a6, 5f // where to jump into it - isync - sub a6, a6, a5 // adjust jump address within that other segment - jx a6 - - // Note that in the following code snippet, which runs at a different virtual - // address than it is assembled for, we avoid using literals (eg. via movi/l32r) - // just in case literals end up in a different 512 MB segment, and we avoid - // instructions that rely on the current PC being what is expected. - // - .align 4 - _j 6f // this is at label '5' minus 4 bytes - .align 4 -5: witlb a4, a3 // we're in other segment, now can write previous segment's CA - isync - add a6, a6, a5 // back to previous segment - addi a6, a6, -4 // next jump label - jx a6 - -6: sub a6, a3, a5 // address of some other segment - witlb a8, a6 // restore PPN+CA of other segment - mov a6, a3 // restore a6 - isync -# else /* XCHAL_HAVE_XLT_CACHEATTR */ - // Use micro-architecture specific method. - // The following 4-instruction sequence is aligned such that - // it all fits within a single I-cache line. Sixteen byte - // alignment is sufficient for this (using XCHAL_ICACHE_LINESIZE - // actually causes problems because that can be greater than - // the alignment of the reset vector, where this macro is often - // invoked, which would cause the linker to align the reset - // vector code away from the reset vector!!). - .align 16 /*XCHAL_ICACHE_LINESIZE*/ -1: _witlb a4, a3 // write wired PTE (CA, no PPN) of 512MB segment to ITLB - _isync - nop - nop -# endif /* XCHAL_HAVE_XLT_CACHEATTR */ - beq a3, a5, 4f // done? - - // Note that in the WITLB loop, we don't do any load/stores - // (may not be an issue here, but it is important in the DTLB case). -2: srli a7, a7, 4 // next CA - sub a3, a3, a5 // next segment (add 0x20000000) -3: -# if XCHAL_HAVE_XLT_CACHEATTR /* if have translation, preserve it */ - ritlb1 a8, a3 // get current PPN+CA of segment - dsync // interlock??? - extui a4, a7, 0, 4 // extract CA to set - srli a8, a8, 4 // clear CA but keep PPN ... - slli a8, a8, 4 // ... - add a4, a4, a8 // combine new CA with PPN to preserve -# else - extui a4, a7, 0, 4 // extract CA -# endif - beq a3, a6, 1b // current PC's region? if so, do it in a safe way - witlb a4, a3 // write wired PTE (CA [+PPN]) of 512MB segment to ITLB - bne a3, a5, 2b - isync // make sure all ifetch changes take effect -4: - .endm // icacheattr_set - - -/* - * dcacheattr_set - * - * Entry: - * a2 cacheattr value to set - * Exit: - * a2 unchanged - * a3-a8 clobbered (temporaries) - */ - - .macro dcacheattr_set - - movi a5, 0xE0000000 // mask of upper 3 bits - movi a3, 0 // start at region 0 (0 .. 7) - mov a7, a2 // copy a2 so it doesn't get clobbered - j 3f - // Note that in the WDTLB loop, we don't do any load/stores - // (including implicit l32r via movi) because it isn't safe. -2: srli a7, a7, 4 // next CA - sub a3, a3, a5 // next segment (add 0x20000000) -3: -# if XCHAL_HAVE_XLT_CACHEATTR /* if have translation, preserve it */ - rdtlb1 a8, a3 // get current PPN+CA of segment - dsync // interlock??? - extui a4, a7, 0, 4 // extract CA to set - srli a8, a8, 4 // clear CA but keep PPN ... - slli a8, a8, 4 // ... 
- add a4, a4, a8 // combine new CA with PPN to preserve -# else - extui a4, a7, 0, 4 // extract CA to set -# endif - wdtlb a4, a3 // write wired PTE (CA [+PPN]) of 512MB segment to DTLB - bne a3, a5, 2b - dsync // make sure all data path changes take effect - .endm // dcacheattr_set - -#endif /* XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR */ - - - -/* - * cacheattr_set - * - * Macro that sets the current CACHEATTR safely - * (both i and d) according to the current contents of a2. - * It works even when changing the cache attributes of - * the currently running code. - * - * Entry: - * a2 cacheattr value to set - * Exit: - * a2 unchanged - * a3-a8 clobbered (temporaries) - */ - .macro cacheattr_set - -#if XCHAL_HAVE_CACHEATTR -# if XCHAL_ICACHE_LINESIZE < 4 - // No i-cache, so can always safely write to CACHEATTR: - wsr a2, CACHEATTR -# else - // The Athens micro-architecture, when using the old - // exception architecture option (ie. with the CACHEATTR register) - // allows changing the cache attributes of the running code - // using the following exact sequence aligned to be within - // an instruction cache line. (NOTE: using XCHAL_ICACHE_LINESIZE - // alignment actually causes problems because that can be greater - // than the alignment of the reset vector, where this macro is often - // invoked, which would cause the linker to align the reset - // vector code away from the reset vector!!). - j 1f - .align 16 /*XCHAL_ICACHE_LINESIZE*/ // align to within an I-cache line -1: _wsr a2, CACHEATTR - _isync - nop - nop -# endif -#elif XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR - // DTLB and ITLB are independent, but to keep semantics - // of this macro we simply write to both. - icacheattr_set - dcacheattr_set -#else - // This macro isn't applicable to arbitrary MMU configurations. - // Do nothing in this case. -#endif - .endm - - -#endif /*XTENSA_CACHEATTRASM_H*/ - diff --git a/include/asm-xtensa/xtensa/config-linux_be/core.h b/include/asm-xtensa/xtensa/config-linux_be/core.h deleted file mode 100644 index d54fe5eb106..00000000000 --- a/include/asm-xtensa/xtensa/config-linux_be/core.h +++ /dev/null @@ -1,1270 +0,0 @@ -/* - * xtensa/config/core.h -- HAL definitions that are dependent on CORE configuration - * - * This header file is sometimes referred to as the "compile-time HAL" or CHAL. - * It was generated for a specific Xtensa processor configuration. - * - * Source for configuration-independent binaries (which link in a - * configuration-specific HAL library) must NEVER include this file. - * It is perfectly normal, however, for the HAL source itself to include this file. - */ - -/* - * Copyright (c) 2003 Tensilica, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, - * USA. - */ - - -#ifndef XTENSA_CONFIG_CORE_H -#define XTENSA_CONFIG_CORE_H - -#include <xtensa/hal.h> - - -/*---------------------------------------------------------------------- - GENERAL - ----------------------------------------------------------------------*/ - -/* - * Separators for macros that expand into arrays. - * These can be predefined by files that #include this one, - * when different separators are required. - */ -/* Element separator for macros that expand into 1-dimensional arrays: */ -#ifndef XCHAL_SEP -#define XCHAL_SEP , -#endif -/* Array separator for macros that expand into 2-dimensional arrays: */ -#ifndef XCHAL_SEP2 -#define XCHAL_SEP2 },{ -#endif - - -/*---------------------------------------------------------------------- - ENDIANNESS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_BE 1 -#define XCHAL_HAVE_LE 0 -#define XCHAL_MEMORY_ORDER XTHAL_BIGENDIAN - - -/*---------------------------------------------------------------------- - REGISTER WINDOWS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_WINDOWED 1 /* 1 if windowed registers option configured, 0 otherwise */ -#define XCHAL_NUM_AREGS 64 /* number of physical address regs */ -#define XCHAL_NUM_AREGS_LOG2 6 /* log2(XCHAL_NUM_AREGS) */ - - -/*---------------------------------------------------------------------- - ADDRESS ALIGNMENT - ----------------------------------------------------------------------*/ - -/* These apply to a selected set of core load and store instructions only (see ISA): */ -#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* 1 if unaligned loads cause an exception, 0 otherwise */ -#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* 1 if unaligned stores cause an exception, 0 otherwise */ - - -/*---------------------------------------------------------------------- - INTERRUPTS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_INTERRUPTS 1 /* 1 if interrupt option configured, 0 otherwise */ -#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* 1 if high-priority interrupt option configured, 0 otherwise */ -#define XCHAL_HAVE_HIGHLEVEL_INTERRUPTS XCHAL_HAVE_HIGHPRI_INTERRUPTS -#define XCHAL_HAVE_NMI 0 /* 1 if NMI option configured, 0 otherwise */ -#define XCHAL_NUM_INTERRUPTS 17 /* number of interrupts */ -#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* number of bits to hold an interrupt number: roundup(log2(number of interrupts)) */ -#define XCHAL_NUM_EXTINTERRUPTS 10 /* number of external interrupts */ -#define XCHAL_NUM_INTLEVELS 4 /* number of interrupt levels (not including level zero!) 
*/ -#define XCHAL_NUM_LOWPRI_LEVELS 1 /* number of low-priority interrupt levels (always 1) */ -#define XCHAL_FIRST_HIGHPRI_LEVEL (XCHAL_NUM_LOWPRI_LEVELS+1) /* level of first high-priority interrupt (always 2) */ -#define XCHAL_EXCM_LEVEL 1 /* level of interrupts masked by PS.EXCM (XEA2 only; always 1 in T10xx); - for XEA1, where there is no PS.EXCM, this is always 1; - interrupts at levels FIRST_HIGHPRI <= n <= EXCM_LEVEL, if any, - are termed "medium priority" interrupts (post T10xx only) */ -/* Note: 1 <= LOWPRI_LEVELS <= EXCM_LEVEL < DEBUGLEVEL <= NUM_INTLEVELS < NMILEVEL <= 15 */ - -/* Masks of interrupts at each interrupt level: */ -#define XCHAL_INTLEVEL0_MASK 0x00000000 -#define XCHAL_INTLEVEL1_MASK 0x000064F9 -#define XCHAL_INTLEVEL2_MASK 0x00008902 -#define XCHAL_INTLEVEL3_MASK 0x00011204 -#define XCHAL_INTLEVEL4_MASK 0x00000000 -#define XCHAL_INTLEVEL5_MASK 0x00000000 -#define XCHAL_INTLEVEL6_MASK 0x00000000 -#define XCHAL_INTLEVEL7_MASK 0x00000000 -#define XCHAL_INTLEVEL8_MASK 0x00000000 -#define XCHAL_INTLEVEL9_MASK 0x00000000 -#define XCHAL_INTLEVEL10_MASK 0x00000000 -#define XCHAL_INTLEVEL11_MASK 0x00000000 -#define XCHAL_INTLEVEL12_MASK 0x00000000 -#define XCHAL_INTLEVEL13_MASK 0x00000000 -#define XCHAL_INTLEVEL14_MASK 0x00000000 -#define XCHAL_INTLEVEL15_MASK 0x00000000 -/* As an array of entries (eg. for C constant arrays): */ -#define XCHAL_INTLEVEL_MASKS 0x00000000 XCHAL_SEP \ - 0x000064F9 XCHAL_SEP \ - 0x00008902 XCHAL_SEP \ - 0x00011204 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 XCHAL_SEP \ - 0x00000000 - -/* Masks of interrupts at each range 1..n of interrupt levels: */ -#define XCHAL_INTLEVEL0_ANDBELOW_MASK 0x00000000 -#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x000064F9 -#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x0000EDFB -#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL8_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL9_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL10_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL11_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL12_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL13_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL14_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_INTLEVEL15_ANDBELOW_MASK 0x0001FFFF -#define XCHAL_LOWPRI_MASK XCHAL_INTLEVEL1_ANDBELOW_MASK /* mask of all low-priority interrupts */ -#define XCHAL_EXCM_MASK XCHAL_INTLEVEL1_ANDBELOW_MASK /* mask of all interrupts masked by PS.EXCM (or CEXCM) */ -/* As an array of entries (eg. 
for C constant arrays): */ -#define XCHAL_INTLEVEL_ANDBELOW_MASKS 0x00000000 XCHAL_SEP \ - 0x000064F9 XCHAL_SEP \ - 0x0000EDFB XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF XCHAL_SEP \ - 0x0001FFFF - -/* Interrupt numbers for each interrupt level at which only one interrupt was configured: */ -/*#define XCHAL_INTLEVEL1_NUM ...more than one interrupt at this level...*/ -/*#define XCHAL_INTLEVEL2_NUM ...more than one interrupt at this level...*/ -/*#define XCHAL_INTLEVEL3_NUM ...more than one interrupt at this level...*/ - -/* Level of each interrupt: */ -#define XCHAL_INT0_LEVEL 1 -#define XCHAL_INT1_LEVEL 2 -#define XCHAL_INT2_LEVEL 3 -#define XCHAL_INT3_LEVEL 1 -#define XCHAL_INT4_LEVEL 1 -#define XCHAL_INT5_LEVEL 1 -#define XCHAL_INT6_LEVEL 1 -#define XCHAL_INT7_LEVEL 1 -#define XCHAL_INT8_LEVEL 2 -#define XCHAL_INT9_LEVEL 3 -#define XCHAL_INT10_LEVEL 1 -#define XCHAL_INT11_LEVEL 2 -#define XCHAL_INT12_LEVEL 3 -#define XCHAL_INT13_LEVEL 1 -#define XCHAL_INT14_LEVEL 1 -#define XCHAL_INT15_LEVEL 2 -#define XCHAL_INT16_LEVEL 3 -#define XCHAL_INT17_LEVEL 0 -#define XCHAL_INT18_LEVEL 0 -#define XCHAL_INT19_LEVEL 0 -#define XCHAL_INT20_LEVEL 0 -#define XCHAL_INT21_LEVEL 0 -#define XCHAL_INT22_LEVEL 0 -#define XCHAL_INT23_LEVEL 0 -#define XCHAL_INT24_LEVEL 0 -#define XCHAL_INT25_LEVEL 0 -#define XCHAL_INT26_LEVEL 0 -#define XCHAL_INT27_LEVEL 0 -#define XCHAL_INT28_LEVEL 0 -#define XCHAL_INT29_LEVEL 0 -#define XCHAL_INT30_LEVEL 0 -#define XCHAL_INT31_LEVEL 0 -/* As an array of entries (eg. for C constant arrays): */ -#define XCHAL_INT_LEVELS 1 XCHAL_SEP \ - 2 XCHAL_SEP \ - 3 XCHAL_SEP \ - 1 XCHAL_SEP \ - 1 XCHAL_SEP \ - 1 XCHAL_SEP \ - 1 XCHAL_SEP \ - 1 XCHAL_SEP \ - 2 XCHAL_SEP \ - 3 XCHAL_SEP \ - 1 XCHAL_SEP \ - 2 XCHAL_SEP \ - 3 XCHAL_SEP \ - 1 XCHAL_SEP \ - 1 XCHAL_SEP \ - 2 XCHAL_SEP \ - 3 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 XCHAL_SEP \ - 0 - -/* Type of each interrupt: */ -#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT6_TYPE XTHAL_INTTYPE_EXTERN_LEVEL -#define XCHAL_INT7_TYPE XTHAL_INTTYPE_EXTERN_EDGE -#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_EDGE -#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_EDGE -#define XCHAL_INT10_TYPE XTHAL_INTTYPE_TIMER -#define XCHAL_INT11_TYPE XTHAL_INTTYPE_TIMER -#define XCHAL_INT12_TYPE XTHAL_INTTYPE_TIMER -#define XCHAL_INT13_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT14_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT15_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT16_TYPE XTHAL_INTTYPE_SOFTWARE -#define XCHAL_INT17_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT18_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT19_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT20_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT21_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT22_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT23_TYPE 
XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT24_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT25_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT26_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT27_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT28_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT29_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT30_TYPE XTHAL_INTTYPE_UNCONFIGURED -#define XCHAL_INT31_TYPE XTHAL_INTTYPE_UNCONFIGURED -/* As an array of entries (eg. for C constant arrays): */ -#define XCHAL_INT_TYPES XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_LEVEL XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_EDGE XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_EDGE XCHAL_SEP \ - XTHAL_INTTYPE_EXTERN_EDGE XCHAL_SEP \ - XTHAL_INTTYPE_TIMER XCHAL_SEP \ - XTHAL_INTTYPE_TIMER XCHAL_SEP \ - XTHAL_INTTYPE_TIMER XCHAL_SEP \ - XTHAL_INTTYPE_SOFTWARE XCHAL_SEP \ - XTHAL_INTTYPE_SOFTWARE XCHAL_SEP \ - XTHAL_INTTYPE_SOFTWARE XCHAL_SEP \ - XTHAL_INTTYPE_SOFTWARE XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED XCHAL_SEP \ - XTHAL_INTTYPE_UNCONFIGURED - -/* Masks of interrupts for each type of interrupt: */ -#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xFFFE0000 -#define XCHAL_INTTYPE_MASK_SOFTWARE 0x0001E000 -#define XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x00000380 -#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0000007F -#define XCHAL_INTTYPE_MASK_TIMER 0x00001C00 -#define XCHAL_INTTYPE_MASK_NMI 0x00000000 -/* As an array of entries (eg. for C constant arrays): */ -#define XCHAL_INTTYPE_MASKS 0xFFFE0000 XCHAL_SEP \ - 0x0001E000 XCHAL_SEP \ - 0x00000380 XCHAL_SEP \ - 0x0000007F XCHAL_SEP \ - 0x00001C00 XCHAL_SEP \ - 0x00000000 - -/* Interrupts assigned to each timer (CCOMPARE0 to CCOMPARE3), -1 if unassigned */ -#define XCHAL_TIMER0_INTERRUPT 10 -#define XCHAL_TIMER1_INTERRUPT 11 -#define XCHAL_TIMER2_INTERRUPT 12 -#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED -/* As an array of entries (eg. for C constant arrays): */ -#define XCHAL_TIMER_INTERRUPTS 10 XCHAL_SEP \ - 11 XCHAL_SEP \ - 12 XCHAL_SEP \ - XTHAL_TIMER_UNCONFIGURED - -/* Indexing macros: */ -#define _XCHAL_INTLEVEL_MASK(n) XCHAL_INTLEVEL ## n ## _MASK -#define XCHAL_INTLEVEL_MASK(n) _XCHAL_INTLEVEL_MASK(n) /* n = 0 .. 15 */ -#define _XCHAL_INTLEVEL_ANDBELOWMASK(n) XCHAL_INTLEVEL ## n ## _ANDBELOW_MASK -#define XCHAL_INTLEVEL_ANDBELOW_MASK(n) _XCHAL_INTLEVEL_ANDBELOWMASK(n) /* n = 0 .. 15 */ -#define _XCHAL_INT_LEVEL(n) XCHAL_INT ## n ## _LEVEL -#define XCHAL_INT_LEVEL(n) _XCHAL_INT_LEVEL(n) /* n = 0 .. 31 */ -#define _XCHAL_INT_TYPE(n) XCHAL_INT ## n ## _TYPE -#define XCHAL_INT_TYPE(n) _XCHAL_INT_TYPE(n) /* n = 0 .. 31 */ -#define _XCHAL_TIMER_INTERRUPT(n) XCHAL_TIMER ## n ## _INTERRUPT -#define XCHAL_TIMER_INTERRUPT(n) _XCHAL_TIMER_INTERRUPT(n) /* n = 0 .. 3 */ - - - -/* - * External interrupt vectors/levels. 
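
The XCHAL_SEP list macros and the token-pasting indexing macros above are intended to be consumed from C. A minimal, hypothetical sketch (the variable names are invented; only the XCHAL_* macros come from this header, and XCHAL_SEP defaults to a comma so the list macros can initialize arrays):

	#include <xtensa/config/core.h>

	/* Expand an XCHAL_SEP-separated list macro into a C constant array. */
	static const unsigned int xtensa_intlevel_masks[] = { XCHAL_INTLEVEL_MASKS };

	/* The indexing macros paste 'n' into a macro name, so 'n' must be a
	 * literal number or a macro that expands to one. */
	static const unsigned int level2_mask = XCHAL_INTLEVEL_MASK(2);	/* 0x00008902 */
	static const int timer0_irq = XCHAL_TIMER_INTERRUPT(0);	/* 10 */
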
- * These macros describe how Xtensa processor interrupt numbers - * (as numbered internally, eg. in INTERRUPT and INTENABLE registers) - * map to external BInterrupt<n> pins, for those interrupts - * configured as external (level-triggered, edge-triggered, or NMI). - * See the Xtensa processor databook for more details. - */ - -/* Core interrupt numbers mapped to each EXTERNAL interrupt number: */ -#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */ -#define XCHAL_EXTINT1_NUM 1 /* (intlevel 2) */ -#define XCHAL_EXTINT2_NUM 2 /* (intlevel 3) */ -#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */ -#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */ -#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */ -#define XCHAL_EXTINT6_NUM 6 /* (intlevel 1) */ -#define XCHAL_EXTINT7_NUM 7 /* (intlevel 1) */ -#define XCHAL_EXTINT8_NUM 8 /* (intlevel 2) */ -#define XCHAL_EXTINT9_NUM 9 /* (intlevel 3) */ - -/* Corresponding interrupt masks: */ -#define XCHAL_EXTINT0_MASK 0x00000001 -#define XCHAL_EXTINT1_MASK 0x00000002 -#define XCHAL_EXTINT2_MASK 0x00000004 -#define XCHAL_EXTINT3_MASK 0x00000008 -#define XCHAL_EXTINT4_MASK 0x00000010 -#define XCHAL_EXTINT5_MASK 0x00000020 -#define XCHAL_EXTINT6_MASK 0x00000040 -#define XCHAL_EXTINT7_MASK 0x00000080 -#define XCHAL_EXTINT8_MASK 0x00000100 -#define XCHAL_EXTINT9_MASK 0x00000200 - -/* Core config interrupt levels mapped to each external interrupt: */ -#define XCHAL_EXTINT0_LEVEL 1 /* (int number 0) */ -#define XCHAL_EXTINT1_LEVEL 2 /* (int number 1) */ -#define XCHAL_EXTINT2_LEVEL 3 /* (int number 2) */ -#define XCHAL_EXTINT3_LEVEL 1 /* (int number 3) */ -#define XCHAL_EXTINT4_LEVEL 1 /* (int number 4) */ -#define XCHAL_EXTINT5_LEVEL 1 /* (int number 5) */ -#define XCHAL_EXTINT6_LEVEL 1 /* (int number 6) */ -#define XCHAL_EXTINT7_LEVEL 1 /* (int number 7) */ -#define XCHAL_EXTINT8_LEVEL 2 /* (int number 8) */ -#define XCHAL_EXTINT9_LEVEL 3 /* (int number 9) */ - - -/*---------------------------------------------------------------------- - EXCEPTIONS and VECTORS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_EXCEPTIONS 1 /* 1 if exception option configured, 0 otherwise */ - -#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture number: 1 for XEA1 (old), 2 for XEA2 (new) */ -#define XCHAL_HAVE_XEA1 0 /* 1 if XEA1, 0 otherwise */ -#define XCHAL_HAVE_XEA2 1 /* 1 if XEA2, 0 otherwise */ -/* For backward compatibility ONLY -- DO NOT USE (will be removed in future release): */ -#define XCHAL_HAVE_OLD_EXC_ARCH XCHAL_HAVE_XEA1 /* (DEPRECATED) 1 if old exception architecture (XEA1), 0 otherwise (eg. 
XEA2) */ -#define XCHAL_HAVE_EXCM XCHAL_HAVE_XEA2 /* (DEPRECATED) 1 if PS.EXCM bit exists (currently equals XCHAL_HAVE_TLBS) */ - -#define XCHAL_RESET_VECTOR_VADDR 0xFE000020 -#define XCHAL_RESET_VECTOR_PADDR 0xFE000020 -#define XCHAL_USER_VECTOR_VADDR 0xD0000220 -#define XCHAL_PROGRAMEXC_VECTOR_VADDR XCHAL_USER_VECTOR_VADDR /* for backward compatibility */ -#define XCHAL_USEREXC_VECTOR_VADDR XCHAL_USER_VECTOR_VADDR /* for backward compatibility */ -#define XCHAL_USER_VECTOR_PADDR 0x00000220 -#define XCHAL_PROGRAMEXC_VECTOR_PADDR XCHAL_USER_VECTOR_PADDR /* for backward compatibility */ -#define XCHAL_USEREXC_VECTOR_PADDR XCHAL_USER_VECTOR_PADDR /* for backward compatibility */ -#define XCHAL_KERNEL_VECTOR_VADDR 0xD0000200 -#define XCHAL_STACKEDEXC_VECTOR_VADDR XCHAL_KERNEL_VECTOR_VADDR /* for backward compatibility */ -#define XCHAL_KERNELEXC_VECTOR_VADDR XCHAL_KERNEL_VECTOR_VADDR /* for backward compatibility */ -#define XCHAL_KERNEL_VECTOR_PADDR 0x00000200 -#define XCHAL_STACKEDEXC_VECTOR_PADDR XCHAL_KERNEL_VECTOR_PADDR /* for backward compatibility */ -#define XCHAL_KERNELEXC_VECTOR_PADDR XCHAL_KERNEL_VECTOR_PADDR /* for backward compatibility */ -#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0xD0000290 -#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x00000290 -#define XCHAL_WINDOW_VECTORS_VADDR 0xD0000000 -#define XCHAL_WINDOW_VECTORS_PADDR 0x00000000 -#define XCHAL_INTLEVEL2_VECTOR_VADDR 0xD0000240 -#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x00000240 -#define XCHAL_INTLEVEL3_VECTOR_VADDR 0xD0000250 -#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x00000250 -#define XCHAL_INTLEVEL4_VECTOR_VADDR 0xFE000520 -#define XCHAL_INTLEVEL4_VECTOR_PADDR 0xFE000520 -#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL4_VECTOR_VADDR -#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL4_VECTOR_PADDR - -/* Indexing macros: */ -#define _XCHAL_INTLEVEL_VECTOR_VADDR(n) XCHAL_INTLEVEL ## n ## _VECTOR_VADDR -#define XCHAL_INTLEVEL_VECTOR_VADDR(n) _XCHAL_INTLEVEL_VECTOR_VADDR(n) /* n = 0 .. 
15 */ - -/* - * General Exception Causes - * (values of EXCCAUSE special register set by general exceptions, - * which vector to the user, kernel, or double-exception vectors): - */ -#define XCHAL_EXCCAUSE_ILLEGAL_INSTRUCTION 0 /* Illegal Instruction (IllegalInstruction) */ -#define XCHAL_EXCCAUSE_SYSTEM_CALL 1 /* System Call (SystemCall) */ -#define XCHAL_EXCCAUSE_INSTRUCTION_FETCH_ERROR 2 /* Instruction Fetch Error (InstructionFetchError) */ -#define XCHAL_EXCCAUSE_LOAD_STORE_ERROR 3 /* Load Store Error (LoadStoreError) */ -#define XCHAL_EXCCAUSE_LEVEL1_INTERRUPT 4 /* Level 1 Interrupt (Level1Interrupt) */ -#define XCHAL_EXCCAUSE_ALLOCA 5 /* Stack Extension Assist (Alloca) */ -#define XCHAL_EXCCAUSE_INTEGER_DIVIDE_BY_ZERO 6 /* Integer Divide by Zero (IntegerDivideByZero) */ -#define XCHAL_EXCCAUSE_SPECULATION 7 /* Speculation (Speculation) */ -#define XCHAL_EXCCAUSE_PRIVILEGED 8 /* Privileged Instruction (Privileged) */ -#define XCHAL_EXCCAUSE_UNALIGNED 9 /* Unaligned Load Store (Unaligned) */ -#define XCHAL_EXCCAUSE_ITLB_MISS 16 /* ITlb Miss Exception (ITlbMiss) */ -#define XCHAL_EXCCAUSE_ITLB_MULTIHIT 17 /* ITlb Mutltihit Exception (ITlbMultihit) */ -#define XCHAL_EXCCAUSE_ITLB_PRIVILEGE 18 /* ITlb Privilege Exception (ITlbPrivilege) */ -#define XCHAL_EXCCAUSE_ITLB_SIZE_RESTRICTION 19 /* ITlb Size Restriction Exception (ITlbSizeRestriction) */ -#define XCHAL_EXCCAUSE_FETCH_CACHE_ATTRIBUTE 20 /* Fetch Cache Attribute Exception (FetchCacheAttribute) */ -#define XCHAL_EXCCAUSE_DTLB_MISS 24 /* DTlb Miss Exception (DTlbMiss) */ -#define XCHAL_EXCCAUSE_DTLB_MULTIHIT 25 /* DTlb Multihit Exception (DTlbMultihit) */ -#define XCHAL_EXCCAUSE_DTLB_PRIVILEGE 26 /* DTlb Privilege Exception (DTlbPrivilege) */ -#define XCHAL_EXCCAUSE_DTLB_SIZE_RESTRICTION 27 /* DTlb Size Restriction Exception (DTlbSizeRestriction) */ -#define XCHAL_EXCCAUSE_LOAD_CACHE_ATTRIBUTE 28 /* Load Cache Attribute Exception (LoadCacheAttribute) */ -#define XCHAL_EXCCAUSE_STORE_CACHE_ATTRIBUTE 29 /* Store Cache Attribute Exception (StoreCacheAttribute) */ -#define XCHAL_EXCCAUSE_FLOATING_POINT 40 /* Floating Point Exception (FloatingPoint) */ - - - -/*---------------------------------------------------------------------- - TIMERS - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_CCOUNT 1 /* 1 if have CCOUNT, 0 otherwise */ -/*#define XCHAL_HAVE_TIMERS XCHAL_HAVE_CCOUNT*/ -#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */ - - - -/*---------------------------------------------------------------------- - DEBUG - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_DEBUG 1 /* 1 if debug option configured, 0 otherwise */ -#define XCHAL_HAVE_OCD 1 /* 1 if OnChipDebug option configured, 0 otherwise */ -#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */ -#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */ -#define XCHAL_DEBUGLEVEL 4 /* debug interrupt level */ -/*DebugExternalInterrupt 0 0|1*/ -/*DebugUseDIRArray 0 0|1*/ - - - - -/*---------------------------------------------------------------------- - COPROCESSORS and EXTRA STATE - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_CP 0 /* 1 if coprocessor option configured (CPENABLE present) */ -#define XCHAL_CP_MAXCFG 0 /* max allowed cp id plus one (per cfg) */ - -#include <xtensa/config/tie.h> - - - - -/*---------------------------------------------------------------------- - INTERNAL I/D RAM/ROMs and XLMI - 
----------------------------------------------------------------------*/ - -#define XCHAL_NUM_INSTROM 0 /* number of core instruction ROMs configured */ -#define XCHAL_NUM_INSTRAM 0 /* number of core instruction RAMs configured */ -#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs configured */ -#define XCHAL_NUM_DATARAM 0 /* number of core data RAMs configured */ -#define XCHAL_NUM_XLMI 0 /* number of core XLMI ports configured */ -#define XCHAL_NUM_IROM XCHAL_NUM_INSTROM /* (DEPRECATED) */ -#define XCHAL_NUM_IRAM XCHAL_NUM_INSTRAM /* (DEPRECATED) */ -#define XCHAL_NUM_DROM XCHAL_NUM_DATAROM /* (DEPRECATED) */ -#define XCHAL_NUM_DRAM XCHAL_NUM_DATARAM /* (DEPRECATED) */ - - - -/*---------------------------------------------------------------------- - CACHE - ----------------------------------------------------------------------*/ - -/* Size of the cache lines in log2(bytes): */ -#define XCHAL_ICACHE_LINEWIDTH 4 -#define XCHAL_DCACHE_LINEWIDTH 4 -/* Size of the cache lines in bytes: */ -#define XCHAL_ICACHE_LINESIZE 16 -#define XCHAL_DCACHE_LINESIZE 16 -/* Max for both I-cache and D-cache (used for general alignment): */ -#define XCHAL_CACHE_LINEWIDTH_MAX 4 -#define XCHAL_CACHE_LINESIZE_MAX 16 - -/* Number of cache sets in log2(lines per way): */ -#define XCHAL_ICACHE_SETWIDTH 8 -#define XCHAL_DCACHE_SETWIDTH 8 -/* Max for both I-cache and D-cache (used for general cache-coherency page alignment): */ -#define XCHAL_CACHE_SETWIDTH_MAX 8 -#define XCHAL_CACHE_SETSIZE_MAX 256 - -/* Cache set associativity (number of ways): */ -#define XCHAL_ICACHE_WAYS 2 -#define XCHAL_DCACHE_WAYS 2 - -/* Size of the caches in bytes (ways * 2^(linewidth + setwidth)): */ -#define XCHAL_ICACHE_SIZE 8192 -#define XCHAL_DCACHE_SIZE 8192 - -/* Cache features: */ -#define XCHAL_DCACHE_IS_WRITEBACK 0 -/* Whether cache locking feature is available: */ -#define XCHAL_ICACHE_LINE_LOCKABLE 0 -#define XCHAL_DCACHE_LINE_LOCKABLE 0 - -/* Number of (encoded) cache attribute bits: */ -#define XCHAL_CA_BITS 4 /* number of bits needed to hold cache attribute encoding */ -/* (The number of access mode bits (decoded cache attribute bits) is defined by the architecture; see xtensa/hal.h?) 
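
The cache geometry macros above are related by size = ways * 2^(linewidth + setwidth). A small compile-time sanity check, assuming only the XCHAL_* values defined above (DERIVED_ICACHE_SIZE is a made-up name, not part of the HAL):

	#include <xtensa/config/core.h>

	/* ways * lines-per-way * line-size = 2 * 2^8 * 2^4 = 8192 bytes */
	#define DERIVED_ICACHE_SIZE \
		(XCHAL_ICACHE_WAYS << (XCHAL_ICACHE_SETWIDTH + XCHAL_ICACHE_LINEWIDTH))

	#if DERIVED_ICACHE_SIZE != XCHAL_ICACHE_SIZE
	# error "cache geometry and XCHAL_ICACHE_SIZE disagree"
	#endif
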
*/ - - -/* Cache Attribute encodings -- lists of access modes for each cache attribute: */ -#define XCHAL_FCA_LIST XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_BYPASS XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_BYPASS XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_CACHED XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_CACHED XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_CACHED XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_CACHED XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_EXCEPTION XCHAL_SEP \ - XTHAL_FAM_EXCEPTION -#define XCHAL_LCA_LIST XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_BYPASSG XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_BYPASSG XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_CACHED XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_CACHED XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_NACACHED XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_NACACHED XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_ISOLATE XCHAL_SEP \ - XTHAL_LAM_EXCEPTION XCHAL_SEP \ - XTHAL_LAM_CACHED -#define XCHAL_SCA_LIST XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_BYPASS XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_WRITETHRU XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_WRITETHRU XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_ISOLATE XCHAL_SEP \ - XTHAL_SAM_EXCEPTION XCHAL_SEP \ - XTHAL_SAM_WRITETHRU - -/* Test: - read/only: 0 + 1 + 2 + 4 + 5 + 6 + 8 + 9 + 10 + 12 + 14 - read/only: 0 + 1 + 2 + 4 + 5 + 6 + 8 + 9 + 10 + 12 + 14 - all: 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 - fault: 0 + 2 + 4 + 6 + 8 + 10 + 12 + 14 - r/w/x cached: - r/w/x dcached: - I-bypass: 1 + 3 - - load guard bit set: 1 + 3 - load guard bit clr: 0 + 2 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 - hit-cache r/w/x: 7 + 11 - - fams: 5 - fams: 0 / 6 / 18 / 1 / 2 - fams: Bypass / Isolate / Cached / Exception / NACached - - MMU okay: yes -*/ - - -/*---------------------------------------------------------------------- - MMU - ----------------------------------------------------------------------*/ - -/* - * General notes on MMU parameters. - * - * Terminology: - * ASID = address-space ID (acts as an "extension" of virtual addresses) - * VPN = virtual page number - * PPN = physical page number - * CA = encoded cache attribute (access modes) - * TLB = translation look-aside buffer (term is stretched somewhat here) - * I = instruction (fetch accesses) - * D = data (load and store accesses) - * way = each TLB (ITLB and DTLB) consists of a number of "ways" - * that simultaneously match the virtual address of an access; - * a TLB successfully translates a virtual address if exactly - * one way matches the vaddr; if none match, it is a miss; - * if multiple match, one gets a "multihit" exception; - * each way can be independently configured in terms of number of - * entries, page sizes, which fields are writable or constant, etc. - * set = group of contiguous ways with exactly identical parameters - * ARF = auto-refill; hardware services a 1st-level miss by loading a PTE - * from the page table and storing it in one of the auto-refill ways; - * if this PTE load also misses, a miss exception is posted for s/w. 
- * min-wired = a "min-wired" way can be used to map a single (minimum-sized) - * page arbitrarily under program control; it has a single entry, - * is non-auto-refill (some other way(s) must be auto-refill), - * all its fields (VPN, PPN, ASID, CA) are all writable, and it - * supports the XCHAL_MMU_MIN_PTE_PAGE_SIZE page size (a current - * restriction is that this be the only page size it supports). - * - * TLB way entries are virtually indexed. - * TLB ways that support multiple page sizes: - * - must have all writable VPN and PPN fields; - * - can only use one page size at any given time (eg. setup at startup), - * selected by the respective ITLBCFG or DTLBCFG special register, - * whose bits n*4+3 .. n*4 index the list of page sizes for way n - * (XCHAL_xTLB_SETm_PAGESZ_LOG2_LIST for set m corresponding to way n); - * this list may be sparse for auto-refill ways because auto-refill - * ways have independent lists of supported page sizes sharing a - * common encoding with PTE entries; the encoding is the index into - * this list; unsupported sizes for a given way are zero in the list; - * selecting unsupported sizes results in undefined hardware behaviour; - * - is only possible for ways 0 thru 7 (due to ITLBCFG/DTLBCFG definition). - */ - -#define XCHAL_HAVE_CACHEATTR 0 /* 1 if CACHEATTR register present, 0 if TLBs present instead */ -#define XCHAL_HAVE_TLBS 1 /* 1 if TLBs present, 0 if CACHEATTR present instead */ -#define XCHAL_HAVE_MMU XCHAL_HAVE_TLBS /* (DEPRECATED; use XCHAL_HAVE_TLBS instead; will be removed in future release) */ -#define XCHAL_HAVE_SPANNING_WAY 0 /* 1 if single way maps entire virtual address space in I+D */ -#define XCHAL_HAVE_IDENTITY_MAP 0 /* 1 if virtual addr == physical addr always, 0 otherwise */ -#define XCHAL_HAVE_MIMIC_CACHEATTR 0 /* 1 if have MMU that mimics a CACHEATTR config (CaMMU) */ -#define XCHAL_HAVE_XLT_CACHEATTR 0 /* 1 if have MMU that mimics a CACHEATTR config, but with translation (CaXltMMU) */ - -#define XCHAL_MMU_ASID_BITS 8 /* number of bits in ASIDs (address space IDs) */ -#define XCHAL_MMU_ASID_INVALID 0 /* ASID value indicating invalid address space */ -#define XCHAL_MMU_ASID_KERNEL 1 /* ASID value indicating kernel (ring 0) address space */ -#define XCHAL_MMU_RINGS 4 /* number of rings supported (1..4) */ -#define XCHAL_MMU_RING_BITS 2 /* number of bits needed to hold ring number */ -#define XCHAL_MMU_SR_BITS 0 /* number of size-restriction bits supported */ -#define XCHAL_MMU_CA_BITS 4 /* number of bits needed to hold cache attribute encoding */ -#define XCHAL_MMU_MAX_PTE_PAGE_SIZE 12 /* max page size in a PTE structure (log2) */ -#define XCHAL_MMU_MIN_PTE_PAGE_SIZE 12 /* min page size in a PTE structure (log2) */ - - -/*** Instruction TLB: ***/ - -#define XCHAL_ITLB_WAY_BITS 3 /* number of bits holding the ways */ -#define XCHAL_ITLB_WAYS 7 /* number of ways (n-way set-associative TLB) */ -#define XCHAL_ITLB_ARF_WAYS 4 /* number of auto-refill ways */ -#define XCHAL_ITLB_SETS 4 /* number of sets (groups of ways with identical settings) */ - -/* Way set to which each way belongs: */ -#define XCHAL_ITLB_WAY0_SET 0 -#define XCHAL_ITLB_WAY1_SET 0 -#define XCHAL_ITLB_WAY2_SET 0 -#define XCHAL_ITLB_WAY3_SET 0 -#define XCHAL_ITLB_WAY4_SET 1 -#define XCHAL_ITLB_WAY5_SET 2 -#define XCHAL_ITLB_WAY6_SET 3 - -/* Ways sets that are used by hardware auto-refill (ARF): */ -#define XCHAL_ITLB_ARF_SETS 1 /* number of auto-refill sets */ -#define XCHAL_ITLB_ARF_SET0 0 /* index of n'th auto-refill set */ - -/* Way sets that are "min-wired" (see 
terminology comment above): */ -#define XCHAL_ITLB_MINWIRED_SETS 0 /* number of "min-wired" sets */ - - -/* ITLB way set 0 (group of ways 0 thru 3): */ -#define XCHAL_ITLB_SET0_WAY 0 /* index of first way in this way set */ -#define XCHAL_ITLB_SET0_WAYS 4 /* number of (contiguous) ways in this way set */ -#define XCHAL_ITLB_SET0_ENTRIES_LOG2 2 /* log2(number of entries in this way) */ -#define XCHAL_ITLB_SET0_ENTRIES 4 /* number of entries in this way (always a power of 2) */ -#define XCHAL_ITLB_SET0_ARF 1 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_ITLB_SET0_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_ITLB_SET0_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_ITLB_SET0_PAGESZ_LOG2_MIN 12 /* log2(minimum supported page size) */ -#define XCHAL_ITLB_SET0_PAGESZ_LOG2_MAX 12 /* log2(maximum supported page size) */ -#define XCHAL_ITLB_SET0_PAGESZ_LOG2_LIST 12 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_ITLB_SET0_ASID_CONSTMASK 0 /* constant ASID bits; 0 if all writable */ -#define XCHAL_ITLB_SET0_VPN_CONSTMASK 0 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET0_PPN_CONSTMASK 0 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET0_CA_CONSTMASK 0 /* constant CA bits; 0 if all writable */ -#define XCHAL_ITLB_SET0_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET0_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET0_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET0_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ - -/* ITLB way set 1 (group of ways 4 thru 4): */ -#define XCHAL_ITLB_SET1_WAY 4 /* index of first way in this way set */ -#define XCHAL_ITLB_SET1_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_ITLB_SET1_ENTRIES_LOG2 2 /* log2(number of entries in this way) */ -#define XCHAL_ITLB_SET1_ENTRIES 4 /* number of entries in this way (always a power of 2) */ -#define XCHAL_ITLB_SET1_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_ITLB_SET1_PAGESIZES 4 /* number of supported page sizes in this way */ -#define XCHAL_ITLB_SET1_PAGESZ_BITS 2 /* number of bits to encode the page size */ -#define XCHAL_ITLB_SET1_PAGESZ_LOG2_MIN 20 /* log2(minimum supported page size) */ -#define XCHAL_ITLB_SET1_PAGESZ_LOG2_MAX 26 /* log2(maximum supported page size) */ -#define XCHAL_ITLB_SET1_PAGESZ_LOG2_LIST 20 XCHAL_SEP 22 XCHAL_SEP 24 XCHAL_SEP 26 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_ITLB_SET1_ASID_CONSTMASK 0 /* constant ASID bits; 0 if all writable */ -#define XCHAL_ITLB_SET1_VPN_CONSTMASK 0 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET1_PPN_CONSTMASK 0 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET1_CA_CONSTMASK 0 /* constant CA bits; 0 if all writable */ -#define XCHAL_ITLB_SET1_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET1_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define 
XCHAL_ITLB_SET1_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET1_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ - -/* ITLB way set 2 (group of ways 5 thru 5): */ -#define XCHAL_ITLB_SET2_WAY 5 /* index of first way in this way set */ -#define XCHAL_ITLB_SET2_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_ITLB_SET2_ENTRIES_LOG2 1 /* log2(number of entries in this way) */ -#define XCHAL_ITLB_SET2_ENTRIES 2 /* number of entries in this way (always a power of 2) */ -#define XCHAL_ITLB_SET2_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_ITLB_SET2_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_ITLB_SET2_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_ITLB_SET2_PAGESZ_LOG2_MIN 27 /* log2(minimum supported page size) */ -#define XCHAL_ITLB_SET2_PAGESZ_LOG2_MAX 27 /* log2(maximum supported page size) */ -#define XCHAL_ITLB_SET2_PAGESZ_LOG2_LIST 27 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_ITLB_SET2_ASID_CONSTMASK 0xFF /* constant ASID bits; 0 if all writable */ -#define XCHAL_ITLB_SET2_VPN_CONSTMASK 0xF0000000 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET2_PPN_CONSTMASK 0xF8000000 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET2_CA_CONSTMASK 0x0000000F /* constant CA bits; 0 if all writable */ -#define XCHAL_ITLB_SET2_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET2_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET2_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET2_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ -/* Constant ASID values for each entry of ITLB way set 2 (because ASID_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET2_E0_ASID_CONST 0x01 -#define XCHAL_ITLB_SET2_E1_ASID_CONST 0x01 -/* Constant VPN values for each entry of ITLB way set 2 (because VPN_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET2_E0_VPN_CONST 0xD0000000 -#define XCHAL_ITLB_SET2_E1_VPN_CONST 0xD8000000 -/* Constant PPN values for each entry of ITLB way set 2 (because PPN_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET2_E0_PPN_CONST 0x00000000 -#define XCHAL_ITLB_SET2_E1_PPN_CONST 0x00000000 -/* Constant CA values for each entry of ITLB way set 2 (because CA_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET2_E0_CA_CONST 0x07 -#define XCHAL_ITLB_SET2_E1_CA_CONST 0x03 - -/* ITLB way set 3 (group of ways 6 thru 6): */ -#define XCHAL_ITLB_SET3_WAY 6 /* index of first way in this way set */ -#define XCHAL_ITLB_SET3_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_ITLB_SET3_ENTRIES_LOG2 1 /* log2(number of entries in this way) */ -#define XCHAL_ITLB_SET3_ENTRIES 2 /* number of entries in this way (always a power of 2) */ -#define XCHAL_ITLB_SET3_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_ITLB_SET3_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_ITLB_SET3_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_ITLB_SET3_PAGESZ_LOG2_MIN 28 /* log2(minimum supported page size) */ -#define XCHAL_ITLB_SET3_PAGESZ_LOG2_MAX 
28 /* log2(maximum supported page size) */ -#define XCHAL_ITLB_SET3_PAGESZ_LOG2_LIST 28 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_ITLB_SET3_ASID_CONSTMASK 0xFF /* constant ASID bits; 0 if all writable */ -#define XCHAL_ITLB_SET3_VPN_CONSTMASK 0xE0000000 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET3_PPN_CONSTMASK 0xF0000000 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_ITLB_SET3_CA_CONSTMASK 0x0000000F /* constant CA bits; 0 if all writable */ -#define XCHAL_ITLB_SET3_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET3_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET3_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_ITLB_SET3_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ -/* Constant ASID values for each entry of ITLB way set 3 (because ASID_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET3_E0_ASID_CONST 0x01 -#define XCHAL_ITLB_SET3_E1_ASID_CONST 0x01 -/* Constant VPN values for each entry of ITLB way set 3 (because VPN_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET3_E0_VPN_CONST 0xE0000000 -#define XCHAL_ITLB_SET3_E1_VPN_CONST 0xF0000000 -/* Constant PPN values for each entry of ITLB way set 3 (because PPN_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET3_E0_PPN_CONST 0xF0000000 -#define XCHAL_ITLB_SET3_E1_PPN_CONST 0xF0000000 -/* Constant CA values for each entry of ITLB way set 3 (because CA_CONSTMASK is non-zero): */ -#define XCHAL_ITLB_SET3_E0_CA_CONST 0x07 -#define XCHAL_ITLB_SET3_E1_CA_CONST 0x03 - -/* Indexing macros: */ -#define _XCHAL_ITLB_SET(n,_what) XCHAL_ITLB_SET ## n ## _what -#define XCHAL_ITLB_SET(n,what) _XCHAL_ITLB_SET(n, _ ## what ) -#define _XCHAL_ITLB_SET_E(n,i,_what) XCHAL_ITLB_SET ## n ## _E ## i ## _what -#define XCHAL_ITLB_SET_E(n,i,what) _XCHAL_ITLB_SET_E(n,i, _ ## what ) -/* - * Example use: XCHAL_ITLB_SET(XCHAL_ITLB_ARF_SET0,ENTRIES) - * to get the value of XCHAL_ITLB_SET<n>_ENTRIES where <n> is the first auto-refill set. 
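
Following the example-use note above, an illustrative C fragment (the variable names are invented; the values come from the two-level token pasting defined by the indexing macros, with XCHAL_ITLB_ARF_SET0 expanding to 0 before pasting):

	#include <xtensa/config/core.h>

	/* Query the first auto-refill ITLB way set. */
	static const int itlb_arf_entries   = XCHAL_ITLB_SET(XCHAL_ITLB_ARF_SET0,ENTRIES);	/* 4 */
	static const int itlb_arf_first_way = XCHAL_ITLB_SET(XCHAL_ITLB_ARF_SET0,WAY);		/* 0 */
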
- */ - - -/*** Data TLB: ***/ - -#define XCHAL_DTLB_WAY_BITS 4 /* number of bits holding the ways */ -#define XCHAL_DTLB_WAYS 10 /* number of ways (n-way set-associative TLB) */ -#define XCHAL_DTLB_ARF_WAYS 4 /* number of auto-refill ways */ -#define XCHAL_DTLB_SETS 5 /* number of sets (groups of ways with identical settings) */ - -/* Way set to which each way belongs: */ -#define XCHAL_DTLB_WAY0_SET 0 -#define XCHAL_DTLB_WAY1_SET 0 -#define XCHAL_DTLB_WAY2_SET 0 -#define XCHAL_DTLB_WAY3_SET 0 -#define XCHAL_DTLB_WAY4_SET 1 -#define XCHAL_DTLB_WAY5_SET 2 -#define XCHAL_DTLB_WAY6_SET 3 -#define XCHAL_DTLB_WAY7_SET 4 -#define XCHAL_DTLB_WAY8_SET 4 -#define XCHAL_DTLB_WAY9_SET 4 - -/* Ways sets that are used by hardware auto-refill (ARF): */ -#define XCHAL_DTLB_ARF_SETS 1 /* number of auto-refill sets */ -#define XCHAL_DTLB_ARF_SET0 0 /* index of n'th auto-refill set */ - -/* Way sets that are "min-wired" (see terminology comment above): */ -#define XCHAL_DTLB_MINWIRED_SETS 1 /* number of "min-wired" sets */ -#define XCHAL_DTLB_MINWIRED_SET0 4 /* index of n'th "min-wired" set */ - - -/* DTLB way set 0 (group of ways 0 thru 3): */ -#define XCHAL_DTLB_SET0_WAY 0 /* index of first way in this way set */ -#define XCHAL_DTLB_SET0_WAYS 4 /* number of (contiguous) ways in this way set */ -#define XCHAL_DTLB_SET0_ENTRIES_LOG2 2 /* log2(number of entries in this way) */ -#define XCHAL_DTLB_SET0_ENTRIES 4 /* number of entries in this way (always a power of 2) */ -#define XCHAL_DTLB_SET0_ARF 1 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_DTLB_SET0_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_DTLB_SET0_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_DTLB_SET0_PAGESZ_LOG2_MIN 12 /* log2(minimum supported page size) */ -#define XCHAL_DTLB_SET0_PAGESZ_LOG2_MAX 12 /* log2(maximum supported page size) */ -#define XCHAL_DTLB_SET0_PAGESZ_LOG2_LIST 12 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_DTLB_SET0_ASID_CONSTMASK 0 /* constant ASID bits; 0 if all writable */ -#define XCHAL_DTLB_SET0_VPN_CONSTMASK 0 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET0_PPN_CONSTMASK 0 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET0_CA_CONSTMASK 0 /* constant CA bits; 0 if all writable */ -#define XCHAL_DTLB_SET0_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET0_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET0_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET0_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ - -/* DTLB way set 1 (group of ways 4 thru 4): */ -#define XCHAL_DTLB_SET1_WAY 4 /* index of first way in this way set */ -#define XCHAL_DTLB_SET1_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_DTLB_SET1_ENTRIES_LOG2 2 /* log2(number of entries in this way) */ -#define XCHAL_DTLB_SET1_ENTRIES 4 /* number of entries in this way (always a power of 2) */ -#define XCHAL_DTLB_SET1_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_DTLB_SET1_PAGESIZES 4 /* number of supported page sizes in this way */ -#define XCHAL_DTLB_SET1_PAGESZ_BITS 2 /* number of bits to encode the page size */ -#define 
XCHAL_DTLB_SET1_PAGESZ_LOG2_MIN 20 /* log2(minimum supported page size) */ -#define XCHAL_DTLB_SET1_PAGESZ_LOG2_MAX 26 /* log2(maximum supported page size) */ -#define XCHAL_DTLB_SET1_PAGESZ_LOG2_LIST 20 XCHAL_SEP 22 XCHAL_SEP 24 XCHAL_SEP 26 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_DTLB_SET1_ASID_CONSTMASK 0 /* constant ASID bits; 0 if all writable */ -#define XCHAL_DTLB_SET1_VPN_CONSTMASK 0 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET1_PPN_CONSTMASK 0 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET1_CA_CONSTMASK 0 /* constant CA bits; 0 if all writable */ -#define XCHAL_DTLB_SET1_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET1_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET1_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET1_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ - -/* DTLB way set 2 (group of ways 5 thru 5): */ -#define XCHAL_DTLB_SET2_WAY 5 /* index of first way in this way set */ -#define XCHAL_DTLB_SET2_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_DTLB_SET2_ENTRIES_LOG2 1 /* log2(number of entries in this way) */ -#define XCHAL_DTLB_SET2_ENTRIES 2 /* number of entries in this way (always a power of 2) */ -#define XCHAL_DTLB_SET2_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_DTLB_SET2_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_DTLB_SET2_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_DTLB_SET2_PAGESZ_LOG2_MIN 27 /* log2(minimum supported page size) */ -#define XCHAL_DTLB_SET2_PAGESZ_LOG2_MAX 27 /* log2(maximum supported page size) */ -#define XCHAL_DTLB_SET2_PAGESZ_LOG2_LIST 27 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_DTLB_SET2_ASID_CONSTMASK 0xFF /* constant ASID bits; 0 if all writable */ -#define XCHAL_DTLB_SET2_VPN_CONSTMASK 0xF0000000 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET2_PPN_CONSTMASK 0xF8000000 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET2_CA_CONSTMASK 0x0000000F /* constant CA bits; 0 if all writable */ -#define XCHAL_DTLB_SET2_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET2_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET2_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET2_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ -/* Constant ASID values for each entry of DTLB way set 2 (because ASID_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET2_E0_ASID_CONST 0x01 -#define XCHAL_DTLB_SET2_E1_ASID_CONST 0x01 -/* Constant VPN values for each entry of DTLB way set 2 (because VPN_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET2_E0_VPN_CONST 0xD0000000 -#define XCHAL_DTLB_SET2_E1_VPN_CONST 0xD8000000 -/* Constant PPN values for each entry of DTLB way set 2 (because PPN_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET2_E0_PPN_CONST 0x00000000 -#define 
XCHAL_DTLB_SET2_E1_PPN_CONST 0x00000000 -/* Constant CA values for each entry of DTLB way set 2 (because CA_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET2_E0_CA_CONST 0x07 -#define XCHAL_DTLB_SET2_E1_CA_CONST 0x03 - -/* DTLB way set 3 (group of ways 6 thru 6): */ -#define XCHAL_DTLB_SET3_WAY 6 /* index of first way in this way set */ -#define XCHAL_DTLB_SET3_WAYS 1 /* number of (contiguous) ways in this way set */ -#define XCHAL_DTLB_SET3_ENTRIES_LOG2 1 /* log2(number of entries in this way) */ -#define XCHAL_DTLB_SET3_ENTRIES 2 /* number of entries in this way (always a power of 2) */ -#define XCHAL_DTLB_SET3_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_DTLB_SET3_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_DTLB_SET3_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_DTLB_SET3_PAGESZ_LOG2_MIN 28 /* log2(minimum supported page size) */ -#define XCHAL_DTLB_SET3_PAGESZ_LOG2_MAX 28 /* log2(maximum supported page size) */ -#define XCHAL_DTLB_SET3_PAGESZ_LOG2_LIST 28 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_DTLB_SET3_ASID_CONSTMASK 0xFF /* constant ASID bits; 0 if all writable */ -#define XCHAL_DTLB_SET3_VPN_CONSTMASK 0xE0000000 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET3_PPN_CONSTMASK 0xF0000000 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET3_CA_CONSTMASK 0x0000000F /* constant CA bits; 0 if all writable */ -#define XCHAL_DTLB_SET3_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET3_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET3_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET3_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ -/* Constant ASID values for each entry of DTLB way set 3 (because ASID_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET3_E0_ASID_CONST 0x01 -#define XCHAL_DTLB_SET3_E1_ASID_CONST 0x01 -/* Constant VPN values for each entry of DTLB way set 3 (because VPN_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET3_E0_VPN_CONST 0xE0000000 -#define XCHAL_DTLB_SET3_E1_VPN_CONST 0xF0000000 -/* Constant PPN values for each entry of DTLB way set 3 (because PPN_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET3_E0_PPN_CONST 0xF0000000 -#define XCHAL_DTLB_SET3_E1_PPN_CONST 0xF0000000 -/* Constant CA values for each entry of DTLB way set 3 (because CA_CONSTMASK is non-zero): */ -#define XCHAL_DTLB_SET3_E0_CA_CONST 0x07 -#define XCHAL_DTLB_SET3_E1_CA_CONST 0x03 - -/* DTLB way set 4 (group of ways 7 thru 9): */ -#define XCHAL_DTLB_SET4_WAY 7 /* index of first way in this way set */ -#define XCHAL_DTLB_SET4_WAYS 3 /* number of (contiguous) ways in this way set */ -#define XCHAL_DTLB_SET4_ENTRIES_LOG2 0 /* log2(number of entries in this way) */ -#define XCHAL_DTLB_SET4_ENTRIES 1 /* number of entries in this way (always a power of 2) */ -#define XCHAL_DTLB_SET4_ARF 0 /* 1=autorefill by h/w, 0=non-autorefill (wired/constant/static) */ -#define XCHAL_DTLB_SET4_PAGESIZES 1 /* number of supported page sizes in this way */ -#define XCHAL_DTLB_SET4_PAGESZ_BITS 0 /* number of bits to encode the page size */ -#define XCHAL_DTLB_SET4_PAGESZ_LOG2_MIN 12 /* log2(minimum supported page size) */ -#define 
XCHAL_DTLB_SET4_PAGESZ_LOG2_MAX 12 /* log2(maximum supported page size) */ -#define XCHAL_DTLB_SET4_PAGESZ_LOG2_LIST 12 /* list of log2(page size)s, separated by XCHAL_SEP; - 2^PAGESZ_BITS entries in list, unsupported entries are zero */ -#define XCHAL_DTLB_SET4_ASID_CONSTMASK 0 /* constant ASID bits; 0 if all writable */ -#define XCHAL_DTLB_SET4_VPN_CONSTMASK 0 /* constant VPN bits, not including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET4_PPN_CONSTMASK 0 /* constant PPN bits, including entry index bits; 0 if all writable */ -#define XCHAL_DTLB_SET4_CA_CONSTMASK 0 /* constant CA bits; 0 if all writable */ -#define XCHAL_DTLB_SET4_ASID_RESET 0 /* 1 if ASID reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET4_VPN_RESET 0 /* 1 if VPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET4_PPN_RESET 0 /* 1 if PPN reset values defined (and all writable); 0 otherwise */ -#define XCHAL_DTLB_SET4_CA_RESET 0 /* 1 if CA reset values defined (and all writable); 0 otherwise */ - -/* Indexing macros: */ -#define _XCHAL_DTLB_SET(n,_what) XCHAL_DTLB_SET ## n ## _what -#define XCHAL_DTLB_SET(n,what) _XCHAL_DTLB_SET(n, _ ## what ) -#define _XCHAL_DTLB_SET_E(n,i,_what) XCHAL_DTLB_SET ## n ## _E ## i ## _what -#define XCHAL_DTLB_SET_E(n,i,what) _XCHAL_DTLB_SET_E(n,i, _ ## what ) -/* - * Example use: XCHAL_DTLB_SET(XCHAL_DTLB_ARF_SET0,ENTRIES) - * to get the value of XCHAL_DTLB_SET<n>_ENTRIES where <n> is the first auto-refill set. - */ - - -/* - * Determine whether we have a full MMU (with Page Table and Protection) - * usable for an MMU-based OS: - */ -#if XCHAL_HAVE_TLBS && !XCHAL_HAVE_SPANNING_WAY && XCHAL_ITLB_ARF_WAYS > 0 && XCHAL_DTLB_ARF_WAYS > 0 && XCHAL_MMU_RINGS >= 2 -# define XCHAL_HAVE_PTP_MMU 1 /* have full MMU (with page table [autorefill] and protection) */ -#else -# define XCHAL_HAVE_PTP_MMU 0 /* don't have full MMU */ -#endif - -/* - * For full MMUs, report kernel RAM segment and kernel I/O segment static page mappings: - */ -#if XCHAL_HAVE_PTP_MMU -#define XCHAL_KSEG_CACHED_VADDR 0xD0000000 /* virt.addr of kernel RAM cached static map */ -#define XCHAL_KSEG_CACHED_PADDR 0x00000000 /* phys.addr of kseg_cached */ -#define XCHAL_KSEG_CACHED_SIZE 0x08000000 /* size in bytes of kseg_cached (assumed power of 2!!!) */ -#define XCHAL_KSEG_BYPASS_VADDR 0xD8000000 /* virt.addr of kernel RAM bypass (uncached) static map */ -#define XCHAL_KSEG_BYPASS_PADDR 0x00000000 /* phys.addr of kseg_bypass */ -#define XCHAL_KSEG_BYPASS_SIZE 0x08000000 /* size in bytes of kseg_bypass (assumed power of 2!!!) */ - -#define XCHAL_KIO_CACHED_VADDR 0xE0000000 /* virt.addr of kernel I/O cached static map */ -#define XCHAL_KIO_CACHED_PADDR 0xF0000000 /* phys.addr of kio_cached */ -#define XCHAL_KIO_CACHED_SIZE 0x10000000 /* size in bytes of kio_cached (assumed power of 2!!!) */ -#define XCHAL_KIO_BYPASS_VADDR 0xF0000000 /* virt.addr of kernel I/O bypass (uncached) static map */ -#define XCHAL_KIO_BYPASS_PADDR 0xF0000000 /* phys.addr of kio_bypass */ -#define XCHAL_KIO_BYPASS_SIZE 0x10000000 /* size in bytes of kio_bypass (assumed power of 2!!!) */ - -#define XCHAL_SEG_MAPPABLE_VADDR 0x00000000 /* start of largest non-static-mapped virtual addr area */ -#define XCHAL_SEG_MAPPABLE_SIZE 0xD0000000 /* size in bytes of " */ -/* define XCHAL_SEG_MAPPABLE2_xxx if more areas present, sorted in order of descending size. 
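
For configurations where XCHAL_HAVE_PTP_MMU is 1, the static kseg mappings above imply a constant-offset translation between cached kernel-RAM virtual addresses and physical addresses. A hypothetical sketch (kseg_cached_to_phys is not a HAL function, just an illustration of the XCHAL_KSEG_CACHED_* values):

	#include <xtensa/config/core.h>

	#if XCHAL_HAVE_PTP_MMU
	/* 0xD0000000..0xD7FFFFFF maps to physical 0x00000000..0x07FFFFFF. */
	static inline unsigned long kseg_cached_to_phys(unsigned long vaddr)
	{
		return vaddr - XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_CACHED_PADDR;
	}
	#endif
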
*/ -#endif - - -/*---------------------------------------------------------------------- - MISC - ----------------------------------------------------------------------*/ - -#define XCHAL_NUM_WRITEBUFFER_ENTRIES 4 /* number of write buffer entries */ - -#define XCHAL_CORE_ID "linux_be" /* configuration's alphanumeric core identifier - (CoreID) set in the Xtensa Processor Generator */ - -#define XCHAL_BUILD_UNIQUE_ID 0x00003256 /* software build-unique ID (22-bit) */ - -/* These definitions describe the hardware targeted by this software: */ -#define XCHAL_HW_CONFIGID0 0xC103D1FF /* config ID reg 0 value (upper 32 of 64 bits) */ -#define XCHAL_HW_CONFIGID1 0x00803256 /* config ID reg 1 value (lower 32 of 64 bits) */ -#define XCHAL_CONFIGID0 XCHAL_HW_CONFIGID0 /* for backward compatibility only -- don't use! */ -#define XCHAL_CONFIGID1 XCHAL_HW_CONFIGID1 /* for backward compatibility only -- don't use! */ -#define XCHAL_HW_RELEASE_MAJOR 1050 /* major release of targeted hardware */ -#define XCHAL_HW_RELEASE_MINOR 1 /* minor release of targeted hardware */ -#define XCHAL_HW_RELEASE_NAME "T1050.1" /* full release name of targeted hardware */ -#define XTHAL_HW_REL_T1050 1 -#define XTHAL_HW_REL_T1050_1 1 -#define XCHAL_HW_CONFIGID_RELIABLE 1 - - -/* - * Miscellaneous special register fields: - */ - - -/* DBREAKC (special register number 160): */ -#define XCHAL_DBREAKC_VALIDMASK 0xC000003F /* bits of DBREAKC that are defined */ -/* MASK field: */ -#define XCHAL_DBREAKC_MASK_BITS 6 /* number of bits in MASK field */ -#define XCHAL_DBREAKC_MASK_NUM 64 /* max number of possible causes (2^bits) */ -#define XCHAL_DBREAKC_MASK_SHIFT 0 /* position of MASK bits in DBREAKC, starting from lsbit */ -#define XCHAL_DBREAKC_MASK_MASK 0x0000003F /* mask of bits in MASK field of DBREAKC */ -/* LOADBREAK field: */ -#define XCHAL_DBREAKC_LOADBREAK_BITS 1 /* number of bits in LOADBREAK field */ -#define XCHAL_DBREAKC_LOADBREAK_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DBREAKC_LOADBREAK_SHIFT 30 /* position of LOADBREAK bits in DBREAKC, starting from lsbit */ -#define XCHAL_DBREAKC_LOADBREAK_MASK 0x40000000 /* mask of bits in LOADBREAK field of DBREAKC */ -/* STOREBREAK field: */ -#define XCHAL_DBREAKC_STOREBREAK_BITS 1 /* number of bits in STOREBREAK field */ -#define XCHAL_DBREAKC_STOREBREAK_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DBREAKC_STOREBREAK_SHIFT 31 /* position of STOREBREAK bits in DBREAKC, starting from lsbit */ -#define XCHAL_DBREAKC_STOREBREAK_MASK 0x80000000 /* mask of bits in STOREBREAK field of DBREAKC */ - -/* PS (special register number 230): */ -#define XCHAL_PS_VALIDMASK 0x00070FFF /* bits of PS that are defined */ -/* INTLEVEL field: */ -#define XCHAL_PS_INTLEVEL_BITS 4 /* number of bits in INTLEVEL field */ -#define XCHAL_PS_INTLEVEL_NUM 16 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_INTLEVEL_SHIFT 0 /* position of INTLEVEL bits in PS, starting from lsbit */ -#define XCHAL_PS_INTLEVEL_MASK 0x0000000F /* mask of bits in INTLEVEL field of PS */ -/* EXCM field: */ -#define XCHAL_PS_EXCM_BITS 1 /* number of bits in EXCM field */ -#define XCHAL_PS_EXCM_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_EXCM_SHIFT 4 /* position of EXCM bits in PS, starting from lsbit */ -#define XCHAL_PS_EXCM_MASK 0x00000010 /* mask of bits in EXCM field of PS */ -/* PROGSTACK field: */ -#define XCHAL_PS_PROGSTACK_BITS 1 /* number of bits in PROGSTACK field */ -#define XCHAL_PS_PROGSTACK_NUM 2 /* max number of possible causes 
(2^bits) */ -#define XCHAL_PS_PROGSTACK_SHIFT 5 /* position of PROGSTACK bits in PS, starting from lsbit */ -#define XCHAL_PS_PROGSTACK_MASK 0x00000020 /* mask of bits in PROGSTACK field of PS */ -/* RING field: */ -#define XCHAL_PS_RING_BITS 2 /* number of bits in RING field */ -#define XCHAL_PS_RING_NUM 4 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_RING_SHIFT 6 /* position of RING bits in PS, starting from lsbit */ -#define XCHAL_PS_RING_MASK 0x000000C0 /* mask of bits in RING field of PS */ -/* OWB field: */ -#define XCHAL_PS_OWB_BITS 4 /* number of bits in OWB field */ -#define XCHAL_PS_OWB_NUM 16 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_OWB_SHIFT 8 /* position of OWB bits in PS, starting from lsbit */ -#define XCHAL_PS_OWB_MASK 0x00000F00 /* mask of bits in OWB field of PS */ -/* CALLINC field: */ -#define XCHAL_PS_CALLINC_BITS 2 /* number of bits in CALLINC field */ -#define XCHAL_PS_CALLINC_NUM 4 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_CALLINC_SHIFT 16 /* position of CALLINC bits in PS, starting from lsbit */ -#define XCHAL_PS_CALLINC_MASK 0x00030000 /* mask of bits in CALLINC field of PS */ -/* WOE field: */ -#define XCHAL_PS_WOE_BITS 1 /* number of bits in WOE field */ -#define XCHAL_PS_WOE_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_PS_WOE_SHIFT 18 /* position of WOE bits in PS, starting from lsbit */ -#define XCHAL_PS_WOE_MASK 0x00040000 /* mask of bits in WOE field of PS */ - -/* EXCCAUSE (special register number 232): */ -#define XCHAL_EXCCAUSE_VALIDMASK 0x0000003F /* bits of EXCCAUSE that are defined */ -/* EXCCAUSE field: */ -#define XCHAL_EXCCAUSE_BITS 6 /* number of bits in EXCCAUSE register */ -#define XCHAL_EXCCAUSE_NUM 64 /* max number of possible causes (2^bits) */ -#define XCHAL_EXCCAUSE_SHIFT 0 /* position of EXCCAUSE bits in register, starting from lsbit */ -#define XCHAL_EXCCAUSE_MASK 0x0000003F /* mask of bits in EXCCAUSE register */ - -/* DEBUGCAUSE (special register number 233): */ -#define XCHAL_DEBUGCAUSE_VALIDMASK 0x0000003F /* bits of DEBUGCAUSE that are defined */ -/* ICOUNT field: */ -#define XCHAL_DEBUGCAUSE_ICOUNT_BITS 1 /* number of bits in ICOUNT field */ -#define XCHAL_DEBUGCAUSE_ICOUNT_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_ICOUNT_SHIFT 0 /* position of ICOUNT bits in DEBUGCAUSE, starting from lsbit */ -#define XCHAL_DEBUGCAUSE_ICOUNT_MASK 0x00000001 /* mask of bits in ICOUNT field of DEBUGCAUSE */ -/* IBREAK field: */ -#define XCHAL_DEBUGCAUSE_IBREAK_BITS 1 /* number of bits in IBREAK field */ -#define XCHAL_DEBUGCAUSE_IBREAK_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_IBREAK_SHIFT 1 /* position of IBREAK bits in DEBUGCAUSE, starting from lsbit */ -#define XCHAL_DEBUGCAUSE_IBREAK_MASK 0x00000002 /* mask of bits in IBREAK field of DEBUGCAUSE */ -/* DBREAK field: */ -#define XCHAL_DEBUGCAUSE_DBREAK_BITS 1 /* number of bits in DBREAK field */ -#define XCHAL_DEBUGCAUSE_DBREAK_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_DBREAK_SHIFT 2 /* position of DBREAK bits in DEBUGCAUSE, starting from lsbit */ -#define XCHAL_DEBUGCAUSE_DBREAK_MASK 0x00000004 /* mask of bits in DBREAK field of DEBUGCAUSE */ -/* BREAK field: */ -#define XCHAL_DEBUGCAUSE_BREAK_BITS 1 /* number of bits in BREAK field */ -#define XCHAL_DEBUGCAUSE_BREAK_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_BREAK_SHIFT 3 /* position of BREAK bits in DEBUGCAUSE, starting from lsbit 
*/ -#define XCHAL_DEBUGCAUSE_BREAK_MASK 0x00000008 /* mask of bits in BREAK field of DEBUGCAUSE */ -/* BREAKN field: */ -#define XCHAL_DEBUGCAUSE_BREAKN_BITS 1 /* number of bits in BREAKN field */ -#define XCHAL_DEBUGCAUSE_BREAKN_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_BREAKN_SHIFT 4 /* position of BREAKN bits in DEBUGCAUSE, starting from lsbit */ -#define XCHAL_DEBUGCAUSE_BREAKN_MASK 0x00000010 /* mask of bits in BREAKN field of DEBUGCAUSE */ -/* DEBUGINT field: */ -#define XCHAL_DEBUGCAUSE_DEBUGINT_BITS 1 /* number of bits in DEBUGINT field */ -#define XCHAL_DEBUGCAUSE_DEBUGINT_NUM 2 /* max number of possible causes (2^bits) */ -#define XCHAL_DEBUGCAUSE_DEBUGINT_SHIFT 5 /* position of DEBUGINT bits in DEBUGCAUSE, starting from lsbit */ -#define XCHAL_DEBUGCAUSE_DEBUGINT_MASK 0x00000020 /* mask of bits in DEBUGINT field of DEBUGCAUSE */ - - - -/*---------------------------------------------------------------------- - ISA - ----------------------------------------------------------------------*/ - -#define XCHAL_HAVE_DENSITY 1 /* 1 if density option configured, 0 otherwise */ -#define XCHAL_HAVE_LOOPS 1 /* 1 if zero-overhead loops option configured, 0 otherwise */ -/* Misc instructions: */ -#define XCHAL_HAVE_NSA 0 /* 1 if NSA/NSAU instructions option configured, 0 otherwise */ -#define XCHAL_HAVE_MINMAX 0 /* 1 if MIN/MAX instructions option configured, 0 otherwise */ -#define XCHAL_HAVE_SEXT 0 /* 1 if sign-extend instruction option configured, 0 otherwise */ -#define XCHAL_HAVE_CLAMPS 0 /* 1 if CLAMPS instruction option configured, 0 otherwise */ -#define XCHAL_HAVE_MAC16 0 /* 1 if MAC16 option configured, 0 otherwise */ -#define XCHAL_HAVE_MUL16 0 /* 1 if 16-bit integer multiply option configured, 0 otherwise */ -/*#define XCHAL_HAVE_POPC 0*/ /* 1 if CRC instruction option configured, 0 otherwise */ -/*#define XCHAL_HAVE_CRC 0*/ /* 1 if POPC instruction option configured, 0 otherwise */ - -#define XCHAL_HAVE_SPECULATION 0 /* 1 if speculation option configured, 0 otherwise */ -/*#define XCHAL_HAVE_MP_SYNC 0*/ /* 1 if multiprocessor sync. 
option configured, 0 otherwise */ -#define XCHAL_HAVE_PRID 0 /* 1 if processor ID register configured, 0 otherwise */ - -#define XCHAL_NUM_MISC_REGS 2 /* number of miscellaneous registers (0..4) */ - -/* These relate a bit more to TIE: */ -#define XCHAL_HAVE_BOOLEANS 0 /* 1 if booleans option configured, 0 otherwise */ -#define XCHAL_HAVE_MUL32 0 /* 1 if 32-bit integer multiply option configured, 0 otherwise */ -#define XCHAL_HAVE_MUL32_HIGH 0 /* 1 if MUL32 option includes MULUH and MULSH, 0 otherwise */ -#define XCHAL_HAVE_FP 0 /* 1 if floating point option configured, 0 otherwise */ - - -/*---------------------------------------------------------------------- - DERIVED - ----------------------------------------------------------------------*/ - -#if XCHAL_HAVE_BE -#define XCHAL_INST_ILLN 0xD60F /* 2-byte illegal instruction, msb-first */ -#define XCHAL_INST_ILLN_BYTE0 0xD6 /* 2-byte illegal instruction, 1st byte */ -#define XCHAL_INST_ILLN_BYTE1 0x0F /* 2-byte illegal instruction, 2nd byte */ -#else -#define XCHAL_INST_ILLN 0xF06D /* 2-byte illegal instruction, lsb-first */ -#define XCHAL_INST_ILLN_BYTE0 0x6D /* 2-byte illegal instruction, 1st byte */ -#define XCHAL_INST_ILLN_BYTE1 0xF0 /* 2-byte illegal instruction, 2nd byte */ -#endif -/* Belongs in xtensa/hal.h: */ -#define XTHAL_INST_ILL 0x000000 /* 3-byte illegal instruction */ - - -/* - * Because information as to exactly which hardware release is targeted - * by a given software build is not always available, compile-time HAL - * Hardware-Release "_AT" macros are fuzzy (return 0, 1, or XCHAL_MAYBE): - */ -#ifndef XCHAL_HW_RELEASE_MAJOR -# define XCHAL_HW_CONFIGID_RELIABLE 0 -#endif -#if XCHAL_HW_CONFIGID_RELIABLE -# define XCHAL_HW_RELEASE_AT_OR_BELOW(major,minor) (XTHAL_REL_LE( XCHAL_HW_RELEASE_MAJOR,XCHAL_HW_RELEASE_MINOR, major,minor ) ? 1 : 0) -# define XCHAL_HW_RELEASE_AT_OR_ABOVE(major,minor) (XTHAL_REL_GE( XCHAL_HW_RELEASE_MAJOR,XCHAL_HW_RELEASE_MINOR, major,minor ) ? 1 : 0) -# define XCHAL_HW_RELEASE_AT(major,minor) (XTHAL_REL_EQ( XCHAL_HW_RELEASE_MAJOR,XCHAL_HW_RELEASE_MINOR, major,minor ) ? 1 : 0) -# define XCHAL_HW_RELEASE_MAJOR_AT(major) ((XCHAL_HW_RELEASE_MAJOR == (major)) ? 1 : 0) -#else -# define XCHAL_HW_RELEASE_AT_OR_BELOW(major,minor) ( ((major) < 1040 && XCHAL_HAVE_XEA2) ? 0 \ - : ((major) > 1050 && XCHAL_HAVE_XEA1) ? 1 \ - : XTHAL_MAYBE ) -# define XCHAL_HW_RELEASE_AT_OR_ABOVE(major,minor) ( ((major) >= 2000 && XCHAL_HAVE_XEA1) ? 0 \ - : (XTHAL_REL_LE(major,minor, 1040,0) && XCHAL_HAVE_XEA2) ? 1 \ - : XTHAL_MAYBE ) -# define XCHAL_HW_RELEASE_AT(major,minor) ( (((major) < 1040 && XCHAL_HAVE_XEA2) || \ - ((major) >= 2000 && XCHAL_HAVE_XEA1)) ? 0 : XTHAL_MAYBE) -# define XCHAL_HW_RELEASE_MAJOR_AT(major) XCHAL_HW_RELEASE_AT(major,0) -#endif - -/* - * Specific errata: - */ - -/* - * Erratum T1020.H13, T1030.H7, T1040.H10, T1050.H4 (fixed in T1040.3 and T1050.1; - * relevant only in XEA1, kernel-vector mode, level-one interrupts and overflows enabled): - */ -#define XCHAL_MAYHAVE_ERRATUM_XEA1KWIN (XCHAL_HAVE_XEA1 && \ - (XCHAL_HW_RELEASE_AT_OR_BELOW(1040,2) != 0 \ - || XCHAL_HW_RELEASE_AT(1050,0))) - - - -#endif /*XTENSA_CONFIG_CORE_H*/ - diff --git a/include/asm-xtensa/xtensa/config-linux_be/defs.h b/include/asm-xtensa/xtensa/config-linux_be/defs.h deleted file mode 100644 index f7c58b27337..00000000000 --- a/include/asm-xtensa/xtensa/config-linux_be/defs.h +++ /dev/null @@ -1,270 +0,0 @@ -/* Definitions for Xtensa instructions, types, and protos. */ - -/* - * Copyright (c) 2003 Tensilica, Inc. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, - * USA. - */ - -/* Do not modify. This is automatically generated.*/ - -#ifndef _XTENSA_BASE_HEADER -#define _XTENSA_BASE_HEADER - -#ifdef __XTENSA__ -#if defined(__GNUC__) && !defined(__XCC__) - -#define L8UI_ASM(arr, ars, imm) { \ - __asm__ volatile("l8ui %0, %1, %2" : "=a" (arr) : "a" (ars) , "i" (imm)); \ -} - -#define XT_L8UI(ars, imm) \ -({ \ - unsigned char _arr; \ - const unsigned char *_ars = ars; \ - L8UI_ASM(_arr, _ars, imm); \ - _arr; \ -}) - -#define L16UI_ASM(arr, ars, imm) { \ - __asm__ volatile("l16ui %0, %1, %2" : "=a" (arr) : "a" (ars) , "i" (imm)); \ -} - -#define XT_L16UI(ars, imm) \ -({ \ - unsigned short _arr; \ - const unsigned short *_ars = ars; \ - L16UI_ASM(_arr, _ars, imm); \ - _arr; \ -}) - -#define L16SI_ASM(arr, ars, imm) {\ - __asm__ volatile("l16si %0, %1, %2" : "=a" (arr) : "a" (ars) , "i" (imm)); \ -} - -#define XT_L16SI(ars, imm) \ -({ \ - signed short _arr; \ - const signed short *_ars = ars; \ - L16SI_ASM(_arr, _ars, imm); \ - _arr; \ -}) - -#define L32I_ASM(arr, ars, imm) { \ - __asm__ volatile("l32i %0, %1, %2" : "=a" (arr) : "a" (ars) , "i" (imm)); \ -} - -#define XT_L32I(ars, imm) \ -({ \ - unsigned _arr; \ - const unsigned *_ars = ars; \ - L32I_ASM(_arr, _ars, imm); \ - _arr; \ -}) - -#define S8I_ASM(arr, ars, imm) {\ - __asm__ volatile("s8i %0, %1, %2" : : "a" (arr), "a" (ars) , "i" (imm) : "memory" ); \ -} - -#define XT_S8I(arr, ars, imm) \ -({ \ - signed char _arr = arr; \ - const signed char *_ars = ars; \ - S8I_ASM(_arr, _ars, imm); \ -}) - -#define S16I_ASM(arr, ars, imm) {\ - __asm__ volatile("s16i %0, %1, %2" : : "a" (arr), "a" (ars) , "i" (imm) : "memory" ); \ -} - -#define XT_S16I(arr, ars, imm) \ -({ \ - signed short _arr = arr; \ - const signed short *_ars = ars; \ - S16I_ASM(_arr, _ars, imm); \ -}) - -#define S32I_ASM(arr, ars, imm) { \ - __asm__ volatile("s32i %0, %1, %2" : : "a" (arr), "a" (ars) , "i" (imm) : "memory" ); \ -} - -#define XT_S32I(arr, ars, imm) \ -({ \ - signed int _arr = arr; \ - const signed int *_ars = ars; \ - S32I_ASM(_arr, _ars, imm); \ -}) - -#define ADDI_ASM(art, ars, imm) {\ - __asm__ ("addi %0, %1, %2" : "=a" (art) : "a" (ars), "i" (imm)); \ -} - -#define XT_ADDI(ars, imm) \ -({ \ - unsigned _art; \ - unsigned _ars = ars; \ - ADDI_ASM(_art, _ars, imm); \ - _art; \ -}) - -#define ABS_ASM(arr, art) {\ - __asm__ ("abs %0, %1" : "=a" (arr) : "a" (art)); \ -} - -#define XT_ABS(art) \ -({ \ - unsigned _arr; \ - signed _art = art; \ - ABS_ASM(_arr, _art); \ - _arr; \ -}) - -/* Note: In the following 
macros that reference SAR, the magic "state" - register is used to capture the dependency on SAR. This is because - SAR is a 5-bit register and thus there are no C types that can be - used to represent it. It doesn't appear that the SAR register is - even relevant to GCC, but it is marked as "clobbered" just in - case. */ - -#define SRC_ASM(arr, ars, art) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("src %0, %1, %2" \ - : "=a" (arr) : "a" (ars), "a" (art), "t" (_xt_sar)); \ -} - -#define XT_SRC(ars, art) \ -({ \ - unsigned _arr; \ - unsigned _ars = ars; \ - unsigned _art = art; \ - SRC_ASM(_arr, _ars, _art); \ - _arr; \ -}) - -#define SSR_ASM(ars) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("ssr %1" : "=t" (_xt_sar) : "a" (ars) : "sar"); \ -} - -#define XT_SSR(ars) \ -({ \ - unsigned _ars = ars; \ - SSR_ASM(_ars); \ -}) - -#define SSL_ASM(ars) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("ssl %1" : "=t" (_xt_sar) : "a" (ars) : "sar"); \ -} - -#define XT_SSL(ars) \ -({ \ - unsigned _ars = ars; \ - SSL_ASM(_ars); \ -}) - -#define SSA8B_ASM(ars) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("ssa8b %1" : "=t" (_xt_sar) : "a" (ars) : "sar"); \ -} - -#define XT_SSA8B(ars) \ -({ \ - unsigned _ars = ars; \ - SSA8B_ASM(_ars); \ -}) - -#define SSA8L_ASM(ars) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("ssa8l %1" : "=t" (_xt_sar) : "a" (ars) : "sar"); \ -} - -#define XT_SSA8L(ars) \ -({ \ - unsigned _ars = ars; \ - SSA8L_ASM(_ars); \ -}) - -#define SSAI_ASM(imm) {\ - register int _xt_sar __asm__ ("state"); \ - __asm__ ("ssai %1" : "=t" (_xt_sar) : "i" (imm) : "sar"); \ -} - -#define XT_SSAI(imm) \ -({ \ - SSAI_ASM(imm); \ -}) - - - - - - - - -#endif /* __GNUC__ && !__XCC__ */ - -#ifdef __XCC__ - -/* Core load/store instructions */ -extern unsigned char _TIE_L8UI(const unsigned char * ars, immediate imm); -extern unsigned short _TIE_L16UI(const unsigned short * ars, immediate imm); -extern signed short _TIE_L16SI(const signed short * ars, immediate imm); -extern unsigned _TIE_L32I(const unsigned * ars, immediate imm); -extern void _TIE_S8I(unsigned char arr, unsigned char * ars, immediate imm); -extern void _TIE_S16I(unsigned short arr, unsigned short * ars, immediate imm); -extern void _TIE_S32I(unsigned arr, unsigned * ars, immediate imm); - -#define XT_L8UI _TIE_L8UI -#define XT_L16UI _TIE_L16UI -#define XT_L16SI _TIE_L16SI -#define XT_L32I _TIE_L32I -#define XT_S8I _TIE_S8I -#define XT_S16I _TIE_S16I -#define XT_S32I _TIE_S32I - -/* Add-immediate instruction */ -extern unsigned _TIE_ADDI(unsigned ars, immediate imm); -#define XT_ADDI _TIE_ADDI - -/* Absolute value instruction */ -extern unsigned _TIE_ABS(int art); -#define XT_ABS _TIE_ABS - -/* funnel shift instructions */ -extern unsigned _TIE_SRC(unsigned ars, unsigned art); -#define XT_SRC _TIE_SRC -extern void _TIE_SSR(unsigned ars); -#define XT_SSR _TIE_SSR -extern void _TIE_SSL(unsigned ars); -#define XT_SSL _TIE_SSL -extern void _TIE_SSA8B(unsigned ars); -#define XT_SSA8B _TIE_SSA8B -extern void _TIE_SSA8L(unsigned ars); -#define XT_SSA8L _TIE_SSA8L -extern void _TIE_SSAI(immediate imm); -#define XT_SSAI _TIE_SSAI - - -#endif /* __XCC__ */ - -#endif /* __XTENSA__ */ -#endif /* !_XTENSA_BASE_HEADER */ diff --git a/include/asm-xtensa/xtensa/config-linux_be/specreg.h b/include/asm-xtensa/xtensa/config-linux_be/specreg.h deleted file mode 100644 index fa4106aa9a0..00000000000 --- a/include/asm-xtensa/xtensa/config-linux_be/specreg.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Xtensa 
Special Register symbolic names - */ - -/* $Id: specreg.h,v 1.2 2003/03/07 19:15:18 joetaylor Exp $ */ - -/* - * Copyright (c) 2003 Tensilica, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, - * USA. - */ - -#ifndef XTENSA_SPECREG_H -#define XTENSA_SPECREG_H - -/* Include these special register bitfield definitions, for historical reasons: */ -#include <xtensa/corebits.h> - - -/* Special registers: */ -#define LBEG 0 -#define LEND 1 -#define LCOUNT 2 -#define SAR 3 -#define WINDOWBASE 72 -#define WINDOWSTART 73 -#define PTEVADDR 83 -#define RASID 90 -#define ITLBCFG 91 -#define DTLBCFG 92 -#define IBREAKENABLE 96 -#define DDR 104 -#define IBREAKA_0 128 -#define IBREAKA_1 129 -#define DBREAKA_0 144 -#define DBREAKA_1 145 -#define DBREAKC_0 160 -#define DBREAKC_1 161 -#define EPC_1 177 -#define EPC_2 178 -#define EPC_3 179 -#define EPC_4 180 -#define DEPC 192 -#define EPS_2 194 -#define EPS_3 195 -#define EPS_4 196 -#define EXCSAVE_1 209 -#define EXCSAVE_2 210 -#define EXCSAVE_3 211 -#define EXCSAVE_4 212 -#define INTERRUPT 226 -#define INTENABLE 228 -#define PS 230 -#define EXCCAUSE 232 -#define DEBUGCAUSE 233 -#define CCOUNT 234 -#define ICOUNT 236 -#define ICOUNTLEVEL 237 -#define EXCVADDR 238 -#define CCOMPARE_0 240 -#define CCOMPARE_1 241 -#define CCOMPARE_2 242 -#define MISC_REG_0 244 -#define MISC_REG_1 245 - -/* Special cases (bases of special register series): */ -#define IBREAKA 128 -#define DBREAKA 144 -#define DBREAKC 160 -#define EPC 176 -#define EPS 192 -#define EXCSAVE 208 -#define CCOMPARE 240 - -/* Special names for read-only and write-only interrupt registers: */ -#define INTREAD 226 -#define INTSET 226 -#define INTCLEAR 227 - -#endif /* XTENSA_SPECREG_H */ - diff --git a/include/asm-xtensa/xtensa/config-linux_be/system.h b/include/asm-xtensa/xtensa/config-linux_be/system.h deleted file mode 100644 index cf9d4d308e3..00000000000 --- a/include/asm-xtensa/xtensa/config-linux_be/system.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * xtensa/config/system.h -- HAL definitions that are dependent on SYSTEM configuration - * - * NOTE: The location and contents of this file are highly subject to change. - * - * Source for configuration-independent binaries (which link in a - * configuration-specific HAL library) must NEVER include this file. - * The HAL itself has historically included this file in some instances, - * but this is not appropriate either, because the HAL is meant to be - * core-specific but system independent. - */ - -/* - * Copyright (c) 2003 Tensilica, Inc. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, - * USA. - */ - - -#ifndef XTENSA_CONFIG_SYSTEM_H -#define XTENSA_CONFIG_SYSTEM_H - -/*#include <xtensa/hal.h>*/ - - - -/*---------------------------------------------------------------------- - DEVICE ADDRESSES - ----------------------------------------------------------------------*/ - -/* - * Strange place to find these, but the configuration GUI - * allows moving these around to account for various core - * configurations. Specific boards (and their BSP software) - * will have specific meanings for these components. - */ - -/* I/O Block areas: */ -#define XSHAL_IOBLOCK_CACHED_VADDR 0xE0000000 -#define XSHAL_IOBLOCK_CACHED_PADDR 0xF0000000 -#define XSHAL_IOBLOCK_CACHED_SIZE 0x0E000000 - -#define XSHAL_IOBLOCK_BYPASS_VADDR 0xF0000000 -#define XSHAL_IOBLOCK_BYPASS_PADDR 0xF0000000 -#define XSHAL_IOBLOCK_BYPASS_SIZE 0x0E000000 - -/* System ROM: */ -#define XSHAL_ROM_VADDR 0xEE000000 -#define XSHAL_ROM_PADDR 0xFE000000 -#define XSHAL_ROM_SIZE 0x00400000 -/* Largest available area (free of vectors): */ -#define XSHAL_ROM_AVAIL_VADDR 0xEE00052C -#define XSHAL_ROM_AVAIL_VSIZE 0x003FFAD4 - -/* System RAM: */ -#define XSHAL_RAM_VADDR 0xD0000000 -#define XSHAL_RAM_PADDR 0x00000000 -#define XSHAL_RAM_VSIZE 0x08000000 -#define XSHAL_RAM_PSIZE 0x10000000 -#define XSHAL_RAM_SIZE XSHAL_RAM_PSIZE -/* Largest available area (free of vectors): */ -#define XSHAL_RAM_AVAIL_VADDR 0xD0000370 -#define XSHAL_RAM_AVAIL_VSIZE 0x07FFFC90 - -/* - * Shadow system RAM (same device as system RAM, at different address). - * (Emulation boards need this for the SONIC Ethernet driver - * when data caches are configured for writeback mode.) - * NOTE: on full MMU configs, this points to the BYPASS virtual address - * of system RAM, ie. is the same as XSHAL_RAM_* except that virtual - * addresses are viewed through the BYPASS static map rather than - * the CACHED static map. - */ -#define XSHAL_RAM_BYPASS_VADDR 0xD8000000 -#define XSHAL_RAM_BYPASS_PADDR 0x00000000 -#define XSHAL_RAM_BYPASS_PSIZE 0x08000000 - -/* Alternate system RAM (different device than system RAM): */ -#define XSHAL_ALTRAM_VADDR 0xCEE00000 -#define XSHAL_ALTRAM_PADDR 0xC0000000 -#define XSHAL_ALTRAM_SIZE 0x00200000 - - -/*---------------------------------------------------------------------- - * DEVICE-ADDRESS DEPENDENT... - * - * Values written to CACHEATTR special register (or its equivalent) - * to enable and disable caches in various modes. 
- *----------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------- - BACKWARD COMPATIBILITY ... - ----------------------------------------------------------------------*/ - -/* - * NOTE: the following two macros are DEPRECATED. Use the latter - * board-specific macros instead, which are specially tuned for the - * particular target environments' memory maps. - */ -#define XSHAL_CACHEATTR_BYPASS XSHAL_XT2000_CACHEATTR_BYPASS /* disable caches in bypass mode */ -#define XSHAL_CACHEATTR_DEFAULT XSHAL_XT2000_CACHEATTR_DEFAULT /* default setting to enable caches (no writeback!) */ - -/*---------------------------------------------------------------------- - ISS (Instruction Set Simulator) SPECIFIC ... - ----------------------------------------------------------------------*/ - -#define XSHAL_ISS_CACHEATTR_WRITEBACK 0x1122222F /* enable caches in write-back mode */ -#define XSHAL_ISS_CACHEATTR_WRITEALLOC 0x1122222F /* enable caches in write-allocate mode */ -#define XSHAL_ISS_CACHEATTR_WRITETHRU 0x1122222F /* enable caches in write-through mode */ -#define XSHAL_ISS_CACHEATTR_BYPASS 0x2222222F /* disable caches in bypass mode */ -#define XSHAL_ISS_CACHEATTR_DEFAULT XSHAL_ISS_CACHEATTR_WRITEBACK /* default setting to enable caches */ - -/* For Coware only: */ -#define XSHAL_COWARE_CACHEATTR_WRITEBACK 0x11222222 /* enable caches in write-back mode */ -#define XSHAL_COWARE_CACHEATTR_WRITEALLOC 0x11222222 /* enable caches in write-allocate mode */ -#define XSHAL_COWARE_CACHEATTR_WRITETHRU 0x11222222 /* enable caches in write-through mode */ -#define XSHAL_COWARE_CACHEATTR_BYPASS 0x22222222 /* disable caches in bypass mode */ -#define XSHAL_COWARE_CACHEATTR_DEFAULT XSHAL_COWARE_CACHEATTR_WRITEBACK /* default setting to enable caches */ - -/* For BFM and other purposes: */ -#define XSHAL_ALLVALID_CACHEATTR_WRITEBACK 0x11222222 /* enable caches without any invalid regions */ -#define XSHAL_ALLVALID_CACHEATTR_DEFAULT XSHAL_ALLVALID_CACHEATTR_WRITEBACK /* default setting for caches without any invalid regions */ - -#define XSHAL_ISS_PIPE_REGIONS 0 -#define XSHAL_ISS_SDRAM_REGIONS 0 - - -/*---------------------------------------------------------------------- - XT2000 BOARD SPECIFIC ... - ----------------------------------------------------------------------*/ - -#define XSHAL_XT2000_CACHEATTR_WRITEBACK 0x22FFFFFF /* enable caches in write-back mode */ -#define XSHAL_XT2000_CACHEATTR_WRITEALLOC 0x22FFFFFF /* enable caches in write-allocate mode */ -#define XSHAL_XT2000_CACHEATTR_WRITETHRU 0x22FFFFFF /* enable caches in write-through mode */ -#define XSHAL_XT2000_CACHEATTR_BYPASS 0x22FFFFFF /* disable caches in bypass mode */ -#define XSHAL_XT2000_CACHEATTR_DEFAULT XSHAL_XT2000_CACHEATTR_WRITEBACK /* default setting to enable caches */ - -#define XSHAL_XT2000_PIPE_REGIONS 0x00001000 /* BusInt pipeline regions */ -#define XSHAL_XT2000_SDRAM_REGIONS 0x00000005 /* BusInt SDRAM regions */ - - -/*---------------------------------------------------------------------- - VECTOR SIZES - ----------------------------------------------------------------------*/ - -/* - * Sizes allocated to vectors by the system (memory map) configuration. - * These sizes are constrained by core configuration (eg. one vector's - * code cannot overflow into another vector) but are dependent on the - * system or board (or LSP) memory map configuration. 
- * - * Whether or not each vector happens to be in a system ROM is also - * a system configuration matter, sometimes useful, included here also: - */ -#define XSHAL_RESET_VECTOR_SIZE 0x000004E0 -#define XSHAL_RESET_VECTOR_ISROM 1 -#define XSHAL_USER_VECTOR_SIZE 0x0000001C -#define XSHAL_USER_VECTOR_ISROM 0 -#define XSHAL_PROGRAMEXC_VECTOR_SIZE XSHAL_USER_VECTOR_SIZE /* for backward compatibility */ -#define XSHAL_USEREXC_VECTOR_SIZE XSHAL_USER_VECTOR_SIZE /* for backward compatibility */ -#define XSHAL_KERNEL_VECTOR_SIZE 0x0000001C -#define XSHAL_KERNEL_VECTOR_ISROM 0 -#define XSHAL_STACKEDEXC_VECTOR_SIZE XSHAL_KERNEL_VECTOR_SIZE /* for backward compatibility */ -#define XSHAL_KERNELEXC_VECTOR_SIZE XSHAL_KERNEL_VECTOR_SIZE /* for backward compatibility */ -#define XSHAL_DOUBLEEXC_VECTOR_SIZE 0x000000E0 -#define XSHAL_DOUBLEEXC_VECTOR_ISROM 0 -#define XSHAL_WINDOW_VECTORS_SIZE 0x00000180 -#define XSHAL_WINDOW_VECTORS_ISROM 0 -#define XSHAL_INTLEVEL2_VECTOR_SIZE 0x0000000C -#define XSHAL_INTLEVEL2_VECTOR_ISROM 0 -#define XSHAL_INTLEVEL3_VECTOR_SIZE 0x0000000C -#define XSHAL_INTLEVEL3_VECTOR_ISROM 0 -#define XSHAL_INTLEVEL4_VECTOR_SIZE 0x0000000C -#define XSHAL_INTLEVEL4_VECTOR_ISROM 1 -#define XSHAL_DEBUG_VECTOR_SIZE XSHAL_INTLEVEL4_VECTOR_SIZE -#define XSHAL_DEBUG_VECTOR_ISROM XSHAL_INTLEVEL4_VECTOR_ISROM - - -#endif /*XTENSA_CONFIG_SYSTEM_H*/ - diff --git a/include/asm-xtensa/xtensa/config-linux_be/tie.h b/include/asm-xtensa/xtensa/config-linux_be/tie.h deleted file mode 100644 index 3c2e514602f..00000000000 --- a/include/asm-xtensa/xtensa/config-linux_be/tie.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * xtensa/config/tie.h -- HAL definitions that are dependent on CORE and TIE configuration - * - * This header file is sometimes referred to as the "compile-time HAL" or CHAL. - * It was generated for a specific Xtensa processor configuration, - * and furthermore for a specific set of TIE source files that extend - * basic core functionality. - * - * Source for configuration-independent binaries (which link in a - * configuration-specific HAL library) must NEVER include this file. - * It is perfectly normal, however, for the HAL source itself to include this file. - */ - -/* - * Copyright (c) 2003 Tensilica, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, - * USA. 
- */ - - -#ifndef XTENSA_CONFIG_TIE_H -#define XTENSA_CONFIG_TIE_H - -#include <xtensa/hal.h> - - -/*---------------------------------------------------------------------- - GENERAL - ----------------------------------------------------------------------*/ - -/* - * Separators for macros that expand into arrays. - * These can be predefined by files that #include this one, - * when different separators are required. - */ -/* Element separator for macros that expand into 1-dimensional arrays: */ -#ifndef XCHAL_SEP -#define XCHAL_SEP , -#endif -/* Array separator for macros that expand into 2-dimensional arrays: */ -#ifndef XCHAL_SEP2 -#define XCHAL_SEP2 },{ -#endif - - - - - - -/*---------------------------------------------------------------------- - COPROCESSORS and EXTRA STATE - ----------------------------------------------------------------------*/ - -#define XCHAL_CP_NUM 0 /* number of coprocessors */ -#define XCHAL_CP_MAX 0 /* max coprocessor id plus one (0 if none) */ -#define XCHAL_CP_MASK 0x00 /* bitmask of coprocessors by id */ - -/* Space for coprocessors' state save areas: */ -#define XCHAL_CP0_SA_SIZE 0 -#define XCHAL_CP1_SA_SIZE 0 -#define XCHAL_CP2_SA_SIZE 0 -#define XCHAL_CP3_SA_SIZE 0 -#define XCHAL_CP4_SA_SIZE 0 -#define XCHAL_CP5_SA_SIZE 0 -#define XCHAL_CP6_SA_SIZE 0 -#define XCHAL_CP7_SA_SIZE 0 -/* Minimum required alignments of CP state save areas: */ -#define XCHAL_CP0_SA_ALIGN 1 -#define XCHAL_CP1_SA_ALIGN 1 -#define XCHAL_CP2_SA_ALIGN 1 -#define XCHAL_CP3_SA_ALIGN 1 -#define XCHAL_CP4_SA_ALIGN 1 -#define XCHAL_CP5_SA_ALIGN 1 -#define XCHAL_CP6_SA_ALIGN 1 -#define XCHAL_CP7_SA_ALIGN 1 - -/* Indexing macros: */ -#define _XCHAL_CP_SA_SIZE(n) XCHAL_CP ## n ## _SA_SIZE -#define XCHAL_CP_SA_SIZE(n) _XCHAL_CP_SA_SIZE(n) /* n = 0 .. 7 */ -#define _XCHAL_CP_SA_ALIGN(n) XCHAL_CP ## n ## _SA_ALIGN -#define XCHAL_CP_SA_ALIGN(n) _XCHAL_CP_SA_ALIGN(n) /* n = 0 .. 7 */ - - -/* Space for "extra" state (user special registers and non-cp TIE) save area: */ -#define XCHAL_EXTRA_SA_SIZE 0 -#define XCHAL_EXTRA_SA_ALIGN 1 - -/* Total save area size (extra + all coprocessors) */ -/* (not useful until xthal_{save,restore}_all_extra() is implemented, */ -/* but included for Tor2 beta; doesn't account for alignment!): */ -#define XCHAL_CPEXTRA_SA_SIZE_TOR2 0 /* Tor2Beta temporary definition -- do not use */ - -/* Combined required alignment for all CP and EXTRA state save areas */ -/* (does not include required alignment for any base config registers): */ -#define XCHAL_CPEXTRA_SA_ALIGN 1 - -/* ... */ - - -#ifdef _ASMLANGUAGE -/* - * Assembly-language specific definitions (assembly macros, etc.). - */ -#include <xtensa/config/specreg.h> - -/******************** - * Macros to save and restore the non-coprocessor TIE portion of EXTRA state. - */ - -/* (none) */ - - -/******************** - * Macros to create functions that save and restore all EXTRA (non-coprocessor) state - * (does not include zero-overhead loop registers and non-optional registers). - */ - - /* - * Macro that expands to the body of a function that - * stores the extra (non-coprocessor) optional/custom state. - * Entry: a2 = ptr to save area in which to save extra state - * Exit: any register a2-a15 (?) may have been clobbered. - */ - .macro xchal_extra_store_funcbody - .endm - - - /* - * Macro that expands to the body of a function that - * loads the extra (non-coprocessor) optional/custom state. - * Entry: a2 = ptr to save area from which to restore extra state - * Exit: any register a2-a15 (?) may have been clobbered. 
- */ - .macro xchal_extra_load_funcbody - .endm - - -/******************** - * Macros to save and restore the state of each TIE coprocessor. - */ - - - -/******************** - * Macros to create functions that save and restore the state of *any* TIE coprocessor. - */ - - /* - * Macro that expands to the body of a function - * that stores the selected coprocessor's state (registers etc). - * Entry: a2 = ptr to save area in which to save cp state - * a3 = coprocessor number - * Exit: any register a2-a15 (?) may have been clobbered. - */ - .macro xchal_cpi_store_funcbody - .endm - - - /* - * Macro that expands to the body of a function - * that loads the selected coprocessor's state (registers etc). - * Entry: a2 = ptr to save area from which to restore cp state - * a3 = coprocessor number - * Exit: any register a2-a15 (?) may have been clobbered. - */ - .macro xchal_cpi_load_funcbody - .endm - -#endif /*_ASMLANGUAGE*/ - - -/* - * Contents of save areas in terms of libdb register numbers. - * NOTE: CONTENTS_LIBDB_{UREG,REGF} macros are not defined in this file; - * it is up to the user of this header file to define these macros - * usefully before each expansion of the CONTENTS_LIBDB macros. - * (Fields rsv[123] are reserved for future additions; they are currently - * set to zero but may be set to some useful values in the future.) - * - * CONTENTS_LIBDB_SREG(libdbnum, offset, size, align, rsv1, name, sregnum, bitmask, rsv2, rsv3) - * CONTENTS_LIBDB_UREG(libdbnum, offset, size, align, rsv1, name, uregnum, bitmask, rsv2, rsv3) - * CONTENTS_LIBDB_REGF(libdbnum, offset, size, align, rsv1, name, index, numentries, contentsize, regname_base, regfile_name, rsv2, rsv3) - */ - -#define XCHAL_EXTRA_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_EXTRA_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP0_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP0_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP1_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP1_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP2_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP2_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP3_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP3_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP4_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP4_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP5_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP5_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP6_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP6_SA_CONTENTS_LIBDB /* empty */ - -#define XCHAL_CP7_SA_CONTENTS_LIBDB_NUM 0 -#define XCHAL_CP7_SA_CONTENTS_LIBDB /* empty */ - - - - - - -/*---------------------------------------------------------------------- - MISC - ----------------------------------------------------------------------*/ - -#if 0 /* is there something equivalent for user TIE? */ -#define XCHAL_CORE_ID "linux_be" /* configuration's alphanumeric core identifier - (CoreID) set in the Xtensa Processor Generator */ - -#define XCHAL_BUILD_UNIQUE_ID 0x00003256 /* software build-unique ID (22-bit) */ - -/* These definitions describe the hardware targeted by this software: */ -#define XCHAL_HW_CONFIGID0 0xC103D1FF /* config ID reg 0 value (upper 32 of 64 bits) */ -#define XCHAL_HW_CONFIGID1 0x00803256 /* config ID reg 1 value (lower 32 of 64 bits) */ -#define XCHAL_CONFIGID0 XCHAL_HW_CONFIGID0 /* for backward compatibility only -- don't use! */ -#define XCHAL_CONFIGID1 XCHAL_HW_CONFIGID1 /* for backward compatibility only -- don't use! 
*/ -#define XCHAL_HW_RELEASE_MAJOR 1050 /* major release of targeted hardware */ -#define XCHAL_HW_RELEASE_MINOR 1 /* minor release of targeted hardware */ -#define XCHAL_HW_RELEASE_NAME "T1050.1" /* full release name of targeted hardware */ -#define XTHAL_HW_REL_T1050 1 -#define XTHAL_HW_REL_T1050_1 1 -#define XCHAL_HW_CONFIGID_RELIABLE 1 -#endif /*0*/ - - - -/*---------------------------------------------------------------------- - ISA - ----------------------------------------------------------------------*/ - -#if 0 /* these probably don't belong here, but are related to or implemented using TIE */ -#define XCHAL_HAVE_BOOLEANS 0 /* 1 if booleans option configured, 0 otherwise */ -/* Misc instructions: */ -#define XCHAL_HAVE_MUL32 0 /* 1 if 32-bit integer multiply option configured, 0 otherwise */ -#define XCHAL_HAVE_MUL32_HIGH 0 /* 1 if MUL32 option includes MULUH and MULSH, 0 otherwise */ - -#define XCHAL_HAVE_FP 0 /* 1 if floating point option configured, 0 otherwise */ -#endif /*0*/ - - -#endif /*XTENSA_CONFIG_TIE_H*/ - diff --git a/include/asm-xtensa/xtensa/coreasm.h b/include/asm-xtensa/xtensa/coreasm.h deleted file mode 100644 index a8cfb54c20a..00000000000 --- a/include/asm-xtensa/xtensa/coreasm.h +++ /dev/null @@ -1,526 +0,0 @@ -#ifndef XTENSA_COREASM_H -#define XTENSA_COREASM_H - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific - * definitions that depend on CORE configuration. - * - * Source for configuration-independent binaries (which link in a - * configuration-specific HAL library) must NEVER include this file. - * It is perfectly normal, however, for the HAL itself to include this - * file. - * - * This file must NOT include xtensa/config/system.h. Any assembler - * header file that depends on system information should likely go in - * a new systemasm.h (or sysasm.h) header file. - * - * NOTE: macro beqi32 is NOT configuration-dependent, and is placed - * here til we will have configuration-independent header file. - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file "COPYING" in the main directory of - * this archive for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/config/core.h> -#include <xtensa/config/specreg.h> - -/* - * Assembly-language specific definitions (assembly macros, etc.). - */ - -/*---------------------------------------------------------------------- - * find_ms_setbit - * - * This macro finds the most significant bit that is set in <as> - * and return its index + <base> in <ad>, or <base> - 1 if <as> is zero. - * The index counts starting at zero for the lsbit, so the return - * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set). - * - * Parameters: - * <ad> destination address register (any register) - * <as> source address register - * <at> temporary address register (must be different than <as>) - * <base> constant value added to result (usually 0 or 1) - * On entry: - * <ad> = undefined if different than <as> - * <as> = value whose most significant set bit is to be found - * <at> = undefined - * no other registers are used by this macro. - * On exit: - * <ad> = <base> + index of msbit set in original <as>, - * = <base> - 1 if original <as> was zero. 
- * <as> clobbered (if not <ad>) - * <at> clobbered (if not <ad>) - * Example: - * find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4 - */ - - .macro find_ms_setbit ad, as, at, base -#if XCHAL_HAVE_NSA - movi \at, 31+\base - nsau \as, \as // get index of \as, numbered from msbit (32 if absent) - sub \ad, \at, \as // get numbering from lsbit (0..31, -1 if absent) -#else /* XCHAL_HAVE_NSA */ - movi \at, \base // start with result of 0 (point to lsbit of 32) - - beqz \as, 2f // special case for zero argument: return -1 - bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits) - addi \at, \at, 16 // no, increment result to upper 16 bits (of 32) - //srli \as, \as, 16 // check upper half (shift right 16 bits) - extui \as, \as, 16, 16 // check upper half (shift right 16 bits) -1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits) - addi \at, \at, 8 // no, increment result to upper 8 bits (of 16) - srli \as, \as, 8 // shift right to check upper 8 bits -1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits) - addi \at, \at, 4 // no, increment result to upper 4 bits (of 8) - srli \as, \as, 4 // shift right 4 bits to check upper half -1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits) - addi \at, \at, 2 // no, increment result to upper 2 bits (of 4) - srli \as, \as, 2 // shift right 2 bits to check upper half -1: bltui \as, 0x2, 1f // is it the lsbit? - addi \at, \at, 2 // no, increment result to upper bit (of 2) -2: addi \at, \at, -1 // (from just above: add 1; from beqz: return -1) - //srli \as, \as, 1 -1: // done! \at contains index of msbit set (or -1 if none set) - .if 0x\ad - 0x\at // destination different than \at ? (works because regs are a0-a15) - mov \ad, \at // then move result to \ad - .endif -#endif /* XCHAL_HAVE_NSA */ - .endm // find_ms_setbit - -/*---------------------------------------------------------------------- - * find_ls_setbit - * - * This macro finds the least significant bit that is set in <as>, - * and return its index in <ad>. - * Usage is the same as for the find_ms_setbit macro. - * Example: - * find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4 - */ - - .macro find_ls_setbit ad, as, at, base - neg \at, \as // keep only the least-significant bit that is set... - and \as, \at, \as // ... in \as - find_ms_setbit \ad, \as, \at, \base - .endm // find_ls_setbit - -/*---------------------------------------------------------------------- - * find_ls_one - * - * Same as find_ls_setbit with base zero. - * Source (as) and destination (ad) registers must be different. - * Provided for backward compatibility. - */ - - .macro find_ls_one ad, as - find_ls_setbit \ad, \as, \ad, 0 - .endm // find_ls_one - -/*---------------------------------------------------------------------- - * floop, floopnez, floopgtz, floopend - * - * These macros are used for fast inner loops that - * work whether or not the Loops options is configured. - * If the Loops option is configured, they simply use - * the zero-overhead LOOP instructions; otherwise - * they use explicit decrement and branch instructions. - * - * They are used in pairs, with floop, floopnez or floopgtz - * at the beginning of the loop, and floopend at the end. - * - * Each pair of loop macro calls must be given the loop count - * address register and a unique label for that loop. 
- * - * Example: - * - * movi a3, 16 // loop 16 times - * floop a3, myloop1 - * : - * bnez a7, end1 // exit loop if a7 != 0 - * : - * floopend a3, myloop1 - * end1: - * - * Like the LOOP instructions, these macros cannot be - * nested, must include at least one instruction, - * cannot call functions inside the loop, etc. - * The loop can be exited by jumping to the instruction - * following floopend (or elsewhere outside the loop), - * or continued by jumping to a NOP instruction placed - * immediately before floopend. - * - * Unlike LOOP instructions, the register passed to floop* - * cannot be used inside the loop, because it is used as - * the loop counter if the Loops option is not configured. - * And its value is undefined after exiting the loop. - * And because the loop counter register is active inside - * the loop, you can't easily use this construct to loop - * across a register file using ROTW as you might with LOOP - * instructions, unless you copy the loop register along. - */ - - /* Named label version of the macros: */ - - .macro floop ar, endlabel - floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel - .endm - - .macro floopnez ar, endlabel - floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel - .endm - - .macro floopgtz ar, endlabel - floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel - .endm - - .macro floopend ar, endlabel - floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel - .endm - - /* Numbered local label version of the macros: */ -#if 0 /*UNTESTED*/ - .macro floop89 ar - floop_ \ar, 8, 9f - .endm - - .macro floopnez89 ar - floopnez_ \ar, 8, 9f - .endm - - .macro floopgtz89 ar - floopgtz_ \ar, 8, 9f - .endm - - .macro floopend89 ar - floopend_ \ar, 8b, 9 - .endm -#endif /*0*/ - - /* Underlying version of the macros: */ - - .macro floop_ ar, startlabel, endlabelref - .ifdef _infloop_ - .if _infloop_ - .err // Error: floop cannot be nested - .endif - .endif - .set _infloop_, 1 -#if XCHAL_HAVE_LOOPS - loop \ar, \endlabelref -#else /* XCHAL_HAVE_LOOPS */ -\startlabel: - addi \ar, \ar, -1 -#endif /* XCHAL_HAVE_LOOPS */ - .endm // floop_ - - .macro floopnez_ ar, startlabel, endlabelref - .ifdef _infloop_ - .if _infloop_ - .err // Error: floopnez cannot be nested - .endif - .endif - .set _infloop_, 1 -#if XCHAL_HAVE_LOOPS - loopnez \ar, \endlabelref -#else /* XCHAL_HAVE_LOOPS */ - beqz \ar, \endlabelref -\startlabel: - addi \ar, \ar, -1 -#endif /* XCHAL_HAVE_LOOPS */ - .endm // floopnez_ - - .macro floopgtz_ ar, startlabel, endlabelref - .ifdef _infloop_ - .if _infloop_ - .err // Error: floopgtz cannot be nested - .endif - .endif - .set _infloop_, 1 -#if XCHAL_HAVE_LOOPS - loopgtz \ar, \endlabelref -#else /* XCHAL_HAVE_LOOPS */ - bltz \ar, \endlabelref - beqz \ar, \endlabelref -\startlabel: - addi \ar, \ar, -1 -#endif /* XCHAL_HAVE_LOOPS */ - .endm // floopgtz_ - - - .macro floopend_ ar, startlabelref, endlabel - .ifndef _infloop_ - .err // Error: floopend without matching floopXXX - .endif - .ifeq _infloop_ - .err // Error: floopend without matching floopXXX - .endif - .set _infloop_, 0 -#if ! XCHAL_HAVE_LOOPS - bnez \ar, \startlabelref -#endif /* XCHAL_HAVE_LOOPS */ -\endlabel: - .endm // floopend_ - -/*---------------------------------------------------------------------- - * crsil -- conditional RSIL (read/set interrupt level) - * - * Executes the RSIL instruction if it exists, else just reads PS. - * The RSIL instruction does not exist in the new exception architecture - * if the interrupt option is not selected. 
- */ - - .macro crsil ar, newlevel -#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS - rsil \ar, \newlevel -#else - rsr \ar, PS -#endif - .endm // crsil - -/*---------------------------------------------------------------------- - * window_spill{4,8,12} - * - * These macros spill callers' register windows to the stack. - * They work for both privileged and non-privileged tasks. - * Must be called from a windowed ABI context, eg. within - * a windowed ABI function (ie. valid stack frame, window - * exceptions enabled, not in exception mode, etc). - * - * This macro requires a single invocation of the window_spill_common - * macro in the same assembly unit and section. - * - * Note that using window_spill{4,8,12} macros is more efficient - * than calling a function implemented using window_spill_function, - * because the latter needs extra code to figure out the size of - * the call to the spilling function. - * - * Example usage: - * - * .text - * .align 4 - * .global some_function - * .type some_function,@function - * some_function: - * entry a1, 16 - * : - * : - * - * window_spill4 // spill windows of some_function's callers; preserves a0..a3 only; - * // to use window_spill{8,12} in this example function we'd have - * // to increase space allocated by the entry instruction, because - * // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed - * // for call8/window_spill8 or call12/window_spill12 respectively. - * : - * - * retw - * - * window_spill_common // instantiates code used by window_spill4 - * - * - * On entry: - * none (if window_spill4) - * stack frame has enough space allocated for call8 (if window_spill8) - * stack frame has enough space allocated for call12 (if window_spill12) - * On exit: - * a4..a15 clobbered (if window_spill4) - * a8..a15 clobbered (if window_spill8) - * a12..a15 clobbered (if window_spill12) - * no caller windows are in live registers - */ - - .macro window_spill4 -#if XCHAL_HAVE_WINDOWED -# if XCHAL_NUM_AREGS == 16 - movi a15, 0 // for 16-register files, no need to call to reach the end -# elif XCHAL_NUM_AREGS == 32 - call4 .L__wdwspill_assist28 // call deep enough to clear out any live callers -# elif XCHAL_NUM_AREGS == 64 - call4 .L__wdwspill_assist60 // call deep enough to clear out any live callers -# endif -#endif - .endm // window_spill4 - - .macro window_spill8 -#if XCHAL_HAVE_WINDOWED -# if XCHAL_NUM_AREGS == 16 - movi a15, 0 // for 16-register files, no need to call to reach the end -# elif XCHAL_NUM_AREGS == 32 - call8 .L__wdwspill_assist24 // call deep enough to clear out any live callers -# elif XCHAL_NUM_AREGS == 64 - call8 .L__wdwspill_assist56 // call deep enough to clear out any live callers -# endif -#endif - .endm // window_spill8 - - .macro window_spill12 -#if XCHAL_HAVE_WINDOWED -# if XCHAL_NUM_AREGS == 16 - movi a15, 0 // for 16-register files, no need to call to reach the end -# elif XCHAL_NUM_AREGS == 32 - call12 .L__wdwspill_assist20 // call deep enough to clear out any live callers -# elif XCHAL_NUM_AREGS == 64 - call12 .L__wdwspill_assist52 // call deep enough to clear out any live callers -# endif -#endif - .endm // window_spill12 - -/*---------------------------------------------------------------------- - * window_spill_function - * - * This macro outputs a function that will spill its caller's callers' - * register windows to the stack. Eg. it could be used to implement - * a version of xthal_window_spill() that works in non-privileged tasks. - * This works for both privileged and non-privileged tasks. 
- * - * Typical usage: - * - * .text - * .align 4 - * .global my_spill_function - * .type my_spill_function,@function - * my_spill_function: - * window_spill_function - * - * On entry to resulting function: - * none - * On exit from resulting function: - * none (no caller windows are in live registers) - */ - - .macro window_spill_function -#if XCHAL_HAVE_WINDOWED -# if XCHAL_NUM_AREGS == 32 - entry sp, 48 - bbci.l a0, 31, 1f // branch if called with call4 - bbsi.l a0, 30, 2f // branch if called with call12 - call8 .L__wdwspill_assist16 // called with call8, only need another 8 - retw -1: call12 .L__wdwspill_assist16 // called with call4, only need another 12 - retw -2: call4 .L__wdwspill_assist16 // called with call12, only need another 4 - retw -# elif XCHAL_NUM_AREGS == 64 - entry sp, 48 - bbci.l a0, 31, 1f // branch if called with call4 - bbsi.l a0, 30, 2f // branch if called with call12 - call4 .L__wdwspill_assist52 // called with call8, only need a call4 - retw -1: call8 .L__wdwspill_assist52 // called with call4, only need a call8 - retw -2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12 - retw -# elif XCHAL_NUM_AREGS == 16 - entry sp, 16 - bbci.l a0, 31, 1f // branch if called with call4 - bbsi.l a0, 30, 2f // branch if called with call12 - movi a7, 0 // called with call8 - retw -1: movi a11, 0 // called with call4 -2: retw // if called with call12, everything already spilled - -// movi a15, 0 // trick to spill all but the direct caller -// j 1f -// // The entry instruction is magical in the assembler (gets auto-aligned) -// // so we have to jump to it to avoid falling through the padding. -// // We need entry/retw to know where to return. -//1: entry sp, 16 -// retw -# else -# error "unrecognized address register file size" -# endif -#endif /* XCHAL_HAVE_WINDOWED */ - window_spill_common - .endm // window_spill_function - -/*---------------------------------------------------------------------- - * window_spill_common - * - * Common code used by any number of invocations of the window_spill## - * and window_spill_function macros. - * - * Must be instantiated exactly once within a given assembly unit, - * within call/j range of and same section as window_spill## - * macro invocations for that assembly unit. - * (Is automatically instantiated by the window_spill_function macro.) - */ - - .macro window_spill_common -#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64) - .ifndef .L__wdwspill_defined -# if XCHAL_NUM_AREGS >= 64 -.L__wdwspill_assist60: - entry sp, 32 - call8 .L__wdwspill_assist52 - retw -.L__wdwspill_assist56: - entry sp, 16 - call4 .L__wdwspill_assist52 - retw -.L__wdwspill_assist52: - entry sp, 48 - call12 .L__wdwspill_assist40 - retw -.L__wdwspill_assist40: - entry sp, 48 - call12 .L__wdwspill_assist28 - retw -# endif -.L__wdwspill_assist28: - entry sp, 48 - call12 .L__wdwspill_assist16 - retw -.L__wdwspill_assist24: - entry sp, 32 - call8 .L__wdwspill_assist16 - retw -.L__wdwspill_assist20: - entry sp, 16 - call4 .L__wdwspill_assist16 - retw -.L__wdwspill_assist16: - entry sp, 16 - movi a15, 0 - retw - .set .L__wdwspill_defined, 1 - .endif -#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */ - .endm // window_spill_common - -/*---------------------------------------------------------------------- - * beqi32 - * - * macro implements version of beqi for arbitrary 32-bit immediate value - * - * beqi32 ax, ay, imm32, label - * - * Compares value in register ax with imm32 value and jumps to label if - * equal.
Clobbers register ay if needed - * - */ - .macro beqi32 ax, ay, imm, label - .ifeq ((\imm-1) & ~7) // 1..8 ? - beqi \ax, \imm, \label - .else - .ifeq (\imm+1) // -1 ? - beqi \ax, \imm, \label - .else - .ifeq (\imm) // 0 ? - beqz \ax, \label - .else - // We could also handle immediates 10,12,16,32,64,128,256 - // but it would be a long macro... - movi \ay, \imm - beq \ax, \ay, \label - .endif - .endif - .endif - .endm // beqi32 - -#endif /*XTENSA_COREASM_H*/ - diff --git a/include/asm-xtensa/xtensa/corebits.h b/include/asm-xtensa/xtensa/corebits.h deleted file mode 100644 index e578ade4163..00000000000 --- a/include/asm-xtensa/xtensa/corebits.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef XTENSA_COREBITS_H -#define XTENSA_COREBITS_H -
-/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * xtensa/corebits.h - Xtensa Special Register field positions and masks. - * - * (In previous releases, these were defined in specreg.h, a generated file. - * This file is not generated, i.e. it is processor configuration independent.) - */ - - -/* EXCCAUSE register fields: */ -#define EXCCAUSE_EXCCAUSE_SHIFT 0 -#define EXCCAUSE_EXCCAUSE_MASK 0x3F -/* Exception causes (mostly incomplete!): */ -#define EXCCAUSE_ILLEGAL 0 -#define EXCCAUSE_SYSCALL 1 -#define EXCCAUSE_IFETCHERROR 2 -#define EXCCAUSE_LOADSTOREERROR 3 -#define EXCCAUSE_LEVEL1INTERRUPT 4 -#define EXCCAUSE_ALLOCA 5 - -/* PS register fields: */ -#define PS_WOE_SHIFT 18 -#define PS_WOE_MASK 0x00040000 -#define PS_WOE PS_WOE_MASK -#define PS_CALLINC_SHIFT 16 -#define PS_CALLINC_MASK 0x00030000 -#define PS_CALLINC(n) (((n)&3)<<PS_CALLINC_SHIFT) /* n = 0..3 */ -#define PS_OWB_SHIFT 8 -#define PS_OWB_MASK 0x00000F00 -#define PS_OWB(n) (((n)&15)<<PS_OWB_SHIFT) /* n = 0..15 (or 0..7) */ -#define PS_RING_SHIFT 6 -#define PS_RING_MASK 0x000000C0 -#define PS_RING(n) (((n)&3)<<PS_RING_SHIFT) /* n = 0..3 */ -#define PS_UM_SHIFT 5 -#define PS_UM_MASK 0x00000020 -#define PS_UM PS_UM_MASK -#define PS_EXCM_SHIFT 4 -#define PS_EXCM_MASK 0x00000010 -#define PS_EXCM PS_EXCM_MASK -#define PS_INTLEVEL_SHIFT 0 -#define PS_INTLEVEL_MASK 0x0000000F -#define PS_INTLEVEL(n) ((n)&PS_INTLEVEL_MASK) /* n = 0..15 */ -/* Backward compatibility (deprecated): */ -#define PS_PROGSTACK_SHIFT PS_UM_SHIFT -#define PS_PROGSTACK_MASK PS_UM_MASK -#define PS_PROG_SHIFT PS_UM_SHIFT -#define PS_PROG_MASK PS_UM_MASK -#define PS_PROG PS_UM - -/* DBREAKCn register fields: */ -#define DBREAKC_MASK_SHIFT 0 -#define DBREAKC_MASK_MASK 0x0000003F -#define DBREAKC_LOADBREAK_SHIFT 30 -#define DBREAKC_LOADBREAK_MASK 0x40000000 -#define DBREAKC_STOREBREAK_SHIFT 31 -#define DBREAKC_STOREBREAK_MASK 0x80000000 - -/* DEBUGCAUSE register fields: */ -#define DEBUGCAUSE_DEBUGINT_SHIFT 5 -#define DEBUGCAUSE_DEBUGINT_MASK 0x20 /* debug interrupt */ -#define DEBUGCAUSE_BREAKN_SHIFT 4 -#define DEBUGCAUSE_BREAKN_MASK 0x10 /* BREAK.N instruction */ -#define DEBUGCAUSE_BREAK_SHIFT 3 -#define DEBUGCAUSE_BREAK_MASK 0x08 /* BREAK instruction */ -#define DEBUGCAUSE_DBREAK_SHIFT 2 -#define DEBUGCAUSE_DBREAK_MASK 0x04 /* DBREAK match */ -#define DEBUGCAUSE_IBREAK_SHIFT 1 -#define DEBUGCAUSE_IBREAK_MASK 0x02 /* IBREAK match */ -#define DEBUGCAUSE_ICOUNT_SHIFT 0 -#define DEBUGCAUSE_ICOUNT_MASK 0x01 /* ICOUNT would increment to zero */ - -#endif /*XTENSA_COREBITS_H*/ - diff --git a/include/asm-xtensa/xtensa/hal.h b/include/asm-xtensa/xtensa/hal.h deleted file mode 100644 index d1047250545..00000000000 --- a/include/asm-xtensa/xtensa/hal.h +++ /dev/null @@ -1,822 +0,0 @@ -#ifndef XTENSA_HAL_H -#define XTENSA_HAL_H - -/* -
* THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/hal.h -- contains a definition of the - * Core HAL interface. - * - * All definitions in this header file are independent of any specific - * Xtensa processor configuration. Thus an OS or other software can - * include this header file and be compiled into configuration- - * independent objects that can be distributed and eventually linked - * to the HAL library (libhal.a) to create a configuration-specific - * final executable. - * - * Certain definitions, however, are release-specific -- such as the - * XTHAL_RELEASE_xxx macros (or additions made in later releases). - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -/*---------------------------------------------------------------------- - Constant Definitions - (shared with assembly) - ----------------------------------------------------------------------*/ - -/* Software release information (not configuration-specific!): */ -#define XTHAL_RELEASE_MAJOR 1050 -#define XTHAL_RELEASE_MINOR 0 -#define XTHAL_RELEASE_NAME "T1050.0-2002-08-06-eng0" -#define XTHAL_RELEASE_INTERNAL "2002-08-06-eng0" -#define XTHAL_REL_T1050 1 -#define XTHAL_REL_T1050_0 1 -#define XTHAL_REL_T1050_0_2002 1 -#define XTHAL_REL_T1050_0_2002_08 1 -#define XTHAL_REL_T1050_0_2002_08_06 1 -#define XTHAL_REL_T1050_0_2002_08_06_ENG0 1 - -/* HAL version numbers (these names are for backward compatibility): */ -#define XTHAL_MAJOR_REV XTHAL_RELEASE_MAJOR -#define XTHAL_MINOR_REV XTHAL_RELEASE_MINOR -/* - * A bit of software release history on values of XTHAL_{MAJOR,MINOR}_REV: - * - * Release MAJOR MINOR Comment - * ======= ===== ===== ======= - * T1015.n n/a n/a (HAL not yet available) - * T1020.{0,1,2} 0 1 (HAL beta) - * T1020.{3,4} 0 2 First release. - * T1020.n (n>4) 0 2 or >3 (TBD) - * T1030.0 0 1 (HAL beta) - * T1030.{1,2} 0 3 Equivalent to first release. - * T1030.n (n>=3) 0 >= 3 (TBD) - * T1040.n 1040 n Full CHAL available from T1040.2 - * T1050.n 1050 n Current release. - * - * - * Note: there is a distinction between the software release with - * which something is compiled (accessible using XTHAL_RELEASE_* macros) - * and the software release with which the HAL library was compiled - * (accessible using Xthal_release_* global variables). This - * distinction is particularly relevant for vendors that distribute - * configuration-independent binaries (eg. an OS), where their customer - * might link it with a HAL of a different Xtensa software release. - * In this case, it may be appropriate for the OS to verify at run-time - * whether XTHAL_RELEASE_* and Xthal_release_* are compatible. - * [Guidelines as to which release is compatible with which are not - * currently provided explicitly, but might be inferred from reading - * OSKit documentation for all releases -- compatibility is also highly - * dependent on which HAL features are used. Each release is usually - * backward compatible, with very few exceptions if any.] - * - * Notes: - * Tornado 2.0 supported in T1020.3+, T1030.1+, and T1040.{0,1} only. - * Tornado 2.0.2 supported in T1040.2+, and T1050. - * Compile-time HAL port of NucleusPlus supported by T1040.2+ and T1050. - */ - - -/* - * Architectural limits, independent of configuration. 
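The release-history comment above suggests that configuration-independent code may want to verify at run time that the HAL library it is linked against matches the release headers it was compiled with. A minimal sketch of such a check, assuming the usual <xtensa/hal.h> include path and using only names declared in this header (Xthal_rev_no and the Xthal_release_* globals):

	#include <xtensa/hal.h>

	/* Hypothetical helper: non-zero if the linked libhal.a matches the
	 * release headers this object was compiled against.
	 */
	static int hal_release_matches(void)
	{
		return Xthal_rev_no == ((XTHAL_MAJOR_REV << 16) | XTHAL_MINOR_REV) &&
		       Xthal_release_major == XTHAL_RELEASE_MAJOR &&
		       Xthal_release_minor == XTHAL_RELEASE_MINOR;
	}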
- * Note that these are ISA-defined limits, not micro-architecture implementation - * limits enforced by the Xtensa Processor Generator (which may be stricter than - * these below). - */ -#define XTHAL_MAX_CPS 8 /* max number of coprocessors (0..7) */ -#define XTHAL_MAX_INTERRUPTS 32 /* max number of interrupts (0..31) */ -#define XTHAL_MAX_INTLEVELS 16 /* max number of interrupt levels (0..15) */ - /* (as of T1040, implementation limit is 7: 0..6) */ -#define XTHAL_MAX_TIMERS 4 /* max number of timers (CCOMPARE0..CCOMPARE3) */ - /* (as of T1040, implementation limit is 3: 0..2) */ - -/* Misc: */ -#define XTHAL_LITTLEENDIAN 0 -#define XTHAL_BIGENDIAN 1 - - -/* Interrupt types: */ -#define XTHAL_INTTYPE_UNCONFIGURED 0 -#define XTHAL_INTTYPE_SOFTWARE 1 -#define XTHAL_INTTYPE_EXTERN_EDGE 2 -#define XTHAL_INTTYPE_EXTERN_LEVEL 3 -#define XTHAL_INTTYPE_TIMER 4 -#define XTHAL_INTTYPE_NMI 5 -#define XTHAL_MAX_INTTYPES 6 /* number of interrupt types */ - -/* Timer related: */ -#define XTHAL_TIMER_UNCONFIGURED -1 /* Xthal_timer_interrupt[] value for non-existent timers */ -#define XTHAL_TIMER_UNASSIGNED XTHAL_TIMER_UNCONFIGURED /* (for backwards compatibility only) */ - - -/* Access Mode bits (tentative): */ /* bit abbr unit short_name PPC equ - Description */ -#define XTHAL_AMB_EXCEPTION 0 /* 001 E EX fls: EXception none - generate exception on any access (aka "illegal") */ -#define XTHAL_AMB_HITCACHE 1 /* 002 C CH fls: use Cache on Hit ~(I CI) - use cache on hit -- way from tag match [or H HC, or U UC] (ISA: same, except for Isolate case) */ -#define XTHAL_AMB_ALLOCATE 2 /* 004 A AL fl?: ALlocate none - refill cache on miss -- way from LRU [or F FI fill] (ISA: Read/Write Miss Refill) */ -#define XTHAL_AMB_WRITETHRU 3 /* 008 W WT --s: WriteThrough W WT - store immediately to memory (ISA: same) */ -#define XTHAL_AMB_ISOLATE 4 /* 010 I IS fls: ISolate none - use cache regardless of hit-vs-miss -- way from vaddr (ISA: use-cache-on-miss+hit) */ -#define XTHAL_AMB_GUARD 5 /* 020 G GU ?l?: GUard G * - non-speculative; spec/replay refs not permitted */ -#if 0 -#define XTHAL_AMB_ORDERED x /* 000 O OR fls: ORdered G * - mem accesses cannot be out of order */ -#define XTHAL_AMB_FUSEWRITES x /* 000 F FW --s: FuseWrites none - allow combining/merging multiple writes (to same datapath data unit) into one (implied by writeback) */ -#define XTHAL_AMB_COHERENT x /* 000 M MC fl?: Mem/MP Coherent M - on reads, other CPUs/bus-masters may need to supply data */ -#define XTHAL_AMB_TRUSTED x /* 000 T TR ?l?: TRusted none - memory will not bus error (if it does, handle as fatal imprecise interrupt) */ -#define XTHAL_AMB_PREFETCH x /* 000 P PR fl?: PRefetch none - on refill, read line+1 into prefetch buffers */ -#define XTHAL_AMB_STREAM x /* 000 S ST ???: STreaming none - access one of N stream buffers */ -#endif /*0*/ - -#define XTHAL_AM_EXCEPTION (1<<XTHAL_AMB_EXCEPTION) -#define XTHAL_AM_HITCACHE (1<<XTHAL_AMB_HITCACHE) -#define XTHAL_AM_ALLOCATE (1<<XTHAL_AMB_ALLOCATE) -#define XTHAL_AM_WRITETHRU (1<<XTHAL_AMB_WRITETHRU) -#define XTHAL_AM_ISOLATE (1<<XTHAL_AMB_ISOLATE) -#define XTHAL_AM_GUARD (1<<XTHAL_AMB_GUARD) -#if 0 -#define XTHAL_AM_ORDERED (1<<XTHAL_AMB_ORDERED) -#define XTHAL_AM_FUSEWRITES (1<<XTHAL_AMB_FUSEWRITES) -#define XTHAL_AM_COHERENT (1<<XTHAL_AMB_COHERENT) -#define XTHAL_AM_TRUSTED (1<<XTHAL_AMB_TRUSTED) -#define XTHAL_AM_PREFETCH (1<<XTHAL_AMB_PREFETCH) -#define XTHAL_AM_STREAM (1<<XTHAL_AMB_STREAM) -#endif /*0*/ - -/* - * Allowed Access Modes (bit combinations). 
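The XTHAL_AMB_* values above are bit positions and the XTHAL_AM_* macros are the corresponding one-bit masks, so the "allowed access mode" constants defined next are simply OR-combinations of those masks. A small illustrative compile-time check (not from the original header) makes the encoding concrete:

	/* XTHAL_AM_HITCACHE (1<<1) together with XTHAL_AM_ALLOCATE (1<<2)
	 * gives 0x006, which is exactly the XTHAL_FAM_CACHED "cached"
	 * instruction-fetch mode defined just below.
	 */
	#if (XTHAL_AM_HITCACHE | XTHAL_AM_ALLOCATE) != 0x006
	# error "unexpected Xtensa access-mode bit layout"
	#endif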
- * - * Columns are: - * "FOGIWACE" - * Access mode bits (see XTHAL_AMB_xxx above). - * <letter> = bit is set - * '-' = bit is clear - * '.' = bit is irrelevant / don't care, as follows: - * E=1 makes all others irrelevant - * W,F relevant only for stores - * "2345" - * Indicates which Xtensa releases support the corresponding - * access mode. Releases for each character column are: - * 2 = prior to T1020.2: T1015 (V1.5), T1020.0, T1020.1 - * 3 = T1020.2 and later: T1020.2+, T1030 - * 4 = T1040 - * 5 = T1050 (maybe) - * And the character column contents are: - * <number> = support by release(s) - * "." = unsupported by release(s) - * "?" = support unknown - */ - /* FOGIWACE 2345 */ -/* For instruction fetch: */ -#define XTHAL_FAM_EXCEPTION 0x001 /* .......E 2345 exception */ -#define XTHAL_FAM_ISOLATE 0x012 /* .--I.-C- .... isolate */ -#define XTHAL_FAM_BYPASS 0x000 /* .---.--- 2345 bypass */ -#define XTHAL_FAM_NACACHED 0x002 /* .---.-C- .... cached no-allocate (frozen) */ -#define XTHAL_FAM_CACHED 0x006 /* .---.AC- 2345 cached */ -/* For data load: */ -#define XTHAL_LAM_EXCEPTION 0x001 /* .......E 2345 exception */ -#define XTHAL_LAM_ISOLATE 0x012 /* .--I.-C- 2345 isolate */ -#define XTHAL_LAM_BYPASS 0x000 /* .O--.--- 2... bypass speculative */ -#define XTHAL_LAM_BYPASSG 0x020 /* .OG-.--- .345 bypass guarded */ -#define XTHAL_LAM_NACACHED 0x002 /* .O--.-C- 2... cached no-allocate speculative */ -#define XTHAL_LAM_NACACHEDG 0x022 /* .OG-.-C- .345 cached no-allocate guarded */ -#define XTHAL_LAM_CACHED 0x006 /* .---.AC- 2345 cached speculative */ -#define XTHAL_LAM_CACHEDG 0x026 /* .?G-.AC- .... cached guarded */ -/* For data store: */ -#define XTHAL_SAM_EXCEPTION 0x001 /* .......E 2345 exception */ -#define XTHAL_SAM_ISOLATE 0x032 /* .-GI--C- 2345 isolate */ -#define XTHAL_SAM_BYPASS 0x028 /* -OG-W--- 2345 bypass */ -/*efine XTHAL_SAM_BYPASSF 0x028*/ /* F-G-W--- ...? bypass write-combined */ -#define XTHAL_SAM_WRITETHRU 0x02A /* -OG-W-C- 234? writethrough */ -/*efine XTHAL_SAM_WRITETHRUF 0x02A*/ /* F-G-W-C- ...5 writethrough write-combined */ -#define XTHAL_SAM_WRITEALLOC 0x02E /* -OG-WAC- ...? writethrough-allocate */ -/*efine XTHAL_SAM_WRITEALLOCF 0x02E*/ /* F-G-WAC- ...? 
writethrough-allocate write-combined */ -#define XTHAL_SAM_WRITEBACK 0x026 /* F-G--AC- ...5 writeback */ - -#if 0 -/* - Cache attribute encoding for CACHEATTR (per ISA): - (Note: if this differs from ISA Ref Manual, ISA has precedence) - - Inst-fetches Loads Stores - ------------- ------------ ------------- -0x0 FCA_EXCEPTION ?LCA_NACACHED_G* SCA_WRITETHRU "uncached" -0x1 FCA_CACHED LCA_CACHED SCA_WRITETHRU cached -0x2 FCA_BYPASS LCA_BYPASS_G* SCA_BYPASS bypass -0x3 FCA_CACHED LCA_CACHED SCA_WRITEALLOCF write-allocate - or LCA_EXCEPTION SCA_EXCEPTION (if unimplemented) -0x4 FCA_CACHED LCA_CACHED SCA_WRITEBACK write-back - or LCA_EXCEPTION SCA_EXCEPTION (if unimplemented) -0x5..D FCA_EXCEPTION LCA_EXCEPTION SCA_EXCEPTION (reserved) -0xE FCA_EXCEPTION LCA_ISOLATE SCA_ISOLATE isolate -0xF FCA_EXCEPTION LCA_EXCEPTION SCA_EXCEPTION illegal - * Prior to T1020.2?, guard feature not supported, this defaulted to speculative (no _G) -*/ -#endif /*0*/ - - -#if !defined(__ASSEMBLY__) && !defined(_NOCLANGUAGE) -#ifdef __cplusplus -extern "C" { -#endif - -/*---------------------------------------------------------------------- - HAL - ----------------------------------------------------------------------*/ - -/* Constant to be checked in build = (XTHAL_MAJOR_REV<<16)|XTHAL_MINOR_REV */ -extern const unsigned int Xthal_rev_no; - - -/*---------------------------------------------------------------------- - Processor State - ----------------------------------------------------------------------*/ -/* save & restore the extra processor state */ -extern void xthal_save_extra(void *base); -extern void xthal_restore_extra(void *base); - -extern void xthal_save_cpregs(void *base, int); -extern void xthal_restore_cpregs(void *base, int); - -/*extern void xthal_save_all_extra(void *base);*/ -/*extern void xthal_restore_all_extra(void *base);*/ - -/* space for processor state */ -extern const unsigned int Xthal_extra_size; -extern const unsigned int Xthal_extra_align; -/* space for TIE register files */ -extern const unsigned int Xthal_cpregs_size[XTHAL_MAX_CPS]; -extern const unsigned int Xthal_cpregs_align[XTHAL_MAX_CPS]; - -/* total of space for the processor state (for Tor2) */ -extern const unsigned int Xthal_all_extra_size; -extern const unsigned int Xthal_all_extra_align; - -/* initialize the extra processor */ -/*extern void xthal_init_extra(void);*/ -/* initialize the TIE coprocessor */ -/*extern void xthal_init_cp(int);*/ - -/* initialize the extra processor */ -extern void xthal_init_mem_extra(void *); -/* initialize the TIE coprocessor */ -extern void xthal_init_mem_cp(void *, int); - -/* validate & invalidate the TIE register file */ -extern void xthal_validate_cp(int); -extern void xthal_invalidate_cp(int); - -/* the number of TIE coprocessors contiguous from zero (for Tor2) */ -extern const unsigned int Xthal_num_coprocessors; - -/* actual number of coprocessors */ -extern const unsigned char Xthal_cp_num; -/* index of highest numbered coprocessor, plus one */ -extern const unsigned char Xthal_cp_max; -/* index of highest allowed coprocessor number, per cfg, plus one */ -/*extern const unsigned char Xthal_cp_maxcfg;*/ -/* bitmask of which coprocessors are present */ -extern const unsigned int Xthal_cp_mask; - -/* read and write cpenable register */ -extern void xthal_set_cpenable(unsigned); -extern unsigned xthal_get_cpenable(void); - -/* read & write extra state register */ -/*extern int xthal_read_extra(void *base, unsigned reg, unsigned *value);*/ -/*extern int xthal_write_extra(void *base, 
unsigned reg, unsigned value);*/ - -/* read & write a TIE coprocessor register */ -/*extern int xthal_read_cpreg(void *base, int cp, unsigned reg, unsigned *value);*/ -/*extern int xthal_write_cpreg(void *base, int cp, unsigned reg, unsigned value);*/ - -/* return coprocessor number based on register */ -/*extern int xthal_which_cp(unsigned reg);*/ - -/*---------------------------------------------------------------------- - Interrupts - ----------------------------------------------------------------------*/ - -/* the number of interrupt levels */ -extern const unsigned char Xthal_num_intlevels; -/* the number of interrupts */ -extern const unsigned char Xthal_num_interrupts; - -/* mask for level of interrupts */ -extern const unsigned int Xthal_intlevel_mask[XTHAL_MAX_INTLEVELS]; -/* mask for level 0 to N interrupts */ -extern const unsigned int Xthal_intlevel_andbelow_mask[XTHAL_MAX_INTLEVELS]; - -/* level of each interrupt */ -extern const unsigned char Xthal_intlevel[XTHAL_MAX_INTERRUPTS]; - -/* type per interrupt */ -extern const unsigned char Xthal_inttype[XTHAL_MAX_INTERRUPTS]; - -/* masks of each type of interrupt */ -extern const unsigned int Xthal_inttype_mask[XTHAL_MAX_INTTYPES]; - -/* interrupt numbers assigned to each timer interrupt */ -extern const int Xthal_timer_interrupt[XTHAL_MAX_TIMERS]; - -/*** Virtual interrupt prioritization: ***/ - -/* Convert between interrupt levels (as per PS.INTLEVEL) and virtual interrupt priorities: */ -extern unsigned xthal_vpri_to_intlevel(unsigned vpri); -extern unsigned xthal_intlevel_to_vpri(unsigned intlevel); - -/* Enables/disables given set (mask) of interrupts; returns previous enabled-mask of all ints: */ -extern unsigned xthal_int_enable(unsigned); -extern unsigned xthal_int_disable(unsigned); - -/* Set/get virtual priority of an interrupt: */ -extern int xthal_set_int_vpri(int intnum, int vpri); -extern int xthal_get_int_vpri(int intnum); - -/* Set/get interrupt lockout level for exclusive access to virtual priority data structures: */ -extern void xthal_set_vpri_locklevel(unsigned intlevel); -extern unsigned xthal_get_vpri_locklevel(void); - -/* Set/get current virtual interrupt priority: */ -extern unsigned xthal_set_vpri(unsigned vpri); -extern unsigned xthal_get_vpri(unsigned vpri); -extern unsigned xthal_set_vpri_intlevel(unsigned intlevel); -extern unsigned xthal_set_vpri_lock(void); - - - -/*---------------------------------------------------------------------- - Generic Interrupt Trampolining Support - ----------------------------------------------------------------------*/ - -typedef void (XtHalVoidFunc)(void); - -/* - * Bitmask of interrupts currently trampolining down: - */ -extern unsigned Xthal_tram_pending; - -/* - * Bitmask of which interrupts currently trampolining down - * synchronously are actually enabled; this bitmask is necessary - * because INTENABLE cannot hold that state (sync-trampolining - * interrupts must be kept disabled while trampolining); - * in the current implementation, any bit set here is not set - * in INTENABLE, and vice-versa; once a sync-trampoline is - * handled (at level one), its enable bit must be moved from - * here to INTENABLE: - */ -extern unsigned Xthal_tram_enabled; - -/* - * Bitmask of interrupts configured for sync trampolining: - */ -extern unsigned Xthal_tram_sync; - - -/* Trampoline support functions: */ -extern unsigned xthal_tram_pending_to_service( void ); -extern void xthal_tram_done( unsigned serviced_mask ); -extern int xthal_tram_set_sync( int intnum, int sync ); 
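As one concrete (and purely illustrative) use of the interrupt tables and functions declared above, board code could enable the interrupt wired to the first timer; the wrapper name below is hypothetical:

	/* Enable timer 0's interrupt, if this configuration has one.
	 * Xthal_timer_interrupt[], XTHAL_TIMER_UNCONFIGURED and
	 * xthal_int_enable() are all declared in this header.
	 */
	static int enable_timer0_interrupt_example(void)
	{
		int intnum = Xthal_timer_interrupt[0];

		if (intnum == XTHAL_TIMER_UNCONFIGURED)
			return -1;			/* no timer on this configuration */
		xthal_int_enable(1u << intnum);		/* argument is a bitmask of interrupts */
		return intnum;
	}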
-extern XtHalVoidFunc* xthal_set_tram_trigger_func( XtHalVoidFunc *trigger_fn ); - -/* INTENABLE,INTREAD,INTSET,INTCLEAR register access functions: */ -extern unsigned xthal_get_intenable( void ); -extern void xthal_set_intenable( unsigned ); -extern unsigned xthal_get_intread( void ); -extern void xthal_set_intset( unsigned ); -extern void xthal_set_intclear( unsigned ); - - -/*---------------------------------------------------------------------- - Register Windows - ----------------------------------------------------------------------*/ - -/* number of registers in register window */ -extern const unsigned int Xthal_num_aregs; -extern const unsigned char Xthal_num_aregs_log2; - -/* This spill any live register windows (other than the caller's): */ -extern void xthal_window_spill( void ); - - -/*---------------------------------------------------------------------- - Cache - ----------------------------------------------------------------------*/ - -/* size of the cache lines in log2(bytes) */ -extern const unsigned char Xthal_icache_linewidth; -extern const unsigned char Xthal_dcache_linewidth; -/* size of the cache lines in bytes */ -extern const unsigned short Xthal_icache_linesize; -extern const unsigned short Xthal_dcache_linesize; -/* number of cache sets in log2(lines per way) */ -extern const unsigned char Xthal_icache_setwidth; -extern const unsigned char Xthal_dcache_setwidth; -/* cache set associativity (number of ways) */ -extern const unsigned int Xthal_icache_ways; -extern const unsigned int Xthal_dcache_ways; -/* size of the caches in bytes (ways * 2^(linewidth + setwidth)) */ -extern const unsigned int Xthal_icache_size; -extern const unsigned int Xthal_dcache_size; -/* cache features */ -extern const unsigned char Xthal_dcache_is_writeback; -extern const unsigned char Xthal_icache_line_lockable; -extern const unsigned char Xthal_dcache_line_lockable; - -/* cache attribute register control (used by other HAL routines) */ -extern unsigned xthal_get_cacheattr( void ); -extern unsigned xthal_get_icacheattr( void ); -extern unsigned xthal_get_dcacheattr( void ); -extern void xthal_set_cacheattr( unsigned ); -extern void xthal_set_icacheattr( unsigned ); -extern void xthal_set_dcacheattr( unsigned ); - -/* initialize cache support (must be called once at startup, before all other cache calls) */ -/*extern void xthal_cache_startinit( void );*/ -/* reset caches */ -/*extern void xthal_icache_reset( void );*/ -/*extern void xthal_dcache_reset( void );*/ -/* enable caches */ -extern void xthal_icache_enable( void ); /* DEPRECATED */ -extern void xthal_dcache_enable( void ); /* DEPRECATED */ -/* disable caches */ -extern void xthal_icache_disable( void ); /* DEPRECATED */ -extern void xthal_dcache_disable( void ); /* DEPRECATED */ - -/* invalidate the caches */ -extern void xthal_icache_all_invalidate( void ); -extern void xthal_dcache_all_invalidate( void ); -extern void xthal_icache_region_invalidate( void *addr, unsigned size ); -extern void xthal_dcache_region_invalidate( void *addr, unsigned size ); -extern void xthal_icache_line_invalidate(void *addr); -extern void xthal_dcache_line_invalidate(void *addr); -/* write dirty data back */ -extern void xthal_dcache_all_writeback( void ); -extern void xthal_dcache_region_writeback( void *addr, unsigned size ); -extern void xthal_dcache_line_writeback(void *addr); -/* write dirty data back and invalidate */ -extern void xthal_dcache_all_writeback_inv( void ); -extern void xthal_dcache_region_writeback_inv( void *addr, unsigned size 
); -extern void xthal_dcache_line_writeback_inv(void *addr); -/* prefetch and lock specified memory range into cache */ -extern void xthal_icache_region_lock( void *addr, unsigned size ); -extern void xthal_dcache_region_lock( void *addr, unsigned size ); -extern void xthal_icache_line_lock(void *addr); -extern void xthal_dcache_line_lock(void *addr); -/* unlock from cache */ -extern void xthal_icache_all_unlock( void ); -extern void xthal_dcache_all_unlock( void ); -extern void xthal_icache_region_unlock( void *addr, unsigned size ); -extern void xthal_dcache_region_unlock( void *addr, unsigned size ); -extern void xthal_icache_line_unlock(void *addr); -extern void xthal_dcache_line_unlock(void *addr); - - -/* sync icache and memory */ -extern void xthal_icache_sync( void ); -/* sync dcache and memory */ -extern void xthal_dcache_sync( void ); - -/*---------------------------------------------------------------------- - Debug - ----------------------------------------------------------------------*/ - -/* 1 if debug option configured, 0 if not: */ -extern const int Xthal_debug_configured; - -/* Number of instruction and data break registers: */ -extern const int Xthal_num_ibreak; -extern const int Xthal_num_dbreak; - -/* Set (plant) and remove software breakpoint, both synchronizing cache: */ -extern unsigned int xthal_set_soft_break(void *addr); -extern void xthal_remove_soft_break(void *addr, unsigned int); - - -/*---------------------------------------------------------------------- - Disassembler - ----------------------------------------------------------------------*/ - -/* Max expected size of the return buffer for a disassembled instruction (hint only): */ -#define XTHAL_DISASM_BUFSIZE 80 - -/* Disassembly option bits for selecting what to return: */ -#define XTHAL_DISASM_OPT_ADDR 0x0001 /* display address */ -#define XTHAL_DISASM_OPT_OPHEX 0x0002 /* display opcode bytes in hex */ -#define XTHAL_DISASM_OPT_OPCODE 0x0004 /* display opcode name (mnemonic) */ -#define XTHAL_DISASM_OPT_PARMS 0x0008 /* display parameters */ -#define XTHAL_DISASM_OPT_ALL 0x0FFF /* display everything */ - -/* routine to get a string for the disassembled instruction */ -extern int xthal_disassemble( unsigned char *instr_buf, void *tgt_addr, - char *buffer, unsigned buflen, unsigned options ); - -/* routine to get the size of the next instruction. 
Returns 0 for - illegal instruction */ -extern int xthal_disassemble_size( unsigned char *instr_buf ); - - -/*---------------------------------------------------------------------- - Core Counter - ----------------------------------------------------------------------*/ - -/* counter info */ -extern const unsigned char Xthal_have_ccount; /* set if CCOUNT register present */ -extern const unsigned char Xthal_num_ccompare; /* number of CCOMPAREn registers */ - -/* get CCOUNT register (if not present return 0) */ -extern unsigned xthal_get_ccount(void); - -/* set and get CCOMPAREn registers (if not present, get returns 0) */ -extern void xthal_set_ccompare(int, unsigned); -extern unsigned xthal_get_ccompare(int); - - -/*---------------------------------------------------------------------- - Instruction/Data RAM/ROM Access - ----------------------------------------------------------------------*/ - -extern void* xthal_memcpy(void *dst, const void *src, unsigned len); -extern void* xthal_bcopy(const void *src, void *dst, unsigned len); - -/*---------------------------------------------------------------------- - MP Synchronization - ----------------------------------------------------------------------*/ -extern int xthal_compare_and_set( int *addr, int test_val, int compare_val ); -extern unsigned xthal_get_prid( void ); - -/*extern const char Xthal_have_s32c1i;*/ -extern const unsigned char Xthal_have_prid; - - -/*---------------------------------------------------------------------- - Miscellaneous - ----------------------------------------------------------------------*/ - -extern const unsigned int Xthal_release_major; -extern const unsigned int Xthal_release_minor; -extern const char * const Xthal_release_name; -extern const char * const Xthal_release_internal; - -extern const unsigned char Xthal_memory_order; -extern const unsigned char Xthal_have_windowed; -extern const unsigned char Xthal_have_density; -extern const unsigned char Xthal_have_booleans; -extern const unsigned char Xthal_have_loops; -extern const unsigned char Xthal_have_nsa; -extern const unsigned char Xthal_have_minmax; -extern const unsigned char Xthal_have_sext; -extern const unsigned char Xthal_have_clamps; -extern const unsigned char Xthal_have_mac16; -extern const unsigned char Xthal_have_mul16; -extern const unsigned char Xthal_have_fp; -extern const unsigned char Xthal_have_speculation; -extern const unsigned char Xthal_have_exceptions; -extern const unsigned char Xthal_xea_version; -extern const unsigned char Xthal_have_interrupts; -extern const unsigned char Xthal_have_highlevel_interrupts; -extern const unsigned char Xthal_have_nmi; - -extern const unsigned short Xthal_num_writebuffer_entries; - -extern const unsigned int Xthal_build_unique_id; -/* Release info for hardware targeted by software upgrades: */ -extern const unsigned int Xthal_hw_configid0; -extern const unsigned int Xthal_hw_configid1; -extern const unsigned int Xthal_hw_release_major; -extern const unsigned int Xthal_hw_release_minor; -extern const char * const Xthal_hw_release_name; -extern const char * const Xthal_hw_release_internal; - - -/* Internal memories... 
*/ - -extern const unsigned char Xthal_num_instrom; -extern const unsigned char Xthal_num_instram; -extern const unsigned char Xthal_num_datarom; -extern const unsigned char Xthal_num_dataram; -extern const unsigned char Xthal_num_xlmi; -extern const unsigned int Xthal_instrom_vaddr[1]; -extern const unsigned int Xthal_instrom_paddr[1]; -extern const unsigned int Xthal_instrom_size [1]; -extern const unsigned int Xthal_instram_vaddr[1]; -extern const unsigned int Xthal_instram_paddr[1]; -extern const unsigned int Xthal_instram_size [1]; -extern const unsigned int Xthal_datarom_vaddr[1]; -extern const unsigned int Xthal_datarom_paddr[1]; -extern const unsigned int Xthal_datarom_size [1]; -extern const unsigned int Xthal_dataram_vaddr[1]; -extern const unsigned int Xthal_dataram_paddr[1]; -extern const unsigned int Xthal_dataram_size [1]; -extern const unsigned int Xthal_xlmi_vaddr[1]; -extern const unsigned int Xthal_xlmi_paddr[1]; -extern const unsigned int Xthal_xlmi_size [1]; - - - -/*---------------------------------------------------------------------- - Memory Management Unit - ----------------------------------------------------------------------*/ - -extern const unsigned char Xthal_have_spanning_way; -extern const unsigned char Xthal_have_identity_map; -extern const unsigned char Xthal_have_mimic_cacheattr; -extern const unsigned char Xthal_have_xlt_cacheattr; -extern const unsigned char Xthal_have_cacheattr; -extern const unsigned char Xthal_have_tlbs; - -extern const unsigned char Xthal_mmu_asid_bits; /* 0 .. 8 */ -extern const unsigned char Xthal_mmu_asid_kernel; -extern const unsigned char Xthal_mmu_rings; /* 1 .. 4 (perhaps 0 if no MMU and/or no protection?) */ -extern const unsigned char Xthal_mmu_ring_bits; -extern const unsigned char Xthal_mmu_sr_bits; -extern const unsigned char Xthal_mmu_ca_bits; -extern const unsigned int Xthal_mmu_max_pte_page_size; -extern const unsigned int Xthal_mmu_min_pte_page_size; - -extern const unsigned char Xthal_itlb_way_bits; -extern const unsigned char Xthal_itlb_ways; -extern const unsigned char Xthal_itlb_arf_ways; -extern const unsigned char Xthal_dtlb_way_bits; -extern const unsigned char Xthal_dtlb_ways; -extern const unsigned char Xthal_dtlb_arf_ways; - -/* Convert between virtual and physical addresses (through static maps only): */ -/*** WARNING: these two functions may go away in a future release; don't depend on them! ***/ -extern int xthal_static_v2p( unsigned vaddr, unsigned *paddrp ); -extern int xthal_static_p2v( unsigned paddr, unsigned *vaddrp, unsigned cached ); - -#if 0 -/******************* EXPERIMENTAL AND TENTATIVE ONLY ********************/ - -#define XTHAL_MMU_PAGESZ_COUNT_MAX 8 /* maximum number of different page sizes */ -extern const char Xthal_mmu_pagesz_count; /* 0 .. 8 number of different page sizes configured */ - -/* Note: the following table doesn't necessarily have page sizes in increasing order: */ -extern const char Xthal_mmu_pagesz_log2[XTHAL_MMU_PAGESZ_COUNT_MAX]; /* 10 .. 28 (0 past count) */ - -/* Sorted (increasing) table of page sizes, that indexes into the above table: */ -extern const char Xthal_mmu_pagesz_sorted[XTHAL_MMU_PAGESZ_COUNT_MAX]; /* 0 .. 7 (0 past count) */ - -/*u32 Xthal_virtual_exceptions;*/ /* bitmask of which exceptions execute in virtual mode... */ - -extern const char Xthal_mmu_pte_pagesz_log2_min; /* ?? minimum page size in PTEs */ -extern const char Xthal_mmu_pte_pagesz_log2_max; /* ?? 
maximum page size in PTEs */ - -/* Cache Attribute Bits Implemented by the Cache (part of the cache abstraction) */ -extern const char Xthal_icache_fca_bits_implemented; /* ITLB/UTLB only! */ -extern const char Xthal_dcache_lca_bits_implemented; /* DTLB/UTLB only! */ -extern const char Xthal_dcache_sca_bits_implemented; /* DTLB/UTLB only! */ - -/* Per TLB Parameters (Instruction, Data, Unified) */ -struct XtHalMmuTlb Xthal_itlb; /* description of MMU I-TLB generic features */ -struct XtHalMmuTlb Xthal_dtlb; /* description of MMU D-TLB generic features */ -struct XtHalMmuTlb Xthal_utlb; /* description of MMU U-TLB generic features */ - -#define XTHAL_MMU_WAYS_MAX 8 /* maximum number of ways (associativities) for each TLB */ - -/* Structure for common information described for each possible TLB (instruction, data and unified): */ -typedef struct XtHalMmuTlb { - u8 va_bits; /* 32 (number of virtual address bits) */ - u8 pa_bits; /* 32 (number of physical address bits) */ - bool tlb_va_indexed; /* 1 (set if TLB is indexed by virtual address) */ - bool tlb_va_tagged; /* 0 (set if TLB is tagged by virtual address) */ - bool cache_va_indexed; /* 1 (set if cache is indexed by virtual address) */ - bool cache_va_tagged; /* 0 (set if cache is tagged by virtual address) */ - /*bool (whether page tables are traversed in vaddr sorted order, paddr sorted order, ...) */ - /*u8 (set of available page attribute bits, other than cache attribute bits defined above) */ - /*u32 (various masks for pages, MMU table/TLB entries, etc.) */ - u8 way_count; /* 0 .. 8 (number of ways, a.k.a. associativities, for this TLB) */ - XtHalMmuTlbWay * ways[XTHAL_MMU_WAYS_MAX]; /* pointers to per-way parms for each way */ -} XtHalMmuTlb; - -/* Per TLB Way (Per Associativity) Parameters */ -typedef struct XtHalMmuTlbWay { - u32 index_count_log2; /* 0 .. 4 */ - u32 pagesz_mask; /* 0 .. 2^pagesz_count - 1 (each bit corresponds to a size */ - /* defined in the Xthal_mmu_pagesz_log2[] table) */ - u32 vpn_const_mask; - u32 vpn_const_value; - u64 ppn_const_mask; /* future may support pa_bits > 32 */ - u64 ppn_const_value; - u32 ppn_id_mask; /* paddr bits taken directly from vaddr */ - bool backgnd_match; /* 0 or 1 */ - /* These are defined in terms of the XTHAL_CACHE_xxx bits: */ - u8 fca_const_mask; /* ITLB/UTLB only! */ - u8 fca_const_value; /* ITLB/UTLB only! */ - u8 lca_const_mask; /* DTLB/UTLB only! */ - u8 lca_const_value; /* DTLB/UTLB only! */ - u8 sca_const_mask; /* DTLB/UTLB only! */ - u8 sca_const_value; /* DTLB/UTLB only! */ - /* These define an encoding that map 5 bits in TLB and PTE entries to */ - /* 8 bits (FCA, ITLB), 16 bits (LCA+SCA, DTLB) or 24 bits (FCA+LCA+SCA, UTLB): */ - /* (they may be moved to struct XtHalMmuTlb) */ - u8 ca_bits; /* number of bits in TLB/PTE entries for cache attributes */ - u32 * ca_map; /* pointer to array of 2^ca_bits entries of FCA+LCA+SCA bits */ -} XtHalMmuTlbWay; - -/* - * The way to determine whether protection support is present in core - * is to [look at Xthal_mmu_rings ???]. - * Give info on memory requirements for MMU tables and other in-memory - * data structures (globally, per task, base and per page, etc.) - whatever bounds can be calculated. - */ - - -/* Default vectors: */ -xthal_immu_fetch_miss_vector -xthal_dmmu_load_miss_vector -xthal_dmmu_store_miss_vector - -/* Functions called when a fault is detected: */ -typedef void (XtHalMmuFaultFunc)( unsigned vaddr, ...context... ); -/* Or, */ -/* a? = vaddr */ -/* a? = context... 
*/ -/* PS.xxx = xxx */ -XtHalMMuFaultFunc *Xthal_immu_fetch_fault_func; -XtHalMMuFaultFunc *Xthal_dmmu_load_fault_func; -XtHalMMuFaultFunc *Xthal_dmmu_store_fault_func; - -/* Default Handlers: */ -/* The user and/or kernel exception handlers may jump to these handlers to handle the relevant exceptions, - * according to the value of EXCCAUSE. The exact register state on entry to these handlers is TBD. */ -/* When multiple TLB entries match (hit) on the same access: */ -xthal_immu_fetch_multihit_handler -xthal_dmmu_load_multihit_handler -xthal_dmmu_store_multihit_handler -/* Protection violations according to cache attributes, and other cache attribute mismatches: */ -xthal_immu_fetch_attr_handler -xthal_dmmu_load_attr_handler -xthal_dmmu_store_attr_handler -/* Protection violations due to insufficient ring level: */ -xthal_immu_fetch_priv_handler -xthal_dmmu_load_priv_handler -xthal_dmmu_store_priv_handler -/* Alignment exception handlers (if supported by the particular Xtensa MMU configuration): */ -xthal_dmmu_load_align_handler -xthal_dmmu_store_align_handler - -/* Or, alternatively, the OS user and/or kernel exception handlers may simply jump to the - * following entry points which will handle any values of EXCCAUSE not handled by the OS: */ -xthal_user_exc_default_handler -xthal_kernel_exc_default_handler - -#endif /*0*/ - -#ifdef INCLUDE_DEPRECATED_HAL_CODE -extern const unsigned char Xthal_have_old_exc_arch; -extern const unsigned char Xthal_have_mmu; -extern const unsigned int Xthal_num_regs; -extern const unsigned char Xthal_num_iroms; -extern const unsigned char Xthal_num_irams; -extern const unsigned char Xthal_num_droms; -extern const unsigned char Xthal_num_drams; -extern const unsigned int Xthal_configid0; -extern const unsigned int Xthal_configid1; -#endif - -#ifdef INCLUDE_DEPRECATED_HAL_DEBUG_CODE -#define XTHAL_24_BIT_BREAK 0x80000000 -#define XTHAL_16_BIT_BREAK 0x40000000 -extern const unsigned short Xthal_ill_inst_16[16]; -#define XTHAL_DEST_REG 0xf0000000 /* Mask for destination register */ -#define XTHAL_DEST_REG_INST 0x08000000 /* Branch address is in register */ -#define XTHAL_DEST_REL_INST 0x04000000 /* Branch address is relative */ -#define XTHAL_RFW_INST 0x00000800 -#define XTHAL_RFUE_INST 0x00000400 -#define XTHAL_RFI_INST 0x00000200 -#define XTHAL_RFE_INST 0x00000100 -#define XTHAL_RET_INST 0x00000080 -#define XTHAL_BREAK_INST 0x00000040 -#define XTHAL_SYSCALL_INST 0x00000020 -#define XTHAL_LOOP_END 0x00000010 /* Not set by xthal_inst_type */ -#define XTHAL_JUMP_INST 0x00000008 /* Call or jump instruction */ -#define XTHAL_BRANCH_INST 0x00000004 /* Branch instruction */ -#define XTHAL_24_BIT_INST 0x00000002 -#define XTHAL_16_BIT_INST 0x00000001 -typedef struct xthal_state { - unsigned pc; - unsigned ar[16]; - unsigned lbeg; - unsigned lend; - unsigned lcount; - unsigned extra_ptr; - unsigned cpregs_ptr[XTHAL_MAX_CPS]; -} XTHAL_STATE; -extern unsigned int xthal_inst_type(void *addr); -extern unsigned int xthal_branch_addr(void *addr); -extern unsigned int xthal_get_npc(XTHAL_STATE *user_state); -#endif /* INCLUDE_DEPRECATED_HAL_DEBUG_CODE */ - -#ifdef __cplusplus -} -#endif -#endif /*!__ASSEMBLY__ */ - -#endif /*XTENSA_HAL_H*/ - diff --git a/include/asm-xtensa/xtensa/simcall.h b/include/asm-xtensa/xtensa/simcall.h deleted file mode 100644 index a2b868929a4..00000000000 --- a/include/asm-xtensa/xtensa/simcall.h +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef SIMCALL_INCLUDED -#define SIMCALL_INCLUDED - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * 
include/asm-xtensa/xtensa/simcall.h - Simulator call numbers - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file "COPYING" in the main directory of - * this archive for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -/* - * System call like services offered by the simulator host. - * These are modeled after the Linux 2.4 kernel system calls - * for Xtensa processors. However not all system calls and - * not all functionality of a given system call are implemented, - * or necessarily have well defined or equivalent semantics in - * the context of a simulation (as opposed to a Unix kernel). - * - * These services behave largely as if they had been invoked - * as a task in the simulator host's operating system - * (eg. files accessed are those of the simulator host). - * However, these SIMCALLs model a virtual operating system - * so that various definitions, bit assignments etc - * (eg. open mode bits, errno values, etc) are independent - * of the host operating system used to run the simulation. - * Rather these definitions are specific to the Xtensa ISS. - * This way Xtensa ISA code written to use these SIMCALLs - * can (in principle) be simulated on any host. - * - * Up to 6 parameters are passed in registers a3 to a8 - * (note the 6th parameter isn't passed on the stack, - * unlike windowed function calling conventions). - * The return value is in a2. A negative value in the - * range -4096 to -1 indicates a negated error code to be - * reported in errno with a return value of -1, otherwise - * the value in a2 is returned as is. - */ - -/* These #defines need to match what's in Xtensa/OS/vxworks/xtiss/simcalls.c */ - -#define SYS_nop 0 /* n/a - setup; used to flush register windows */ -#define SYS_exit 1 /*x*/ -#define SYS_fork 2 -#define SYS_read 3 /*x*/ -#define SYS_write 4 /*x*/ -#define SYS_open 5 /*x*/ -#define SYS_close 6 /*x*/ -#define SYS_rename 7 /*x 38 - waitpid */ -#define SYS_creat 8 /*x*/ -#define SYS_link 9 /*x (not implemented on WIN32) */ -#define SYS_unlink 10 /*x*/ -#define SYS_execv 11 /* n/a - execve */ -#define SYS_execve 12 /* 11 - chdir */ -#define SYS_pipe 13 /* 42 - time */ -#define SYS_stat 14 /* 106 - mknod */ -#define SYS_chmod 15 -#define SYS_chown 16 /* 202 - lchown */ -#define SYS_utime 17 /* 30 - break */ -#define SYS_wait 18 /* n/a - oldstat */ -#define SYS_lseek 19 /*x*/ -#define SYS_getpid 20 -#define SYS_isatty 21 /* n/a - mount */ -#define SYS_fstat 22 /* 108 - oldumount */ -#define SYS_time 23 /* 13 - setuid */ -#define SYS_gettimeofday 24 /*x 78 - getuid (not implemented on WIN32) */ -#define SYS_times 25 /*X 43 - stime (Xtensa-specific implementation) */ -#define SYS_socket 26 -#define SYS_sendto 27 -#define SYS_recvfrom 28 -#define SYS_select_one 29 /* not compatible select, one file descriptor at a time */ -#define SYS_bind 30 -#define SYS_ioctl 31 - -/* - * Other... - */ -#define SYS_iss_argc 1000 /* returns value of argc */ -#define SYS_iss_argv_size 1001 /* bytes needed for argv & arg strings */ -#define SYS_iss_set_argv 1002 /* saves argv & arg strings at given addr */ - -/* - * SIMCALLs for the ferret memory debugger. All are invoked by - * libferret.a ...
( Xtensa/Target-Libs/ferret ) - */ -#define SYS_ferret 1010 -#define SYS_malloc 1011 -#define SYS_free 1012 -#define SYS_more_heap 1013 -#define SYS_no_heap 1014 - - -/* - * Extra SIMCALLs for GDB: - */ -#define SYS_gdb_break -1 /* invoked by XTOS on user exceptions if EPC points - to a break.n/break, regardless of cause! */ -#define SYS_xmon_out -2 /* invoked by XMON: ... */ -#define SYS_xmon_in -3 /* invoked by XMON: ... */ -#define SYS_xmon_flush -4 /* invoked by XMON: ... */ -#define SYS_gdb_abort -5 /* invoked by XTOS in _xtos_panic() */ -#define SYS_gdb_illegal_inst -6 /* invoked by XTOS for illegal instructions (too deeply) */ -#define SYS_xmon_init -7 /* invoked by XMON: ... */ -#define SYS_gdb_enter_sktloop -8 /* invoked by XTOS on debug exceptions */ - -/* - * SIMCALLs for vxWorks xtiss BSP: - */ -#define SYS_setup_ppp_pipes -83 -#define SYS_log_msg -84 - -/* - * Test SIMCALLs: - */ -#define SYS_test_write_state -100 -#define SYS_test_read_state -101 - -/* - * SYS_select_one specifiers - */ -#define XTISS_SELECT_ONE_READ 1 -#define XTISS_SELECT_ONE_WRITE 2 -#define XTISS_SELECT_ONE_EXCEPT 3 - -#endif /* !SIMCALL_INCLUDED */ diff --git a/include/asm-xtensa/xtensa/xt2000-uart.h b/include/asm-xtensa/xtensa/xt2000-uart.h deleted file mode 100644 index 0154460f0ed..00000000000 --- a/include/asm-xtensa/xtensa/xt2000-uart.h +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef _uart_h_included_ -#define _uart_h_included_ - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/xt2000-uart.h -- NatSemi PC16552D DUART - * definitions - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/xt2000.h> - - -/* 16550 UART DEVICE REGISTERS - The XT2000 board aligns each register to a 32-bit word but the UART device only uses - one byte of the word, which is the least-significant byte regardless of the - endianness of the core (ie. byte offset 0 for little-endian and 3 for big-endian). - So if using word accesses then endianness doesn't matter. - The macros provided here do that. -*/ -struct uart_dev_s { - union { - unsigned int rxb; /* DLAB=0: receive buffer, read-only */ - unsigned int txb; /* DLAB=0: transmit buffer, write-only */ - unsigned int dll; /* DLAB=1: divisor, least-significant byte latch (was write-only?) */ - } w0; - union { - unsigned int ier; /* DLAB=0: interrupt-enable register (was write-only?) */ - unsigned int dlm; /* DLAB=1: divisor, most-significant byte latch (was write-only?) 
 */ - } w1; - - union { - unsigned int isr; /* DLAB=0: interrupt status register, read-only */ - unsigned int fcr; /* DLAB=0: FIFO control register, write-only */ - unsigned int afr; /* DLAB=1: alternate function register */ - } w2; - - unsigned int lcr; /* line control-register, write-only */ - unsigned int mcr; /* modem control-register, write-only */ - unsigned int lsr; /* line status register, read-only */ - unsigned int msr; /* modem status register, read-only */ - unsigned int scr; /* scratch register, read/write */ -}; - -#define _RXB(u) ((u)->w0.rxb) -#define _TXB(u) ((u)->w0.txb) -#define _DLL(u) ((u)->w0.dll) -#define _IER(u) ((u)->w1.ier) -#define _DLM(u) ((u)->w1.dlm) -#define _ISR(u) ((u)->w2.isr) -#define _FCR(u) ((u)->w2.fcr) -#define _AFR(u) ((u)->w2.afr) -#define _LCR(u) ((u)->lcr) -#define _MCR(u) ((u)->mcr) -#define _LSR(u) ((u)->lsr) -#define _MSR(u) ((u)->msr) -#define _SCR(u) ((u)->scr) - -typedef volatile struct uart_dev_s uart_dev_t; - -/* IER bits */ -#define RCVR_DATA_REG_INTENABLE 0x01 -#define XMIT_HOLD_REG_INTENABLE 0x02 -#define RCVR_STATUS_INTENABLE 0x04 -#define MODEM_STATUS_INTENABLE 0x08 - -/* FCR bits */ -#define _FIFO_ENABLE 0x01 -#define RCVR_FIFO_RESET 0x02 -#define XMIT_FIFO_RESET 0x04 -#define DMA_MODE_SELECT 0x08 -#define RCVR_TRIGGER_LSB 0x40 -#define RCVR_TRIGGER_MSB 0x80 - -/* AFR bits */ -#define AFR_CONC_WRITE 0x01 -#define AFR_BAUDOUT_SEL 0x02 -#define AFR_RXRDY_SEL 0x04 - -/* ISR bits */ -#define INT_STATUS(r) ((r)&1) -#define INT_PRIORITY(r) (((r)>>1)&0x7) - -/* LCR bits */ -#define WORD_LENGTH(n) (((n)-5)&0x3) -#define STOP_BIT_ENABLE 0x04 -#define PARITY_ENABLE 0x08 -#define EVEN_PARITY 0x10 -#define FORCE_PARITY 0x20 -#define XMIT_BREAK 0x40 -#define DLAB_ENABLE 0x80 - -/* MCR bits */ -#define _DTR 0x01 -#define _RTS 0x02 -#define _OP1 0x04 -#define _OP2 0x08 -#define LOOP_BACK 0x10 - -/* LSR Bits */ -#define RCVR_DATA_READY 0x01 -#define OVERRUN_ERROR 0x02 -#define PARITY_ERROR 0x04 -#define FRAMING_ERROR 0x08 -#define BREAK_INTERRUPT 0x10 -#define XMIT_HOLD_EMPTY 0x20 -#define XMIT_EMPTY 0x40 -#define FIFO_ERROR 0x80 -#define RCVR_READY(u) (_LSR(u)&RCVR_DATA_READY) -#define XMIT_READY(u) (_LSR(u)&XMIT_HOLD_EMPTY) - -/* MSR bits */ -#define _RDR 0x01 -#define DELTA_DSR 0x02 -#define DELTA_RI 0x04 -#define DELTA_CD 0x08 -#define _CTS 0x10 -#define _DSR 0x20 -#define _RI 0x40 -#define _CD 0x80 - -/* prototypes */ -void uart_init( uart_dev_t *u, int bitrate ); -void uart_out( uart_dev_t *u, char c ); -void uart_puts( uart_dev_t *u, char *s ); -char uart_in( uart_dev_t *u ); -void uart_enable_rcvr_int( uart_dev_t *u ); -void uart_disable_rcvr_int( uart_dev_t *u ); - -#ifdef DUART16552_1_VADDR -/* DUART present. */ -#define DUART_1_BASE (*(uart_dev_t*)DUART16552_1_VADDR) -#define DUART_2_BASE (*(uart_dev_t*)DUART16552_2_VADDR) -#define UART1_PUTS(s) uart_puts( &DUART_1_BASE, s ) -#define UART2_PUTS(s) uart_puts( &DUART_2_BASE, s ) -#else -/* DUART not configured, use dummy placeholders to allow compiles to work.
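The prototypes above (uart_init(), uart_out(), uart_puts(), ...) are the intended interface; purely as an illustration of how the register-access and LSR macros fit together, a polled transmit could look like the following sketch (helper name hypothetical):

	/* Busy-wait until the transmit holding register is empty, then write
	 * the character into the low byte of the 32-bit txb register word.
	 */
	static void uart_poll_putc_example(uart_dev_t *u, char c)
	{
		while (!XMIT_READY(u))
			;
		_TXB(u) = (unsigned char)c;
	}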
*/ -#define DUART_1_BASE (*(uart_dev_t*)0) -#define DUART_2_BASE (*(uart_dev_t*)0) -#define UART1_PUTS(s) -#define UART2_PUTS(s) -#endif - -/* Compute 16-bit divisor for baudrate generator, with rounding: */ -#define DUART_DIVISOR(crystal,speed) (((crystal)/16 + (speed)/2)/(speed)) - -#endif /*_uart_h_included_*/ - diff --git a/include/asm-xtensa/xtensa/xt2000.h b/include/asm-xtensa/xtensa/xt2000.h deleted file mode 100644 index 703a45002f8..00000000000 --- a/include/asm-xtensa/xtensa/xt2000.h +++ /dev/null @@ -1,408 +0,0 @@ -#ifndef _INC_XT2000_H_ -#define _INC_XT2000_H_ - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * include/asm-xtensa/xtensa/xt2000.h - Definitions specific to the - * Tensilica XT2000 Emulation Board - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/config/core.h> -#include <xtensa/config/system.h> - - -/* - * Default assignment of XT2000 devices to external interrupts. - */ - -/* Ethernet interrupt: */ -#ifdef XCHAL_EXTINT3_NUM -#define SONIC83934_INTNUM XCHAL_EXTINT3_NUM -#define SONIC83934_INTLEVEL XCHAL_EXTINT3_LEVEL -#define SONIC83934_INTMASK XCHAL_EXTINT3_MASK -#else -#define SONIC83934_INTMASK 0 -#endif - -/* DUART channel 1 interrupt (P1 - console): */ -#ifdef XCHAL_EXTINT4_NUM -#define DUART16552_1_INTNUM XCHAL_EXTINT4_NUM -#define DUART16552_1_INTLEVEL XCHAL_EXTINT4_LEVEL -#define DUART16552_1_INTMASK XCHAL_EXTINT4_MASK -#else -#define DUART16552_1_INTMASK 0 -#endif - -/* DUART channel 2 interrupt (P2 - 2nd serial port): */ -#ifdef XCHAL_EXTINT5_NUM -#define DUART16552_2_INTNUM XCHAL_EXTINT5_NUM -#define DUART16552_2_INTLEVEL XCHAL_EXTINT5_LEVEL -#define DUART16552_2_INTMASK XCHAL_EXTINT5_MASK -#else -#define DUART16552_2_INTMASK 0 -#endif - -/* FPGA-combined PCI/etc interrupts: */ -#ifdef XCHAL_EXTINT6_NUM -#define XT2000_FPGAPCI_INTNUM XCHAL_EXTINT6_NUM -#define XT2000_FPGAPCI_INTLEVEL XCHAL_EXTINT6_LEVEL -#define XT2000_FPGAPCI_INTMASK XCHAL_EXTINT6_MASK -#else -#define XT2000_FPGAPCI_INTMASK 0 -#endif - - - -/* - * Device addresses. - * - * Note: for endianness-independence, use 32-bit loads and stores for all - * register accesses to Ethernet, DUART and LED devices. Undefined bits - * may need to be masked out if needed when reading if the actual register - * size is smaller than 32 bits. - * - * Note: XT2000 bus byte lanes are defined in terms of msbyte and lsbyte - * relative to the processor. So 32-bit registers are accessed consistently - * from both big and little endian processors. However, this means byte - * sequences are not consistent between big and little endian processors. - * This is fine for RAM, and for ROM if ROM is created for a specific - * processor (and thus has correct byte sequences). However this may be - * unexpected for Flash, which might contain a file-system that one wants - * to use for multiple processor configurations (eg. the Flash might contain - * the Ethernet card's address, endianness-independent application data, etc). - * That is, byte sequences written in Flash by a core of a given endianness - * will be byte-swapped when seen by a core of the other endianness. - * Someone implementing an endianness-independent Flash file system will - * likely handle this byte-swapping issue in the Flash driver software. 
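Because each *_INTMASK macro above collapses to 0 when the corresponding external interrupt is not configured, board code can pass it straight to the HAL interrupt-enable routine declared in hal.h. A hypothetical sketch (function name illustrative):

	/* Enable the SONIC Ethernet controller's interrupt, if it is wired up
	 * on this configuration; SONIC83934_INTMASK is 0 otherwise.
	 */
	static void xt2000_enable_ethernet_irq_example(void)
	{
		if (SONIC83934_INTMASK)
			xthal_int_enable(SONIC83934_INTMASK);
	}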
- */ - -#define DUART16552_XTAL_FREQ 18432000 /* crystal frequency in Hz */ -#define XTBOARD_FLASH_MAXSIZE 0x4000000 /* 64 MB (max; depends on what is socketed!) */ -#define XTBOARD_EPROM_MAXSIZE 0x0400000 /* 4 MB (max; depends on what is socketed!) */ -#define XTBOARD_EEPROM_MAXSIZE 0x0080000 /* 512 kB (max; depends on what is socketed!) */ -#define XTBOARD_ASRAM_SIZE 0x0100000 /* 1 MB */ -#define XTBOARD_PCI_MEM_SIZE 0x8000000 /* 128 MB (allocated) */ -#define XTBOARD_PCI_IO_SIZE 0x1000000 /* 16 MB (allocated) */ - -#ifdef XSHAL_IOBLOCK_BYPASS_PADDR -/* PCI memory space: */ -# define XTBOARD_PCI_MEM_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0x0000000) -/* Socketed Flash (eg. 2 x 16-bit devices): */ -# define XTBOARD_FLASH_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0x8000000) -/* PCI I/O space: */ -# define XTBOARD_PCI_IO_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xC000000) -/* V3 PCI interface chip register/config space: */ -# define XTBOARD_V3PCI_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD000000) -/* Bus Interface registers: */ -# define XTBOARD_BUSINT_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD010000) -/* FPGA registers: */ -# define XT2000_FPGAREGS_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD020000) -/* SONIC SN83934 Ethernet controller/transceiver: */ -# define SONIC83934_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD030000) -/* 8-character bitmapped LED display: */ -# define XTBOARD_LED_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD040000) -/* National-Semi PC16552D DUART: */ -# define DUART16552_1_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD050020) /* channel 1 (P1 - console) */ -# define DUART16552_2_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD050000) /* channel 2 (P2) */ -/* Asynchronous Static RAM: */ -# define XTBOARD_ASRAM_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD400000) -/* 8-bit EEPROM: */ -# define XTBOARD_EEPROM_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD600000) -/* 2 x 16-bit EPROMs: */ -# define XTBOARD_EPROM_PADDR (XSHAL_IOBLOCK_BYPASS_PADDR+0xD800000) -#endif /* XSHAL_IOBLOCK_BYPASS_PADDR */ - -/* These devices might be accessed cached: */ -#ifdef XSHAL_IOBLOCK_CACHED_PADDR -# define XTBOARD_PCI_MEM_CACHED_PADDR (XSHAL_IOBLOCK_CACHED_PADDR+0x0000000) -# define XTBOARD_FLASH_CACHED_PADDR (XSHAL_IOBLOCK_CACHED_PADDR+0x8000000) -# define XTBOARD_ASRAM_CACHED_PADDR (XSHAL_IOBLOCK_CACHED_PADDR+0xD400000) -# define XTBOARD_EEPROM_CACHED_PADDR (XSHAL_IOBLOCK_CACHED_PADDR+0xD600000) -# define XTBOARD_EPROM_CACHED_PADDR (XSHAL_IOBLOCK_CACHED_PADDR+0xD800000) -#endif /* XSHAL_IOBLOCK_CACHED_PADDR */ - - -/*** Same thing over again, this time with virtual addresses: ***/ - -#ifdef XSHAL_IOBLOCK_BYPASS_VADDR -/* PCI memory space: */ -# define XTBOARD_PCI_MEM_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0x0000000) -/* Socketed Flash (eg. 
2 x 16-bit devices): */ -# define XTBOARD_FLASH_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0x8000000) -/* PCI I/O space: */ -# define XTBOARD_PCI_IO_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xC000000) -/* V3 PCI interface chip register/config space: */ -# define XTBOARD_V3PCI_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD000000) -/* Bus Interface registers: */ -# define XTBOARD_BUSINT_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD010000) -/* FPGA registers: */ -# define XT2000_FPGAREGS_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD020000) -/* SONIC SN83934 Ethernet controller/transceiver: */ -# define SONIC83934_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD030000) -/* 8-character bitmapped LED display: */ -# define XTBOARD_LED_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD040000) -/* National-Semi PC16552D DUART: */ -# define DUART16552_1_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD050020) /* channel 1 (P1 - console) */ -# define DUART16552_2_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD050000) /* channel 2 (P2) */ -/* Asynchronous Static RAM: */ -# define XTBOARD_ASRAM_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD400000) -/* 8-bit EEPROM: */ -# define XTBOARD_EEPROM_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD600000) -/* 2 x 16-bit EPROMs: */ -# define XTBOARD_EPROM_VADDR (XSHAL_IOBLOCK_BYPASS_VADDR+0xD800000) -#endif /* XSHAL_IOBLOCK_BYPASS_VADDR */ - -/* These devices might be accessed cached: */ -#ifdef XSHAL_IOBLOCK_CACHED_VADDR -# define XTBOARD_PCI_MEM_CACHED_VADDR (XSHAL_IOBLOCK_CACHED_VADDR+0x0000000) -# define XTBOARD_FLASH_CACHED_VADDR (XSHAL_IOBLOCK_CACHED_VADDR+0x8000000) -# define XTBOARD_ASRAM_CACHED_VADDR (XSHAL_IOBLOCK_CACHED_VADDR+0xD400000) -# define XTBOARD_EEPROM_CACHED_VADDR (XSHAL_IOBLOCK_CACHED_VADDR+0xD600000) -# define XTBOARD_EPROM_CACHED_VADDR (XSHAL_IOBLOCK_CACHED_VADDR+0xD800000) -#endif /* XSHAL_IOBLOCK_CACHED_VADDR */ - - -/* System ROM: */ -#define XTBOARD_ROM_SIZE XSHAL_ROM_SIZE -#ifdef XSHAL_ROM_VADDR -#define XTBOARD_ROM_VADDR XSHAL_ROM_VADDR -#endif -#ifdef XSHAL_ROM_PADDR -#define XTBOARD_ROM_PADDR XSHAL_ROM_PADDR -#endif - -/* System RAM: */ -#define XTBOARD_RAM_SIZE XSHAL_RAM_SIZE -#ifdef XSHAL_RAM_VADDR -#define XTBOARD_RAM_VADDR XSHAL_RAM_VADDR -#endif -#ifdef XSHAL_RAM_PADDR -#define XTBOARD_RAM_PADDR XSHAL_RAM_PADDR -#endif -#define XTBOARD_RAM_BYPASS_VADDR XSHAL_RAM_BYPASS_VADDR -#define XTBOARD_RAM_BYPASS_PADDR XSHAL_RAM_BYPASS_PADDR - - - -/* - * Things that depend on device addresses. - */ - - -#define XTBOARD_CACHEATTR_WRITEBACK XSHAL_XT2000_CACHEATTR_WRITEBACK -#define XTBOARD_CACHEATTR_WRITEALLOC XSHAL_XT2000_CACHEATTR_WRITEALLOC -#define XTBOARD_CACHEATTR_WRITETHRU XSHAL_XT2000_CACHEATTR_WRITETHRU -#define XTBOARD_CACHEATTR_BYPASS XSHAL_XT2000_CACHEATTR_BYPASS -#define XTBOARD_CACHEATTR_DEFAULT XSHAL_XT2000_CACHEATTR_DEFAULT - -#define XTBOARD_BUSINT_PIPE_REGIONS XSHAL_XT2000_PIPE_REGIONS -#define XTBOARD_BUSINT_SDRAM_REGIONS XSHAL_XT2000_SDRAM_REGIONS - - - -/* - * BusLogic (FPGA) registers. - * All these registers are normally accessed using 32-bit loads/stores. 
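The register-access macros and bit-field definitions that follow are meant to be used with plain 32-bit loads and stores; as a purely illustrative sketch (helper name hypothetical), the FPGA date-code register could be decoded like this:

	/* Read the date-code register and split out its BCD-coded fields
	 * using the XT2000_DATECD_* masks and shifts defined below.
	 */
	static void xt2000_read_datecode_example(unsigned *month, unsigned *day,
						 unsigned *year)
	{
		unsigned datecd = XT2000_DATECD_REG;

		*month = (datecd & XT2000_DATECD_MONTH_MASK) >> XT2000_DATECD_MONTH_SHIFT;
		*day   = (datecd & XT2000_DATECD_DAY_MASK)   >> XT2000_DATECD_DAY_SHIFT;
		*year  = (datecd & XT2000_DATECD_YEAR_MASK)  >> XT2000_DATECD_YEAR_SHIFT;
	}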
- */ - -/* Register offsets: */ -#define XT2000_DATECD_OFS 0x00 /* date code (read-only) */ -#define XT2000_STSREG_OFS 0x04 /* status (read-only) */ -#define XT2000_SYSLED_OFS 0x08 /* system LED */ -#define XT2000_WRPROT_OFS 0x0C /* write protect */ -#define XT2000_SWRST_OFS 0x10 /* software reset */ -#define XT2000_SYSRST_OFS 0x14 /* system (peripherals) reset */ -#define XT2000_IMASK_OFS 0x18 /* interrupt mask */ -#define XT2000_ISTAT_OFS 0x1C /* interrupt status */ -#define XT2000_V3CFG_OFS 0x20 /* V3 config (V320 PCI) */ - -/* Physical register addresses: */ -#ifdef XT2000_FPGAREGS_PADDR -#define XT2000_DATECD_PADDR (XT2000_FPGAREGS_PADDR+XT2000_DATECD_OFS) -#define XT2000_STSREG_PADDR (XT2000_FPGAREGS_PADDR+XT2000_STSREG_OFS) -#define XT2000_SYSLED_PADDR (XT2000_FPGAREGS_PADDR+XT2000_SYSLED_OFS) -#define XT2000_WRPROT_PADDR (XT2000_FPGAREGS_PADDR+XT2000_WRPROT_OFS) -#define XT2000_SWRST_PADDR (XT2000_FPGAREGS_PADDR+XT2000_SWRST_OFS) -#define XT2000_SYSRST_PADDR (XT2000_FPGAREGS_PADDR+XT2000_SYSRST_OFS) -#define XT2000_IMASK_PADDR (XT2000_FPGAREGS_PADDR+XT2000_IMASK_OFS) -#define XT2000_ISTAT_PADDR (XT2000_FPGAREGS_PADDR+XT2000_ISTAT_OFS) -#define XT2000_V3CFG_PADDR (XT2000_FPGAREGS_PADDR+XT2000_V3CFG_OFS) -#endif - -/* Virtual register addresses: */ -#ifdef XT2000_FPGAREGS_VADDR -#define XT2000_DATECD_VADDR (XT2000_FPGAREGS_VADDR+XT2000_DATECD_OFS) -#define XT2000_STSREG_VADDR (XT2000_FPGAREGS_VADDR+XT2000_STSREG_OFS) -#define XT2000_SYSLED_VADDR (XT2000_FPGAREGS_VADDR+XT2000_SYSLED_OFS) -#define XT2000_WRPROT_VADDR (XT2000_FPGAREGS_VADDR+XT2000_WRPROT_OFS) -#define XT2000_SWRST_VADDR (XT2000_FPGAREGS_VADDR+XT2000_SWRST_OFS) -#define XT2000_SYSRST_VADDR (XT2000_FPGAREGS_VADDR+XT2000_SYSRST_OFS) -#define XT2000_IMASK_VADDR (XT2000_FPGAREGS_VADDR+XT2000_IMASK_OFS) -#define XT2000_ISTAT_VADDR (XT2000_FPGAREGS_VADDR+XT2000_ISTAT_OFS) -#define XT2000_V3CFG_VADDR (XT2000_FPGAREGS_VADDR+XT2000_V3CFG_OFS) -/* Register access (for C code): */ -#define XT2000_DATECD_REG (*(volatile unsigned*) XT2000_DATECD_VADDR) -#define XT2000_STSREG_REG (*(volatile unsigned*) XT2000_STSREG_VADDR) -#define XT2000_SYSLED_REG (*(volatile unsigned*) XT2000_SYSLED_VADDR) -#define XT2000_WRPROT_REG (*(volatile unsigned*) XT2000_WRPROT_VADDR) -#define XT2000_SWRST_REG (*(volatile unsigned*) XT2000_SWRST_VADDR) -#define XT2000_SYSRST_REG (*(volatile unsigned*) XT2000_SYSRST_VADDR) -#define XT2000_IMASK_REG (*(volatile unsigned*) XT2000_IMASK_VADDR) -#define XT2000_ISTAT_REG (*(volatile unsigned*) XT2000_ISTAT_VADDR) -#define XT2000_V3CFG_REG (*(volatile unsigned*) XT2000_V3CFG_VADDR) -#endif - -/* DATECD (date code) bit fields: */ - -/* BCD-coded month (01..12): */ -#define XT2000_DATECD_MONTH_SHIFT 24 -#define XT2000_DATECD_MONTH_BITS 8 -#define XT2000_DATECD_MONTH_MASK 0xFF000000 -/* BCD-coded day (01..31): */ -#define XT2000_DATECD_DAY_SHIFT 16 -#define XT2000_DATECD_DAY_BITS 8 -#define XT2000_DATECD_DAY_MASK 0x00FF0000 -/* BCD-coded year (2001..9999): */ -#define XT2000_DATECD_YEAR_SHIFT 0 -#define XT2000_DATECD_YEAR_BITS 16 -#define XT2000_DATECD_YEAR_MASK 0x0000FFFF - -/* STSREG (status) bit fields: */ - -/* Switch SW3 setting bit fields (0=off/up, 1=on/down): */ -#define XT2000_STSREG_SW3_SHIFT 0 -#define XT2000_STSREG_SW3_BITS 4 -#define XT2000_STSREG_SW3_MASK 0x0000000F -/* Boot-select bits of switch SW3: */ -#define XT2000_STSREG_BOOTSEL_SHIFT 0 -#define XT2000_STSREG_BOOTSEL_BITS 2 -#define XT2000_STSREG_BOOTSEL_MASK 0x00000003 -/* Boot-select values: */ -#define XT2000_STSREG_BOOTSEL_FLASH 0 -#define 
XT2000_STSREG_BOOTSEL_EPROM16 1 -#define XT2000_STSREG_BOOTSEL_PROM8 2 -#define XT2000_STSREG_BOOTSEL_ASRAM 3 -/* User-defined bits of switch SW3: */ -#define XT2000_STSREG_SW3_2_SHIFT 2 -#define XT2000_STSREG_SW3_2_MASK 0x00000004 -#define XT2000_STSREG_SW3_3_SHIFT 3 -#define XT2000_STSREG_SW3_3_MASK 0x00000008 - -/* SYSLED (system LED) bit fields: */ - -/* LED control bit (0=off, 1=on): */ -#define XT2000_SYSLED_LEDON_SHIFT 0 -#define XT2000_SYSLED_LEDON_MASK 0x00000001 - -/* WRPROT (write protect) bit fields (0=writable, 1=write-protected [default]): */ - -/* Flash write protect: */ -#define XT2000_WRPROT_FLWP_SHIFT 0 -#define XT2000_WRPROT_FLWP_MASK 0x00000001 -/* Reserved but present write protect bits: */ -#define XT2000_WRPROT_WRP_SHIFT 1 -#define XT2000_WRPROT_WRP_BITS 7 -#define XT2000_WRPROT_WRP_MASK 0x000000FE - -/* SWRST (software reset; allows s/w to generate power-on equivalent reset): */ - -/* Software reset bits: */ -#define XT2000_SWRST_SWR_SHIFT 0 -#define XT2000_SWRST_SWR_BITS 16 -#define XT2000_SWRST_SWR_MASK 0x0000FFFF -/* Software reset value -- writing this value resets the board: */ -#define XT2000_SWRST_RESETVALUE 0x0000DEAD - -/* SYSRST (system reset; controls reset of individual peripherals): */ - -/* All-device reset: */ -#define XT2000_SYSRST_ALL_SHIFT 0 -#define XT2000_SYSRST_ALL_BITS 4 -#define XT2000_SYSRST_ALL_MASK 0x0000000F -/* HDSP-2534 LED display reset (1=reset, 0=nothing): */ -#define XT2000_SYSRST_LED_SHIFT 0 -#define XT2000_SYSRST_LED_MASK 0x00000001 -/* Sonic DP83934 Ethernet controller reset (1=reset, 0=nothing): */ -#define XT2000_SYSRST_SONIC_SHIFT 1 -#define XT2000_SYSRST_SONIC_MASK 0x00000002 -/* DP16552 DUART reset (1=reset, 0=nothing): */ -#define XT2000_SYSRST_DUART_SHIFT 2 -#define XT2000_SYSRST_DUART_MASK 0x00000004 -/* V3 V320 PCI bridge controller reset (1=reset, 0=nothing): */ -#define XT2000_SYSRST_V3_SHIFT 3 -#define XT2000_SYSRST_V3_MASK 0x00000008 - -/* IMASK (interrupt mask; 0=disable, 1=enable): */ -/* ISTAT (interrupt status; 0=inactive, 1=pending): */ - -/* PCI INTP interrupt: */ -#define XT2000_INTMUX_PCI_INTP_SHIFT 2 -#define XT2000_INTMUX_PCI_INTP_MASK 0x00000004 -/* PCI INTS interrupt: */ -#define XT2000_INTMUX_PCI_INTS_SHIFT 3 -#define XT2000_INTMUX_PCI_INTS_MASK 0x00000008 -/* PCI INTD interrupt: */ -#define XT2000_INTMUX_PCI_INTD_SHIFT 4 -#define XT2000_INTMUX_PCI_INTD_MASK 0x00000010 -/* V320 PCI controller interrupt: */ -#define XT2000_INTMUX_V3_SHIFT 5 -#define XT2000_INTMUX_V3_MASK 0x00000020 -/* PCI ENUM interrupt: */ -#define XT2000_INTMUX_PCI_ENUM_SHIFT 6 -#define XT2000_INTMUX_PCI_ENUM_MASK 0x00000040 -/* PCI DEG interrupt: */ -#define XT2000_INTMUX_PCI_DEG_SHIFT 7 -#define XT2000_INTMUX_PCI_DEG_MASK 0x00000080 - -/* V3CFG (V3 config, V320 PCI controller): */ - -/* V3 address control (0=pass-thru, 1=V3 address bits 31:28 set to 4'b0001 [default]): */ -#define XT2000_V3CFG_V3ADC_SHIFT 0 -#define XT2000_V3CFG_V3ADC_MASK 0x00000001 - -/* I2C Devices */ - -#define XT2000_I2C_RTC_ID 0x68 -#define XT2000_I2C_NVRAM0_ID 0x56 /* 1st 256 byte block */ -#define XT2000_I2C_NVRAM1_ID 0x57 /* 2nd 256 byte block */ - -/* NVRAM Board Info structure: */ - -#define XT2000_NVRAM_SIZE 512 - -#define XT2000_NVRAM_BINFO_START 0x100 -#define XT2000_NVRAM_BINFO_SIZE 0x20 -#define XT2000_NVRAM_BINFO_VERSION 0x10 /* version 1.0 */ -#if 0 -#define XT2000_NVRAM_BINFO_VERSION_OFFSET 0x00 -#define XT2000_NVRAM_BINFO_VERSION_SIZE 0x1 -#define XT2000_NVRAM_BINFO_ETH_ADDR_OFFSET 0x02 -#define XT2000_NVRAM_BINFO_ETH_ADDR_SIZE 0x6 -#define 
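
The SHIFT/MASK/REG macros above follow the usual memory-mapped register idiom for this board. As a hedged illustration only (the XT2000 headers are being deleted by this patch, and every name below other than the macros themselves is made up), reading a BCD date field and triggering the board reset would look like:

#include <xtensa/xt2000.h>      /* assumes XT2000_FPGAREGS_VADDR is mapped */

/* Extract the BCD-coded build month from the FPGA date-code register. */
static unsigned xt2000_build_month(void)
{
        unsigned datecd = XT2000_DATECD_REG;    /* volatile 32-bit load */

        return (datecd & XT2000_DATECD_MONTH_MASK) >> XT2000_DATECD_MONTH_SHIFT;
}

/* Writing the magic value to SWRST resets the whole board. */
static void xt2000_board_reset(void)
{
        XT2000_SWRST_REG = XT2000_SWRST_RESETVALUE;
}
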
XT2000_NVRAM_BINFO_SN_OFFSET 0x10 -#define XT2000_NVRAM_BINFO_SN_SIZE 0xE -#define XT2000_NVRAM_BINFO_CRC_OFFSET 0x1E -#define XT2000_NVRAM_BINFO_CRC_SIZE 0x2 -#endif /*0*/ - -#if !defined(__ASSEMBLY__) && !defined(_NOCLANGUAGE) -typedef struct xt2000_nvram_binfo { - unsigned char version; - unsigned char reserved1; - unsigned char eth_addr[6]; - unsigned char reserved8[8]; - unsigned char serialno[14]; - unsigned char crc[2]; /* 16-bit CRC */ -} xt2000_nvram_binfo; -#endif /*!__ASSEMBLY__ && !_NOCLANGUAGE*/ - - -#endif /*_INC_XT2000_H_*/ - diff --git a/include/asm-xtensa/xtensa/xtboard.h b/include/asm-xtensa/xtensa/xtboard.h deleted file mode 100644 index 22469c17530..00000000000 --- a/include/asm-xtensa/xtensa/xtboard.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef _xtboard_h_included_ -#define _xtboard_h_included_ - -/* - * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND - * - * xtboard.h -- Routines for getting useful information from the board. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002 Tensilica Inc. - */ - - -#include <xtensa/xt2000.h> - -#define XTBOARD_RTC_ERROR -1 -#define XTBOARD_RTC_STOPPED -2 - - -/* xt2000-i2cdev.c: */ -typedef void XtboardDelayFunc( unsigned ); -extern XtboardDelayFunc* xtboard_set_nsdelay_func( XtboardDelayFunc *delay_fn ); -extern int xtboard_i2c_read (unsigned id, unsigned char *buf, unsigned addr, unsigned size); -extern int xtboard_i2c_write(unsigned id, unsigned char *buf, unsigned addr, unsigned size); -extern int xtboard_i2c_wait_nvram_ack(unsigned id, unsigned swtimer); - -/* xtboard.c: */ -extern int xtboard_nvram_read (unsigned addr, unsigned len, unsigned char *buf); -extern int xtboard_nvram_write(unsigned addr, unsigned len, unsigned char *buf); -extern int xtboard_nvram_binfo_read (xt2000_nvram_binfo *buf); -extern int xtboard_nvram_binfo_write(xt2000_nvram_binfo *buf); -extern int xtboard_nvram_binfo_valid(xt2000_nvram_binfo *buf); -extern int xtboard_ethermac_get(unsigned char *buf); -extern int xtboard_ethermac_set(unsigned char *buf); - -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_get_rtc_time -/ -/ Description: Get time stored in real-time clock. -/ -/ Returns: time in seconds stored in real-time clock. -/-**----------------------------------------------------------------------------*/ - -extern unsigned xtboard_get_rtc_time(void); - -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_set_rtc_time -/ -/ Description: Set time stored in real-time clock. -/ -/ Parameters: time -- time in seconds to store to real-time clock -/ -/ Returns: 0 on success, xtboard_i2c_write() error code otherwise. -/-**----------------------------------------------------------------------------*/ - -extern int xtboard_set_rtc_time(unsigned time); - - -/* xtfreq.c: */ -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_measure_sys_clk -/ -/ Description: Get frequency of system clock. -/ -/ Parameters: none -/ -/ Returns: frequency of system clock. 
-/-**----------------------------------------------------------------------------*/ - -extern unsigned xtboard_measure_sys_clk(void); - - -#if 0 /* old stuff from xtboard.c: */ - -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_nvram valid -/ -/ Description: Determines if data in NVRAM is valid. -/ -/ Parameters: delay -- 10us delay function -/ -/ Returns: 1 if NVRAM is valid, 0 otherwise -/-**----------------------------------------------------------------------------*/ - -extern unsigned xtboard_nvram_valid(void (*delay)( void )); - -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_get_nvram_contents -/ -/ Description: Returns contents of NVRAM. -/ -/ Parameters: buf -- buffer to NVRAM contents. -/ delay -- 10us delay function -/ -/ Returns: 1 if NVRAM is valid, 0 otherwise -/-**----------------------------------------------------------------------------*/ - -extern unsigned xtboard_get_nvram_contents(unsigned char *buf, void (*delay)( void )); - -/*+*---------------------------------------------------------------------------- -/ Function: xtboard_get_ether_addr -/ -/ Description: Returns ethernet address of board. -/ -/ Parameters: buf -- buffer to store ethernet address -/ delay -- 10us delay function -/ -/ Returns: nothing. -/-**----------------------------------------------------------------------------*/ - -extern void xtboard_get_ether_addr(unsigned char *buf, void (*delay)( void )); - -#endif /*0*/ - - -#endif /*_xtboard_h_included_*/ - diff --git a/include/linux/Kbuild b/include/linux/Kbuild index e618b25b5ad..a1b04d8a1d0 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -61,7 +61,6 @@ header-y += fd.h header-y += fdreg.h header-y += fib_rules.h header-y += fuse.h -header-y += futex.h header-y += genetlink.h header-y += gen_stats.h header-y += gigaset_dev.h @@ -203,6 +202,7 @@ unifdef-y += fb.h unifdef-y += fcntl.h unifdef-y += filter.h unifdef-y += flat.h +unifdef-y += futex.h unifdef-y += fs.h unifdef-y += gameport.h unifdef-y += generic_serial.h diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d852024ed09..1622d23a8dc 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -159,7 +159,7 @@ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) * Unless you're the timekeeping code, you should not be using this! 
*/ static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) + unsigned long length_nsec) { u64 tmp; diff --git a/include/linux/file.h b/include/linux/file.h index 6e77b9177f9..edca361f2ab 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -26,19 +26,12 @@ struct embedded_fd_set { unsigned long fds_bits[1]; }; -/* - * More than this number of fds: we use a separately allocated fd_set - */ -#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set)) - struct fdtable { unsigned int max_fds; - int max_fdset; struct file ** fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; - struct files_struct *free_files; struct fdtable *next; }; @@ -83,14 +76,8 @@ extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; -extern struct file ** alloc_fd_array(int); -extern void free_fd_array(struct file **, int); - -extern fd_set *alloc_fdset(int); -extern void free_fdset(fd_set *, int); - extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable(struct fdtable *fdt); +extern void free_fdtable_rcu(struct rcu_head *rcu); extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 6e05e3e7ce3..39306309613 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -1,5 +1,7 @@ /* Freezer declarations */ +#include <linux/sched.h> + #ifdef CONFIG_PM /* * Check if a process has been frozen diff --git a/include/linux/futex.h b/include/linux/futex.h index d097b5b72bc..3f153b4e156 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -93,6 +93,7 @@ struct robust_list_head { */ #define ROBUST_LIST_LIMIT 2048 +#ifdef __KERNEL__ long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, u32 __user *uaddr2, u32 val2, u32 val3); @@ -110,6 +111,7 @@ static inline void exit_pi_state_list(struct task_struct *curr) { } #endif +#endif /* __KERNEL__ */ #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 9c2050293f1..64e070f62a8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -796,6 +796,7 @@ typedef struct hwif_s { unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ unsigned no_io_32bit : 1; /* 1 = can not do 32-bit IO ops */ unsigned err_stops_fifo : 1; /* 1=data FIFO is cleared by an error */ + unsigned atapi_irq_bogon : 1; /* Generates spurious DMA interrupts in PIO mode */ struct device gendev; struct completion gendev_rel_comp; /* To deal with device release() */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7272ff9ee77..b5315150199 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,12 +12,10 @@ #define INIT_FDTABLE \ { \ .max_fds = NR_OPEN_DEFAULT, \ - .max_fdset = EMBEDDED_FD_SET_SIZE, \ .fd = &init_files.fd_array[0], \ .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ .open_fds = (fd_set *)&init_files.open_fds_init, \ .rcu = RCU_HEAD_INIT, \ - .free_files = NULL, \ .next = NULL, \ } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index de7593f4e89..e36e86c869f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -231,7 +231,8 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - TASKLET_SOFTIRQ + TASKLET_SOFTIRQ, + 
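
The Kbuild hunk above moves futex.h from header-y to unifdef-y while the futex.h hunk wraps the kernel-only declarations in #ifdef __KERNEL__; together they keep do_futex() and friends out of the exported userspace copy while the FUTEX_OP_* ABI constants stay visible. A minimal, purely hypothetical header showing the same pattern:

#ifndef _LINUX_EXAMPLE_H
#define _LINUX_EXAMPLE_H

#define EXAMPLE_OP_SET          0       /* part of the userspace ABI */
#define EXAMPLE_OP_CLEAR        1

#ifdef __KERNEL__
/* stripped by unifdef during "make headers_install" */
struct task_struct;
long do_example(struct task_struct *tsk, int op);
#endif /* __KERNEL__ */

#endif /* _LINUX_EXAMPLE_H */
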
SCHED_SOFTIRQ, }; /* softirq mask and active fields moved to irq_cpustat_t in diff --git a/include/linux/kvm.h b/include/linux/kvm.h new file mode 100644 index 00000000000..5bb2c3c585c --- /dev/null +++ b/include/linux/kvm.h @@ -0,0 +1,227 @@ +#ifndef __LINUX_KVM_H +#define __LINUX_KVM_H + +/* + * Userspace interface for /dev/kvm - kernel based virtual machine + * + * Note: this interface is considered experimental and may change without + * notice. + */ + +#include <asm/types.h> +#include <linux/ioctl.h> + +/* + * Architectural interrupt line count, and the size of the bitmap needed + * to hold them. + */ +#define KVM_NR_INTERRUPTS 256 +#define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8) +#define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type)) + + +/* for KVM_CREATE_MEMORY_REGION */ +struct kvm_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ +}; + +/* for kvm_memory_region::flags */ +#define KVM_MEM_LOG_DIRTY_PAGES 1UL + + +#define KVM_EXIT_TYPE_FAIL_ENTRY 1 +#define KVM_EXIT_TYPE_VM_EXIT 2 + +enum kvm_exit_reason { + KVM_EXIT_UNKNOWN = 0, + KVM_EXIT_EXCEPTION = 1, + KVM_EXIT_IO = 2, + KVM_EXIT_CPUID = 3, + KVM_EXIT_DEBUG = 4, + KVM_EXIT_HLT = 5, + KVM_EXIT_MMIO = 6, +}; + +/* for KVM_RUN */ +struct kvm_run { + /* in */ + __u32 vcpu; + __u32 emulated; /* skip current instruction */ + __u32 mmio_completed; /* mmio request completed */ + + /* out */ + __u32 exit_type; + __u32 exit_reason; + __u32 instruction_length; + union { + /* KVM_EXIT_UNKNOWN */ + struct { + __u32 hardware_exit_reason; + } hw; + /* KVM_EXIT_EXCEPTION */ + struct { + __u32 exception; + __u32 error_code; + } ex; + /* KVM_EXIT_IO */ + struct { +#define KVM_EXIT_IO_IN 0 +#define KVM_EXIT_IO_OUT 1 + __u8 direction; + __u8 size; /* bytes */ + __u8 string; + __u8 string_down; + __u8 rep; + __u8 pad; + __u16 port; + __u64 count; + union { + __u64 address; + __u32 value; + }; + } io; + struct { + } debug; + /* KVM_EXIT_MMIO */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } mmio; + }; +}; + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* in */ + __u32 vcpu; + __u32 padding; + + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +struct kvm_segment { + __u64 base; + __u32 limit; + __u16 selector; + __u8 type; + __u8 present, dpl, db, s, l, g, avl; + __u8 unusable; + __u8 padding; +}; + +struct kvm_dtable { + __u64 base; + __u16 limit; + __u16 padding[3]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + /* in */ + __u32 vcpu; + __u32 padding; + + /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)]; +}; + +struct kvm_msr_entry { + __u32 index; + __u32 reserved; + __u64 data; +}; + +/* for KVM_GET_MSRS and KVM_SET_MSRS */ +struct kvm_msrs { + __u32 vcpu; + __u32 nmsrs; /* number of msrs in entries */ + + struct kvm_msr_entry entries[0]; +}; + +/* for KVM_GET_MSR_INDEX_LIST */ +struct kvm_msr_list { + __u32 nmsrs; /* number of msrs in entries */ + __u32 indices[0]; +}; + +/* for KVM_TRANSLATE */ +struct kvm_translation { + /* in */ + __u64 linear_address; + __u32 vcpu; + __u32 padding; + + /* out */ + __u64 physical_address; + __u8 
valid; + __u8 writeable; + __u8 usermode; +}; + +/* for KVM_INTERRUPT */ +struct kvm_interrupt { + /* in */ + __u32 vcpu; + __u32 irq; +}; + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +/* for KVM_DEBUG_GUEST */ +struct kvm_debug_guest { + /* int */ + __u32 vcpu; + __u32 enabled; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +/* for KVM_GET_DIRTY_LOG */ +struct kvm_dirty_log { + __u32 slot; + __u32 padding; + union { + void __user *dirty_bitmap; /* one bit per page */ + __u64 padding; + }; +}; + +#define KVMIO 0xAE + +#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) +#define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) +#define KVM_GET_SREGS _IOWR(KVMIO, 5, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) +#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) +#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int /* vcpu_slot */) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) +#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) +#define KVM_SET_MSRS _IOWR(KVMIO, 14, struct kvm_msrs) +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) + +#endif diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 03636d7918f..d8286db60b9 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -227,7 +227,10 @@ struct raid5_private_data { struct list_head handle_list; /* stripes needing handling */ struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ + struct bio *retry_read_aligned; /* currently retrying aligned bios */ + struct bio *retry_read_aligned_list; /* aligned bios retry list */ atomic_t preread_active_stripes; /* stripes with scheduled io */ + atomic_t active_aligned_reads; atomic_t reshape_stripes; /* stripes with pending writes for reshape */ /* unfortunately we need two cache names as we temporarily have diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 09ff4c3e271..5e22d4510d1 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -106,6 +106,7 @@ extern int rtc_year_days(unsigned int day, unsigned int month, unsigned int year extern int rtc_valid_tm(struct rtc_time *tm); extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time); extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm); +extern void rtc_merge_alarm(struct rtc_time *now, struct rtc_time *alarm); #include <linux/device.h> #include <linux/seq_file.h> diff --git a/include/linux/sched.h b/include/linux/sched.h index ad9c46071ff..ea92e5c8908 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -82,6 +82,7 @@ struct sched_param { #include <linux/resource.h> #include <linux/timer.h> #include <linux/hrtimer.h> +#include <linux/task_io_accounting.h> #include <asm/processor.h> @@ -647,6 +648,7 @@ enum idle_type #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? 
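
Since kvm.h documents a brand-new userspace ABI, a sketch of how a client would drive it may help. This is an assumption-laden illustration, not code from this patch: the function is hypothetical, error handling is minimal, and a real user must first set up guest memory (KVM_SET_MEMORY_REGION) and register state (KVM_SET_REGS/KVM_SET_SREGS). In this early interface the ioctls are issued on the /dev/kvm file descriptor itself, with the vcpu slot passed explicitly in each structure:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int run_vcpu0(void)
{
        struct kvm_run run;
        int fd = open("/dev/kvm", O_RDWR);

        if (fd < 0)
                return -1;
        if (ioctl(fd, KVM_CREATE_VCPU, 0) < 0)  /* create vcpu in slot 0 */
                return -1;

        /* guest memory and registers would be set up here */

        memset(&run, 0, sizeof(run));
        run.vcpu = 0;
        if (ioctl(fd, KVM_RUN, &run) < 0)       /* returns on each guest exit */
                return -1;

        return run.exit_reason;                 /* e.g. KVM_EXIT_HLT */
}
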
SD_POWERSAVINGS_BALANCE : 0) @@ -1013,6 +1015,7 @@ struct task_struct { wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 94316a98e0d..6d8846e7be6 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -918,8 +918,7 @@ typedef struct ctl_table ctl_table; typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context); + void __user *newval, size_t newlen); typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -950,7 +949,7 @@ extern int do_sysctl (int __user *name, int nlen, extern int do_sysctl_strategy (ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void ** context); + void __user *newval, size_t newlen); extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h new file mode 100644 index 00000000000..44d00e9ccee --- /dev/null +++ b/include/linux/task_io_accounting.h @@ -0,0 +1,37 @@ +/* + * task_io_accounting: a structure which is used for recording a single task's + * IO statistics. + * + * Don't include this header file directly - it is designed to be dragged in via + * sched.h. + * + * Blame akpm@osdl.org for all this. + */ + +#ifdef CONFIG_TASK_IO_ACCOUNTING +struct task_io_accounting { + /* + * The number of bytes which this task has caused to be read from + * storage. + */ + u64 read_bytes; + + /* + * The number of bytes which this task has caused, or shall cause to be + * written to disk. + */ + u64 write_bytes; + + /* + * A task can cause "negative" IO too. If this task truncates some + * dirty pagecache, some IO which another task has been accounted for + * (in its write_bytes) will not be happening. We _could_ just + * subtract that from the truncating task's write_bytes, but there is + * information loss in doing that. 
+ */ + u64 cancelled_write_bytes; +}; +#else +struct task_io_accounting { +}; +#endif diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h new file mode 100644 index 00000000000..df2a319106b --- /dev/null +++ b/include/linux/task_io_accounting_ops.h @@ -0,0 +1,47 @@ +/* + * Task I/O accounting operations + */ +#ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED +#define __TASK_IO_ACCOUNTING_OPS_INCLUDED + +#ifdef CONFIG_TASK_IO_ACCOUNTING +static inline void task_io_account_read(size_t bytes) +{ + current->ioac.read_bytes += bytes; +} + +static inline void task_io_account_write(size_t bytes) +{ + current->ioac.write_bytes += bytes; +} + +static inline void task_io_account_cancelled_write(size_t bytes) +{ + current->ioac.cancelled_write_bytes += bytes; +} + +static inline void task_io_accounting_init(struct task_struct *tsk) +{ + memset(&tsk->ioac, 0, sizeof(tsk->ioac)); +} + +#else + +static inline void task_io_account_read(size_t bytes) +{ +} + +static inline void task_io_account_write(size_t bytes) +{ +} + +static inline void task_io_account_cancelled_write(size_t bytes) +{ +} + +static inline void task_io_accounting_init(struct task_struct *tsk) +{ +} + +#endif /* CONFIG_TASK_IO_ACCOUNTING */ +#endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 45248806ae9..3fced479825 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -31,7 +31,7 @@ */ -#define TASKSTATS_VERSION 2 +#define TASKSTATS_VERSION 3 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -115,31 +115,37 @@ struct taskstats { __u64 ac_majflt; /* Major Page Fault Count */ /* Basic Accounting Fields end */ - /* Extended accounting fields start */ + /* Extended accounting fields start */ /* Accumulated RSS usage in duration of a task, in MBytes-usecs. * The current rss usage is added to this counter every time * a tick is charged to a task's system time. So, at the end we * will have memory usage multiplied by system time. Thus an * average usage per system time unit can be calculated. */ - __u64 coremem; /* accumulated RSS usage in MB-usec */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ /* Accumulated virtual memory usage in duration of a task. * Same as acct_rss_mem1 above except that we keep track of VM usage. */ - __u64 virtmem; /* accumulated VM usage in MB-usec */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ /* High watermark of RSS and virtual memory usage in duration of * a task, in KBytes. */ - __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ - __u64 hiwater_vm; /* High-water VM usage, in KB */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ /* The following four fields are I/O statistics of a task. 
*/ - __u64 read_char; /* bytes read */ - __u64 write_char; /* bytes written */ - __u64 read_syscalls; /* read syscalls */ - __u64 write_syscalls; /* write syscalls */ - /* Extended accounting fields end */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ + +#define TASKSTATS_HAS_IO_ACCOUNTING + /* Per-task storage I/O accounting starts */ + __u64 read_bytes; /* bytes of read I/O */ + __u64 write_bytes; /* bytes of write I/O */ + __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ }; diff --git a/include/linux/timer.h b/include/linux/timer.h index c982304dbaf..eeef6643d4c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -98,4 +98,10 @@ extern void run_local_timers(void); struct hrtimer; extern int it_real_fn(struct hrtimer *); +unsigned long __round_jiffies(unsigned long j, int cpu); +unsigned long __round_jiffies_relative(unsigned long j, int cpu); +unsigned long round_jiffies(unsigned long j); +unsigned long round_jiffies_relative(unsigned long j); + + #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index da508d1998e..6c5a6e6e813 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -93,7 +93,7 @@ .groups = NULL, \ .min_interval = 1, \ .max_interval = 2, \ - .busy_factor = 8, \ + .busy_factor = 64, \ .imbalance_pct = 110, \ .cache_nice_tries = 0, \ .per_cpu_gain = 25, \ @@ -194,7 +194,8 @@ .wake_idx = 0, /* unused */ \ .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ + .flags = SD_LOAD_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ diff --git a/include/net/ip.h b/include/net/ip.h index 83cb9ac5554..053f02b5cb8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -376,8 +376,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, size_t *lenp, loff_t *ppos); int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context); + void __user *newval, size_t newlen); #ifdef CONFIG_PROC_FS extern int ip_misc_proc_init(void); #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 215461f18db..c818f87122a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -368,7 +368,7 @@ static inline void sctp_sysctl_register(void) { return; } static inline void sctp_sysctl_unregister(void) { return; } static inline int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) { + void __user *newval, size_t newlen) { return -ENOSYS; } #endif diff --git a/init/Kconfig b/init/Kconfig index 14d484606fa..9edf103b3ec 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -304,6 +304,15 @@ config TASK_XACCT Say N if unsure. +config TASK_IO_ACCOUNTING + bool "Enable per-task storage I/O accounting (EXPERIMENTAL)" + depends on TASK_XACCT + help + Collect information on the number of bytes of storage I/O which this + task has caused. + + Say N if unsure. 
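
Tying the new pieces together: task_io_accounting_ops.h provides the in-kernel helpers, taskstats gains read_bytes/write_bytes/cancelled_write_bytes (guarded by TASKSTATS_HAS_IO_ACCOUNTING), and CONFIG_TASK_IO_ACCOUNTING switches it all on. As a rough sketch only (the real call sites are added elsewhere in this series; the functions below are hypothetical), a storage I/O path would charge the current task like this:

#include <linux/sched.h>
#include <linux/task_io_accounting_ops.h>

/*
 * Hypothetical completion hook: charge bytes actually read from or
 * written to storage against current->ioac.  With
 * CONFIG_TASK_IO_ACCOUNTING=n the helpers compile away to empty
 * inlines, so callers need no #ifdefs.
 */
static void example_account_storage_io(size_t read_bytes, size_t write_bytes)
{
        if (read_bytes)
                task_io_account_read(read_bytes);
        if (write_bytes)
                task_io_account_write(write_bytes);
}

/* Truncating dirty pagecache undoes writes that were already accounted: */
static void example_account_truncate(size_t bytes)
{
        task_io_account_cancelled_write(bytes);
}
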
+ config SYSCTL bool diff --git a/kernel/exit.c b/kernel/exit.c index 03e64fe4a14..122fadb972f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -425,7 +425,7 @@ static void close_files(struct files_struct * files) for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j++]; while (set) { @@ -466,11 +466,9 @@ void fastcall put_files_struct(struct files_struct *files) * you can free files immediately. */ fdt = files_fdtable(files); - if (fdt == &files->fdtab) - fdt->free_files = files; - else + if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); - free_fdtable(fdt); + call_rcu(&fdt->rcu, free_fdtable_rcu); } } diff --git a/kernel/fork.c b/kernel/fork.c index 8c859eef8e6..d16c566eb64 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -36,6 +36,7 @@ #include <linux/syscalls.h> #include <linux/jiffies.h> #include <linux/futex.h> +#include <linux/task_io_accounting_ops.h> #include <linux/rcupdate.h> #include <linux/ptrace.h> #include <linux/mount.h> @@ -613,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) static int count_open_files(struct fdtable *fdt) { - int size = fdt->max_fdset; + int size = fdt->max_fds; int i; /* Find the last open fd */ @@ -640,12 +641,10 @@ static struct files_struct *alloc_files(void) newf->next_fd = 0; fdt = &newf->fdtab; fdt->max_fds = NR_OPEN_DEFAULT; - fdt->max_fdset = EMBEDDED_FD_SET_SIZE; fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; fdt->open_fds = (fd_set *)&newf->open_fds_init; fdt->fd = &newf->fd_array[0]; INIT_RCU_HEAD(&fdt->rcu); - fdt->free_files = NULL; fdt->next = NULL; rcu_assign_pointer(newf->fdt, fdt); out: @@ -661,7 +660,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; - int open_files, size, i, expand; + int open_files, size, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; @@ -672,25 +671,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); new_fdt = files_fdtable(newf); - size = old_fdt->max_fdset; open_files = count_open_files(old_fdt); - expand = 0; /* - * Check whether we need to allocate a larger fd array or fd set. - * Note: we're not a clone task, so the open count won't change. + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. 
*/ - if (open_files > new_fdt->max_fdset) { - new_fdt->max_fdset = 0; - expand = 1; - } if (open_files > new_fdt->max_fds) { new_fdt->max_fds = 0; - expand = 1; - } - - /* if the old fdset gets grown now, we'll only copy up to "size" fds */ - if (expand) { spin_unlock(&oldf->file_lock); spin_lock(&newf->file_lock); *errorp = expand_files(newf, open_files-1); @@ -710,8 +698,10 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) old_fds = old_fdt->fd; new_fds = new_fdt->fd; - memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8); - memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -736,22 +726,19 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* This is long word aligned thus could use a optimized version */ memset(new_fds, 0, size); - if (new_fdt->max_fdset > open_files) { - int left = (new_fdt->max_fdset-open_files)/8; + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; int start = open_files / (8 * sizeof(unsigned long)); memset(&new_fdt->open_fds->fds_bits[start], 0, left); memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } -out: return newf; out_release: - free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); - free_fdset (new_fdt->open_fds, new_fdt->max_fdset); - free_fd_array(new_fdt->fd, new_fdt->max_fds); kmem_cache_free(files_cachep, newf); +out: return NULL; } @@ -1055,6 +1042,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ + task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; diff --git a/kernel/sched.c b/kernel/sched.c index f385eff4682..8a0afb97af7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -225,8 +225,10 @@ struct rq { unsigned long nr_uninterruptible; unsigned long expired_timestamp; - unsigned long long timestamp_last_tick; + /* Cached timestamp set by update_cpu_clock() */ + unsigned long long most_recent_timestamp; struct task_struct *curr, *idle; + unsigned long next_balance; struct mm_struct *prev_mm; struct prio_array *active, *expired, arrays[2]; int best_expired_prio; @@ -426,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) * bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ -#define SCHEDSTAT_VERSION 12 +#define SCHEDSTAT_VERSION 14 static int show_schedstat(struct seq_file *seq, void *v) { @@ -464,7 +466,8 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "domain%d %s", dcnt++, mask_str); for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++) { - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " + "%lu", sd->lb_cnt[itype], sd->lb_balanced[itype], sd->lb_failed[itype], @@ -474,11 +477,13 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->lb_nobusyq[itype], sd->lb_nobusyg[itype]); } - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" + " %lu %lu %lu\n", sd->alb_cnt, sd->alb_failed, 
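
With max_fdset and the separate fd-array/fd-set allocation helpers gone, every lookup and copy is bounded by the single fdt->max_fds, as the dup_fd() and close_files() hunks above show. A minimal sketch of the lookup pattern this enables (the function name is made up; it mirrors fcheck_files() from the linux/file.h hunk earlier):

#include <linux/file.h>
#include <linux/rcupdate.h>

/*
 * Hypothetical RCU-side lookup: one bound (max_fds) now covers the fd
 * array and both fd_sets.
 */
static struct file *example_lookup_fd(struct files_struct *files, unsigned int fd)
{
        struct file *file = NULL;
        struct fdtable *fdt;

        rcu_read_lock();
        fdt = files_fdtable(files);
        if (fd < fdt->max_fds)
                file = rcu_dereference(fdt->fd[fd]);
        rcu_read_unlock();
        return file;
}
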
sd->alb_pushed, sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, - sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); + sd->ttwu_wake_remote, sd->ttwu_move_affine, + sd->ttwu_move_balance); } preempt_enable(); #endif @@ -547,7 +552,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) #endif /* - * rq_lock - lock a given runqueue and disable interrupts. + * this_rq_lock - lock this runqueue and disable interrupts. */ static inline struct rq *this_rq_lock(void) __acquires(rq->lock) @@ -938,13 +943,16 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) { unsigned long long now; + if (rt_task(p)) + goto out; + now = sched_clock(); #ifdef CONFIG_SMP if (!local) { /* Compensate for drifting sched_clock */ struct rq *this_rq = this_rq(); - now = (now - this_rq->timestamp_last_tick) - + rq->timestamp_last_tick; + now = (now - this_rq->most_recent_timestamp) + + rq->most_recent_timestamp; } #endif @@ -959,8 +967,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) (now - p->timestamp) >> 20); } - if (!rt_task(p)) - p->prio = recalc_task_prio(p, now); + p->prio = recalc_task_prio(p, now); /* * This checks to make sure it's not an uninterruptible task @@ -985,7 +992,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) } } p->timestamp = now; - +out: __activate_task(p, rq); } @@ -1450,7 +1457,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (this_sd->flags & SD_WAKE_AFFINE) { unsigned long tl = this_load; - unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu); + unsigned long tl_per_task; + + tl_per_task = cpu_avg_load_per_task(this_cpu); /* * If sync wakeup then subtract the (maximum possible) @@ -1688,8 +1697,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Not the local CPU - must adjust timestamp. This should * get optimised away in the !CONFIG_SMP case. */ - p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) - + rq->timestamp_last_tick; + p->timestamp = (p->timestamp - this_rq->most_recent_timestamp) + + rq->most_recent_timestamp; __activate_task(p, rq); if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); @@ -1952,6 +1961,7 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) __acquires(rq1->lock) __acquires(rq2->lock) { + BUG_ON(!irqs_disabled()); if (rq1 == rq2) { spin_lock(&rq1->lock); __acquire(rq2->lock); /* Fake it out ;) */ @@ -1991,6 +2001,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } if (unlikely(!spin_trylock(&busiest->lock))) { if (busiest < this_rq) { spin_unlock(&this_rq->lock); @@ -2061,8 +2076,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array, set_task_cpu(p, this_cpu); inc_nr_running(p, this_rq); enqueue_task(p, this_array); - p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) - + this_rq->timestamp_last_tick; + p->timestamp = (p->timestamp - src_rq->most_recent_timestamp) + + this_rq->most_recent_timestamp; /* * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. @@ -2098,10 +2113,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) too many balance attempts have failed. 
*/ - if (sd->nr_balance_failed > sd->cache_nice_tries) + if (sd->nr_balance_failed > sd->cache_nice_tries) { +#ifdef CONFIG_SCHEDSTATS + if (task_hot(p, rq->most_recent_timestamp, sd)) + schedstat_inc(sd, lb_hot_gained[idle]); +#endif return 1; + } - if (task_hot(p, rq->timestamp_last_tick, sd)) + if (task_hot(p, rq->most_recent_timestamp, sd)) return 0; return 1; } @@ -2199,11 +2219,6 @@ skip_queue: goto skip_bitmap; } -#ifdef CONFIG_SCHEDSTATS - if (task_hot(tmp, busiest->timestamp_last_tick, sd)) - schedstat_inc(sd, lb_hot_gained[idle]); -#endif - pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); pulled++; rem_load_move -= tmp->load_weight; @@ -2241,7 +2256,7 @@ out: static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum idle_type idle, int *sd_idle, - cpumask_t *cpus) + cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -2270,10 +2285,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long load, group_capacity; int local_group; int i; + unsigned int balance_cpu = -1, first_idle_cpu = 0; unsigned long sum_nr_running, sum_weighted_load; local_group = cpu_isset(this_cpu, group->cpumask); + if (local_group) + balance_cpu = first_cpu(group->cpumask); + /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -2289,9 +2308,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, *sd_idle = 0; /* Bias balancing toward cpus of our domain */ - if (local_group) + if (local_group) { + if (idle_cpu(i) && !first_idle_cpu) { + first_idle_cpu = 1; + balance_cpu = i; + } + load = target_load(i, load_idx); - else + } else load = source_load(i, load_idx); avg_load += load; @@ -2299,6 +2323,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, sum_weighted_load += rq->raw_weighted_load; } + /* + * First idle cpu or the first cpu(busiest) in this sched group + * is eligible for doing load balancing at this and above + * domains. 
+ */ + if (local_group && balance_cpu != this_cpu && balance) { + *balance = 0; + goto ret; + } + total_load += avg_load; total_pwr += group->cpu_power; @@ -2458,18 +2492,21 @@ small_imbalance: pwr_now /= SCHED_LOAD_SCALE; /* Amount of load we'd subtract */ - tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power; + tmp = busiest_load_per_task * SCHED_LOAD_SCALE / + busiest->cpu_power; if (max_load > tmp) pwr_move += busiest->cpu_power * min(busiest_load_per_task, max_load - tmp); /* Amount of load we'd add */ - if (max_load*busiest->cpu_power < - busiest_load_per_task*SCHED_LOAD_SCALE) - tmp = max_load*busiest->cpu_power/this->cpu_power; + if (max_load * busiest->cpu_power < + busiest_load_per_task * SCHED_LOAD_SCALE) + tmp = max_load * busiest->cpu_power / this->cpu_power; else - tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power; - pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp); + tmp = busiest_load_per_task * SCHED_LOAD_SCALE / + this->cpu_power; + pwr_move += this->cpu_power * + min(this_load_per_task, this_load + tmp); pwr_move /= SCHED_LOAD_SCALE; /* Move if we gain throughput */ @@ -2490,8 +2527,8 @@ out_balanced: *imbalance = min_load_per_task; return group_min; } -ret: #endif +ret: *imbalance = 0; return NULL; } @@ -2540,17 +2577,17 @@ static inline unsigned long minus_1_or_zero(unsigned long n) /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. - * - * Called with this_rq unlocked. */ static int load_balance(int this_cpu, struct rq *this_rq, - struct sched_domain *sd, enum idle_type idle) + struct sched_domain *sd, enum idle_type idle, + int *balance) { int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; cpumask_t cpus = CPU_MASK_ALL; + unsigned long flags; /* * When power savings policy is enabled for the parent domain, idle @@ -2566,7 +2603,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, redo: group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, - &cpus); + &cpus, balance); + + if (*balance == 0) + goto out_balanced; + if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); goto out_balanced; @@ -2590,11 +2631,13 @@ redo: * still unbalanced. nr_moved simply stays zero, so it is * correctly treated as an imbalance. 
*/ + local_irq_save(flags); double_rq_lock(this_rq, busiest); nr_moved = move_tasks(this_rq, this_cpu, busiest, minus_1_or_zero(busiest->nr_running), imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); + local_irq_restore(flags); /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { @@ -2611,13 +2654,13 @@ redo: if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { - spin_lock(&busiest->lock); + spin_lock_irqsave(&busiest->lock, flags); /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - spin_unlock(&busiest->lock); + spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; } @@ -2627,7 +2670,7 @@ redo: busiest->push_cpu = this_cpu; active_balance = 1; } - spin_unlock(&busiest->lock); + spin_unlock_irqrestore(&busiest->lock, flags); if (active_balance) wake_up_process(busiest->migration_thread); @@ -2706,7 +2749,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); redo: group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, - &sd_idle, &cpus); + &sd_idle, &cpus, NULL); if (!group) { schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); goto out_balanced; @@ -2766,14 +2809,28 @@ out_balanced: static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; + int pulled_task = 0; + unsigned long next_balance = jiffies + 60 * HZ; for_each_domain(this_cpu, sd) { if (sd->flags & SD_BALANCE_NEWIDLE) { /* If we've pulled tasks over stop searching: */ - if (load_balance_newidle(this_cpu, this_rq, sd)) + pulled_task = load_balance_newidle(this_cpu, + this_rq, sd); + if (time_after(next_balance, + sd->last_balance + sd->balance_interval)) + next_balance = sd->last_balance + + sd->balance_interval; + if (pulled_task) break; } } + if (!pulled_task) + /* + * We are going idle. next_balance may be set based on + * a busy processor. So reset next_balance. + */ + this_rq->next_balance = next_balance; } /* @@ -2826,26 +2883,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) spin_unlock(&target_rq->lock); } -/* - * rebalance_tick will get called every timer tick, on every CPU. - * - * It checks each scheduling domain to see if it is due to be balanced, - * and initiates a balancing operation if so. - * - * Balancing parameters are set up in arch_init_sched_domains. - */ - -/* Don't have all balancing operations going off at once: */ -static inline unsigned long cpu_offset(int cpu) +static void update_load(struct rq *this_rq) { - return jiffies + cpu * HZ / NR_CPUS; -} - -static void -rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) -{ - unsigned long this_load, interval, j = cpu_offset(this_cpu); - struct sched_domain *sd; + unsigned long this_load; int i, scale; this_load = this_rq->raw_weighted_load; @@ -2865,6 +2905,32 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) new_load += scale-1; this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; } +} + +/* + * run_rebalance_domains is triggered when needed from the scheduler tick. + * + * It checks each scheduling domain to see if it is due to be balanced, + * and initiates a balancing operation if so. + * + * Balancing parameters are set up in arch_init_sched_domains. 
+ */ +static DEFINE_SPINLOCK(balancing); + +static void run_rebalance_domains(struct softirq_action *h) +{ + int this_cpu = smp_processor_id(), balance = 1; + struct rq *this_rq = cpu_rq(this_cpu); + unsigned long interval; + struct sched_domain *sd; + /* + * We are idle if there are no processes running. This + * is valid even if we are the idle process (SMT). + */ + enum idle_type idle = !this_rq->nr_running ? + SCHED_IDLE : NOT_IDLE; + /* Earliest time when we have to call run_rebalance_domains again */ + unsigned long next_balance = jiffies + 60*HZ; for_each_domain(this_cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -2879,8 +2945,13 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) if (unlikely(!interval)) interval = 1; - if (j - sd->last_balance >= interval) { - if (load_balance(this_cpu, this_rq, sd, idle)) { + if (sd->flags & SD_SERIALIZE) { + if (!spin_trylock(&balancing)) + goto out; + } + + if (time_after_eq(jiffies, sd->last_balance + interval)) { + if (load_balance(this_cpu, this_rq, sd, idle, &balance)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -2888,39 +2959,48 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) */ idle = NOT_IDLE; } - sd->last_balance += interval; + sd->last_balance = jiffies; } + if (sd->flags & SD_SERIALIZE) + spin_unlock(&balancing); +out: + if (time_after(next_balance, sd->last_balance + interval)) + next_balance = sd->last_balance + interval; + + /* + * Stop the load balance at this level. There is another + * CPU in our sched group which is doing load balancing more + * actively. + */ + if (!balance) + break; } + this_rq->next_balance = next_balance; } #else /* * on UP we do not need to balance between CPUs: */ -static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) -{ -} static inline void idle_balance(int cpu, struct rq *rq) { } #endif -static inline int wake_priority_sleeper(struct rq *rq) +static inline void wake_priority_sleeper(struct rq *rq) { - int ret = 0; - #ifdef CONFIG_SCHED_SMT + if (!rq->nr_running) + return; + spin_lock(&rq->lock); /* * If an SMT sibling task has been put to sleep for priority * reasons reschedule the idle task to see if it can now run. */ - if (rq->nr_running) { + if (rq->nr_running) resched_task(rq->idle); - ret = 1; - } spin_unlock(&rq->lock); #endif - return ret; } DEFINE_PER_CPU(struct kernel_stat, kstat); @@ -2934,7 +3014,8 @@ EXPORT_PER_CPU_SYMBOL(kstat); static inline void update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) { - p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); + p->sched_time += now - p->last_ran; + p->last_ran = rq->most_recent_timestamp = now; } /* @@ -2947,8 +3028,7 @@ unsigned long long current_sched_time(const struct task_struct *p) unsigned long flags; local_irq_save(flags); - ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); - ns = p->sched_time + sched_clock() - ns; + ns = p->sched_time + sched_clock() - p->last_ran; local_irq_restore(flags); return ns; @@ -3048,35 +3128,12 @@ void account_steal_time(struct task_struct *p, cputime_t steal) cpustat->steal = cputime64_add(cpustat->steal, tmp); } -/* - * This function gets called by the timer code, with HZ frequency. - * We call it with interrupts disabled. - * - * It also gets called by the fork code, when changing the parent's - * timeslices. 
- */ -void scheduler_tick(void) +static void task_running_tick(struct rq *rq, struct task_struct *p) { - unsigned long long now = sched_clock(); - struct task_struct *p = current; - int cpu = smp_processor_id(); - struct rq *rq = cpu_rq(cpu); - - update_cpu_clock(p, rq, now); - - rq->timestamp_last_tick = now; - - if (p == rq->idle) { - if (wake_priority_sleeper(rq)) - goto out; - rebalance_tick(cpu, rq, SCHED_IDLE); - return; - } - - /* Task might have expired already, but not scheduled off yet */ if (p->array != rq->active) { + /* Task has expired but was not scheduled yet */ set_tsk_need_resched(p); - goto out; + return; } spin_lock(&rq->lock); /* @@ -3144,8 +3201,34 @@ void scheduler_tick(void) } out_unlock: spin_unlock(&rq->lock); -out: - rebalance_tick(cpu, rq, NOT_IDLE); +} + +/* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. + * + * It also gets called by the fork code, when changing the parent's + * timeslices. + */ +void scheduler_tick(void) +{ + unsigned long long now = sched_clock(); + struct task_struct *p = current; + int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); + + update_cpu_clock(p, rq, now); + + if (p == rq->idle) + /* Task on the idle queue */ + wake_priority_sleeper(rq); + else + task_running_tick(rq, p); +#ifdef CONFIG_SMP + update_load(rq); + if (time_after_eq(jiffies, rq->next_balance)) + raise_softirq(SCHED_SOFTIRQ); +#endif } #ifdef CONFIG_SCHED_SMT @@ -3291,7 +3374,8 @@ void fastcall add_preempt_count(int val) /* * Spinlock count overflowing soon? */ - DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); + DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= + PREEMPT_MASK - 10); } EXPORT_SYMBOL(add_preempt_count); @@ -4990,8 +5074,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * afterwards, and pretending it was a local activate. * This way is cleaner and logically correct. */ - p->timestamp = p->timestamp - rq_src->timestamp_last_tick - + rq_dest->timestamp_last_tick; + p->timestamp = p->timestamp - rq_src->most_recent_timestamp + + rq_dest->most_recent_timestamp; deactivate_task(p, rq_src); __activate_task(p, rq_dest); if (TASK_PREEMPTS_CURR(p, rq_dest)) @@ -5067,7 +5151,10 @@ wait_to_die: } #ifdef CONFIG_HOTPLUG_CPU -/* Figure out where task on dead CPU should go, use force if neccessary. */ +/* + * Figure out where task on dead CPU should go, use force if neccessary. + * NOTE: interrupts should be disabled by the caller + */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { unsigned long flags; @@ -5187,6 +5274,7 @@ void idle_task_exit(void) mmdrop(mm); } +/* called under rq->lock with disabled interrupts */ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) { struct rq *rq = cpu_rq(dead_cpu); @@ -5203,10 +5291,11 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) * Drop lock around migration; if someone else moves it, * that's OK. No task can be added to this CPU, so iteration is * fine. 
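
scheduler_tick() above no longer balances directly; it only raises SCHED_SOFTIRQ once rq->next_balance is due, and run_rebalance_domains() does the domain walk in softirq context. The handler registration is not visible in this hunk, so the following is an assumption about how it is wired up, written as it would appear inside kernel/sched.c under CONFIG_SMP, using the three-argument open_softirq() of this kernel:

#include <linux/interrupt.h>

/* Presumed one-time setup, e.g. from sched_init(): */
static void example_register_sched_softirq(void)
{
        open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
}
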
+ * NOTE: interrupts should be left disabled --dev@ */ - spin_unlock_irq(&rq->lock); + spin_unlock(&rq->lock); move_task_off_dead_cpu(dead_cpu, p); - spin_lock_irq(&rq->lock); + spin_lock(&rq->lock); put_task_struct(p); } @@ -5359,16 +5448,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!(sd->flags & SD_LOAD_BALANCE)) { printk("does not load-balance\n"); if (sd->parent) - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); + printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" + " has parent"); break; } printk("span %s\n", str); if (!cpu_isset(cpu, sd->span)) - printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->span does not contain " + "CPU%d\n", cpu); if (!cpu_isset(cpu, group->cpumask)) - printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->groups does not contain" + " CPU%d\n", cpu); printk(KERN_DEBUG); for (i = 0; i < level + 2; i++) @@ -5383,7 +5475,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!group->cpu_power) { printk("\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); + printk(KERN_ERR "ERROR: domain->cpu_power not " + "set\n"); } if (!cpus_weight(group->cpumask)) { @@ -5406,15 +5499,17 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk("\n"); if (!cpus_equal(sd->span, groupmask)) - printk(KERN_ERR "ERROR: groups don't span domain->span\n"); + printk(KERN_ERR "ERROR: groups don't span " + "domain->span\n"); level++; sd = sd->parent; + if (!sd) + continue; - if (sd) { - if (!cpus_subset(groupmask, sd->span)) - printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); - } + if (!cpus_subset(groupmask, sd->span)) + printk(KERN_ERR "ERROR: parent span is not a superset " + "of domain->span\n"); } while (sd); } @@ -5528,28 +5623,27 @@ static int __init isolated_cpu_setup(char *str) __setup ("isolcpus=", isolated_cpu_setup); /* - * init_sched_build_groups takes an array of groups, the cpumask we wish - * to span, and a pointer to a function which identifies what group a CPU - * belongs to. The return value of group_fn must be a valid index into the - * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we - * keep track of groups covered with a cpumask_t). + * init_sched_build_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. 
*/ static void -init_sched_build_groups(struct sched_group groups[], cpumask_t span, - const cpumask_t *cpu_map, - int (*group_fn)(int cpu, const cpumask_t *cpu_map)) +init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg)) { struct sched_group *first = NULL, *last = NULL; cpumask_t covered = CPU_MASK_NONE; int i; for_each_cpu_mask(i, span) { - int group = group_fn(i, cpu_map); - struct sched_group *sg = &groups[group]; + struct sched_group *sg; + int group = group_fn(i, cpu_map, &sg); int j; if (cpu_isset(i, covered)) @@ -5559,7 +5653,7 @@ init_sched_build_groups(struct sched_group groups[], cpumask_t span, sg->cpu_power = 0; for_each_cpu_mask(j, span) { - if (group_fn(j, cpu_map) != group) + if (group_fn(j, cpu_map, NULL) != group) continue; cpu_set(j, covered); @@ -5733,8 +5827,9 @@ __setup("max_cache_size=", setup_max_cache_size); */ static void touch_cache(void *__cache, unsigned long __size) { - unsigned long size = __size/sizeof(long), chunk1 = size/3, - chunk2 = 2*size/3; + unsigned long size = __size / sizeof(long); + unsigned long chunk1 = size / 3; + unsigned long chunk2 = 2 * size / 3; unsigned long *cache = __cache; int i; @@ -5843,11 +5938,11 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) */ measure_one(cache, size, cpu1, cpu2); for (i = 0; i < ITERATIONS; i++) - cost1 += measure_one(cache, size - i*1024, cpu1, cpu2); + cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2); measure_one(cache, size, cpu2, cpu1); for (i = 0; i < ITERATIONS; i++) - cost1 += measure_one(cache, size - i*1024, cpu2, cpu1); + cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1); /* * (We measure the non-migrating [cached] cost on both @@ -5857,17 +5952,17 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) measure_one(cache, size, cpu1, cpu1); for (i = 0; i < ITERATIONS; i++) - cost2 += measure_one(cache, size - i*1024, cpu1, cpu1); + cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1); measure_one(cache, size, cpu2, cpu2); for (i = 0; i < ITERATIONS; i++) - cost2 += measure_one(cache, size - i*1024, cpu2, cpu2); + cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2); /* * Get the per-iteration migration cost: */ - do_div(cost1, 2*ITERATIONS); - do_div(cost2, 2*ITERATIONS); + do_div(cost1, 2 * ITERATIONS); + do_div(cost2, 2 * ITERATIONS); return cost1 - cost2; } @@ -5905,7 +6000,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) */ cache = vmalloc(max_size); if (!cache) { - printk("could not vmalloc %d bytes for cache!\n", 2*max_size); + printk("could not vmalloc %d bytes for cache!\n", 2 * max_size); return 1000000; /* return 1 msec on very small boxen */ } @@ -5930,7 +6025,8 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2) avg_fluct = (avg_fluct + fluct)/2; if (migration_debug) - printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n", + printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): " + "(%8Ld %8Ld)\n", cpu1, cpu2, size, (long)cost / 1000000, ((long)cost / 100000) % 10, @@ -6025,20 +6121,18 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map) -1 #endif ); - if (system_state == SYSTEM_BOOTING) { - if (num_online_cpus() > 1) { - printk("migration_cost="); - for (distance = 0; distance <= max_distance; distance++) { - if (distance) - printk(","); - printk("%ld", (long)migration_cost[distance] / 1000); - } - printk("\n"); + if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) 
{ + printk("migration_cost="); + for (distance = 0; distance <= max_distance; distance++) { + if (distance) + printk(","); + printk("%ld", (long)migration_cost[distance] / 1000); } + printk("\n"); } j1 = jiffies; if (migration_debug) - printk("migration: %ld seconds\n", (j1-j0)/HZ); + printk("migration: %ld seconds\n", (j1-j0) / HZ); /* * Move back to the original CPU. NUMA-Q gets confused @@ -6135,10 +6229,13 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; */ #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static struct sched_group sched_group_cpus[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); -static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + if (sg) + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif @@ -6148,39 +6245,52 @@ static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) */ #ifdef CONFIG_SCHED_MC static DEFINE_PER_CPU(struct sched_domain, core_domains); -static struct sched_group sched_group_core[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) -static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + int group; cpumask_t mask = cpu_sibling_map[cpu]; cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); + if (sg) + *sg = &per_cpu(sched_group_core, group); + return group; } #elif defined(CONFIG_SCHED_MC) -static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + if (sg) + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); -static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + int group; #ifdef CONFIG_SCHED_MC cpumask_t mask = cpu_coregroup_map(cpu); cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); #elif defined(CONFIG_SCHED_SMT) cpumask_t mask = cpu_sibling_map[cpu]; cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); #else - return cpu; + group = cpu; #endif + if (sg) + *sg = &per_cpu(sched_group_phys, group); + return group; } #ifdef CONFIG_NUMA @@ -6193,12 +6303,22 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { - return cpu_to_node(cpu); + cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu)); + int group; + + cpus_and(nodemask, nodemask, *cpu_map); + group = first_cpu(nodemask); + + if (sg) + *sg = &per_cpu(sched_group_allnodes, group); + return group; } + static void init_numa_sched_groups_power(struct sched_group *group_head) { struct sched_group *sg = group_head; @@ -6234,16 
+6354,9 @@ static void free_sched_groups(const cpumask_t *cpu_map) int cpu, i; for_each_cpu_mask(cpu, *cpu_map) { - struct sched_group *sched_group_allnodes - = sched_group_allnodes_bycpu[cpu]; struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; - if (sched_group_allnodes) { - kfree(sched_group_allnodes); - sched_group_allnodes_bycpu[cpu] = NULL; - } - if (!sched_group_nodes) continue; @@ -6337,7 +6450,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) struct sched_domain *sd; #ifdef CONFIG_NUMA struct sched_group **sched_group_nodes = NULL; - struct sched_group *sched_group_allnodes = NULL; + int sd_allnodes = 0; /* * Allocate the per-node list of sched groups @@ -6355,7 +6468,6 @@ static int build_sched_domains(const cpumask_t *cpu_map) * Set up domains for cpus specified by the cpu_map. */ for_each_cpu_mask(i, *cpu_map) { - int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); @@ -6364,26 +6476,12 @@ static int build_sched_domains(const cpumask_t *cpu_map) #ifdef CONFIG_NUMA if (cpus_weight(*cpu_map) > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { - if (!sched_group_allnodes) { - sched_group_allnodes - = kmalloc_node(sizeof(struct sched_group) - * MAX_NUMNODES, - GFP_KERNEL, - cpu_to_node(i)); - if (!sched_group_allnodes) { - printk(KERN_WARNING - "Can not alloc allnodes sched group\n"); - goto error; - } - sched_group_allnodes_bycpu[i] - = sched_group_allnodes; - } sd = &per_cpu(allnodes_domains, i); *sd = SD_ALLNODES_INIT; sd->span = *cpu_map; - group = cpu_to_allnodes_group(i, cpu_map); - sd->groups = &sched_group_allnodes[group]; + cpu_to_allnodes_group(i, cpu_map, &sd->groups); p = sd; + sd_allnodes = 1; } else p = NULL; @@ -6398,36 +6496,33 @@ static int build_sched_domains(const cpumask_t *cpu_map) p = sd; sd = &per_cpu(phys_domains, i); - group = cpu_to_phys_group(i, cpu_map); *sd = SD_CPU_INIT; sd->span = nodemask; sd->parent = p; if (p) p->child = sd; - sd->groups = &sched_group_phys[group]; + cpu_to_phys_group(i, cpu_map, &sd->groups); #ifdef CONFIG_SCHED_MC p = sd; sd = &per_cpu(core_domains, i); - group = cpu_to_core_group(i, cpu_map); *sd = SD_MC_INIT; sd->span = cpu_coregroup_map(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; - sd->groups = &sched_group_core[group]; + cpu_to_core_group(i, cpu_map, &sd->groups); #endif #ifdef CONFIG_SCHED_SMT p = sd; sd = &per_cpu(cpu_domains, i); - group = cpu_to_cpu_group(i, cpu_map); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; - sd->groups = &sched_group_cpus[group]; + cpu_to_cpu_group(i, cpu_map, &sd->groups); #endif } @@ -6439,8 +6534,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) if (i != first_cpu(this_sibling_map)) continue; - init_sched_build_groups(sched_group_cpus, this_sibling_map, - cpu_map, &cpu_to_cpu_group); + init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group); } #endif @@ -6451,8 +6545,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) cpus_and(this_core_map, this_core_map, *cpu_map); if (i != first_cpu(this_core_map)) continue; - init_sched_build_groups(sched_group_core, this_core_map, - cpu_map, &cpu_to_core_group); + init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group); } #endif @@ -6465,15 +6558,13 @@ static int build_sched_domains(const cpumask_t *cpu_map) if (cpus_empty(nodemask)) continue; - init_sched_build_groups(sched_group_phys, nodemask, - cpu_map, &cpu_to_phys_group); + 
init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); } #ifdef CONFIG_NUMA /* Set up node groups */ - if (sched_group_allnodes) - init_sched_build_groups(sched_group_allnodes, *cpu_map, - cpu_map, &cpu_to_allnodes_group); + if (sd_allnodes) + init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group); for (i = 0; i < MAX_NUMNODES; i++) { /* Set up node groups */ @@ -6565,10 +6656,10 @@ static int build_sched_domains(const cpumask_t *cpu_map) for (i = 0; i < MAX_NUMNODES; i++) init_numa_sched_groups_power(sched_group_nodes[i]); - if (sched_group_allnodes) { - int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map); - struct sched_group *sg = &sched_group_allnodes[group]; + if (sd_allnodes) { + struct sched_group *sg; + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); init_numa_sched_groups_power(sg); } #endif @@ -6847,6 +6938,10 @@ void __init sched_init(void) set_load_weight(&init_task); +#ifdef CONFIG_SMP + open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); +#endif + #ifdef CONFIG_RT_MUTEXES plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 025fcb3c66f..130c5ec9ee0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -133,7 +133,7 @@ extern int max_lock_depth; #ifdef CONFIG_SYSCTL_SYSCALL static int parse_table(int __user *, int, void __user *, size_t __user *, - void __user *, size_t, ctl_table *, void **); + void __user *, size_t, ctl_table *); #endif static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, @@ -141,12 +141,12 @@ static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context); + void __user *newval, size_t newlen); #ifdef CONFIG_SYSVIPC static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context); + void __user *newval, size_t newlen); #endif #ifdef CONFIG_PROC_SYSCTL @@ -1243,7 +1243,6 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); - void *context = NULL; if (!use_table(head)) continue; @@ -1251,9 +1250,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol spin_unlock(&sysctl_lock); error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table, - &context); - kfree(context); + newval, newlen, head->ctl_table); spin_lock(&sysctl_lock); unuse_table(head); @@ -1309,7 +1306,7 @@ static inline int ctl_perm(ctl_table *table, int op) static int parse_table(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, - ctl_table *table, void **context) + ctl_table *table) { int n; repeat: @@ -1329,7 +1326,7 @@ repeat: error = table->strategy( table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); if (error) return error; } @@ -1340,7 +1337,7 @@ repeat: } error = do_sysctl_strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); return error; } } @@ -1351,7 +1348,7 @@ repeat: int do_sysctl_strategy (ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, 
size_t newlen) { int op = 0, rc; size_t len; @@ -1365,7 +1362,7 @@ int do_sysctl_strategy (ctl_table *table, if (table->strategy) { rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); if (rc < 0) return rc; if (rc > 0) @@ -1931,9 +1928,6 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, #define OP_SET 0 #define OP_AND 1 -#define OP_OR 2 -#define OP_MAX 3 -#define OP_MIN 4 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, int *valp, @@ -1945,13 +1939,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, switch(op) { case OP_SET: *valp = val; break; case OP_AND: *valp &= val; break; - case OP_OR: *valp |= val; break; - case OP_MAX: if(*valp < val) - *valp = val; - break; - case OP_MIN: if(*valp > val) - *valp = val; - break; } } else { int val = *valp; @@ -2408,6 +2395,17 @@ static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, { return -ENOSYS; } +static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} #endif int proc_dointvec(ctl_table *table, int write, struct file *filp, @@ -2472,7 +2470,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, /* The generic string strategy routine: */ int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (!table->data || !table->maxlen) return -ENOTDIR; @@ -2518,7 +2516,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, */ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (newval && newlen) { @@ -2554,7 +2552,7 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, /* Strategy function to convert jiffies to seconds */ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval) { size_t olen; @@ -2582,7 +2580,7 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, /* Strategy function to convert jiffies to seconds */ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval) { size_t olen; @@ -2611,7 +2609,7 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, /* The generic string strategy routine: */ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { struct ctl_table uts_table; int r, write; @@ -2619,7 +2617,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); r = sysctl_string(&uts_table, name, nlen, - oldval, oldlenp, newval, newlen, context); + oldval, oldlenp, newval, newlen); put_uts(table, write, 
uts_table.data); return r; } @@ -2628,7 +2626,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, /* The generic sysctl ipc data routine. */ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { size_t len; void *data; @@ -2703,41 +2701,41 @@ out: int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } int sysctl_intvec(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 74eca5939bd..22504afc0d3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -156,7 +156,7 @@ int clocksource_register(struct clocksource *c) /* check if clocksource is already registered */ if (is_registered_source(c)) { printk("register_clocksource: Cannot register %s. 
" - "Already registered!", c->name); + "Already registered!", c->name); ret = -EBUSY; } else { /* register it */ @@ -186,6 +186,7 @@ void clocksource_reselect(void) } EXPORT_SYMBOL(clocksource_reselect); +#ifdef CONFIG_SYSFS /** * sysfs_show_current_clocksources - sysfs interface for current clocksource * @dev: unused @@ -275,10 +276,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf) * Sysfs setup bits: */ static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, - sysfs_override_clocksource); + sysfs_override_clocksource); static SYSDEV_ATTR(available_clocksource, 0600, - sysfs_show_available_clocksources, NULL); + sysfs_show_available_clocksources, NULL); static struct sysdev_class clocksource_sysclass = { set_kset_name("clocksource"), @@ -307,6 +308,7 @@ static int __init init_clocksource_sysfs(void) } device_initcall(init_clocksource_sysfs); +#endif /* CONFIG_SYSFS */ /** * boot_override_clocksource - boot clock override diff --git a/kernel/timer.c b/kernel/timer.c index c1c7fbcffec..0256ab443d8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -80,6 +80,138 @@ tvec_base_t boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the "j" parameter. + */ +unsigned long __round_jiffies(unsigned long j, int cpu) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. 
+ */ + if (rem < HZ/4) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} +EXPORT_SYMBOL_GPL(__round_jiffies); + +/** + * __round_jiffies_relative - function to round jiffies to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies_relative rounds a time delta in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the "j" parameter. + */ +unsigned long __round_jiffies_relative(unsigned long j, int cpu) +{ + /* + * In theory the following code can skip a jiffy in case jiffies + * increments right between the addition and the later subtraction. + * However since the entire point of this function is to use approximate + * timeouts, it's entirely ok to not handle that. + */ + return __round_jiffies(j + jiffies, cpu) - jiffies; +} +EXPORT_SYMBOL_GPL(__round_jiffies_relative); + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the "j" parameter. + */ +unsigned long round_jiffies(unsigned long j) +{ + return __round_jiffies(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies); + +/** + * round_jiffies_relative - function to round jiffies to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * round_jiffies_relative rounds a time delta in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the "j" parameter. 
+ */ +unsigned long round_jiffies_relative(unsigned long j) +{ + return __round_jiffies_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_relative); + + static inline void set_running_timer(tvec_base_t *base, struct timer_list *timer) { @@ -714,7 +846,7 @@ static int change_clocksource(void) clock = new; clock->cycle_last = now; printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); + clock->name); return 1; } else if (clock->update_callback) { return clock->update_callback(); @@ -722,7 +854,10 @@ static int change_clocksource(void) return 0; } #else -#define change_clocksource() (0) +static inline int change_clocksource(void) +{ + return 0; +} #endif /** @@ -820,7 +955,8 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. */ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset) +static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, + s64 *offset) { s64 tick_error, i; u32 look_ahead, adj; @@ -844,7 +980,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 * * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); + tick_error = current_tick_length() >> + (TICK_LENGTH_SHIFT - clock->shift + 1); tick_error -= clock->xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; @@ -896,7 +1033,8 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset) clock->mult += adj; clock->xtime_interval += interval; clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); + clock->error -= (interval - offset) << + (TICK_LENGTH_SHIFT - clock->shift); } /** diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 96f77013d3f..baacc369141 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -96,6 +96,15 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->write_char = p->wchar; stats->read_syscalls = p->syscr; stats->write_syscalls = p->syscw; +#ifdef CONFIG_TASK_IO_ACCOUNTING + stats->read_bytes = p->ioac.read_bytes; + stats->write_bytes = p->ioac.write_bytes; + stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; +#else + stats->read_bytes = 0; + stats->write_bytes = 0; + stats->cancelled_write_bytes = 0; +#endif } #undef KB #undef MB diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6eccc643c32..0701ddda1df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -47,6 +47,30 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. +config DEBUG_FS + bool "Debug Filesystem" + depends on SYSFS + help + debugfs is a virtual file system that kernel developers use to put + debugging files into. Enable this option to be able to read and + write to these files. + + If unsure, say N. + +config HEADERS_CHECK + bool "Run 'make headers_check' when building vmlinux" + depends on !UML + help + This option will extract the user-visible kernel headers whenever + building the kernel, and will run basic sanity checks on them to + ensure that exported files do not attempt to include files which + were not exported, etc. 
+ + If you're making modifications to header files which are + relevant for userspace, say 'Y', and check the headers + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. + config DEBUG_KERNEL bool "Kernel debugging" help @@ -302,16 +326,6 @@ config DEBUG_INFO If unsure, say N. -config DEBUG_FS - bool "Debug Filesystem" - depends on SYSFS - help - debugfs is a virtual file system that kernel developers use to put - debugging files into. Enable this option to be able to read and - write to these files. - - If unsure, say N. - config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -372,20 +386,6 @@ config FORCED_INLINING become the default in the future, until then this option is there to test gcc for this. -config HEADERS_CHECK - bool "Run 'make headers_check' when building vmlinux" - depends on !UML - help - This option will extract the user-visible kernel headers whenever - building the kernel, and will run basic sanity checks on them to - ensure that exported files do not attempt to include files which - were not exported, etc. - - If you're making modifications to header files which are - relevant for userspace, say 'Y', and check the headers - exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in - your build tree), to make sure they're suitable. - config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL @@ -402,6 +402,7 @@ config RCU_TORTURE_TEST config LKDTM tristate "Linux Kernel Dump Test Tool Module" + depends on DEBUG_KERNEL depends on KPROBES default n help diff --git a/mm/filemap.c b/mm/filemap.c index 606432f71b3..8332c77b1bd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1181,8 +1181,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (pos < size) { retval = generic_file_direct_IO(READ, iocb, iov, pos, nr_segs); - if (retval > 0 && !is_sync_kiocb(iocb)) - retval = -EIOCBQUEUED; if (retval > 0) *ppos = pos + retval; } @@ -2047,15 +2045,14 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, * Sync the fs metadata but not the minor inode changes and * of course not the data as we did direct DMA for the IO. * i_mutex is held, which protects generic_osync_inode() from - * livelocking. + * livelocking. AIO O_DIRECT ops attempt to sync metadata here. 
*/ - if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + if ((written >= 0 || written == -EIOCBQUEUED) && + ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); if (err < 0) written = err; } - if (written == count && !is_sync_kiocb(iocb)) - written = -EIOCBQUEUED; return written; } EXPORT_SYMBOL(generic_file_direct_write); diff --git a/mm/memory.c b/mm/memory.c index 4198df0dff1..bf6100236e6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) - return -ENOMEM; + return -EAGAIN; arch_enter_lazy_mmu_mode(); do { struct page *page = ZERO_PAGE(addr); pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); + + if (unlikely(!pte_none(*pte))) { + err = -EEXIST; + pte++; + break; + } page_cache_get(page); page_add_file_rmap(page); inc_mm_counter(mm, file_rss); - BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, zero_pte); } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, { pmd_t *pmd; unsigned long next; + int err; pmd = pmd_alloc(mm, pud, addr); if (!pmd) - return -ENOMEM; + return -EAGAIN; do { next = pmd_addr_end(addr, end); - if (zeromap_pte_range(mm, pmd, addr, next, prot)) - return -ENOMEM; + err = zeromap_pte_range(mm, pmd, addr, next, prot); + if (err) + break; } while (pmd++, addr = next, addr != end); - return 0; + return err; } static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, @@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, { pud_t *pud; unsigned long next; + int err; pud = pud_alloc(mm, pgd, addr); if (!pud) - return -ENOMEM; + return -EAGAIN; do { next = pud_addr_end(addr, end); - if (zeromap_pmd_range(mm, pud, addr, next, prot)) - return -ENOMEM; + err = zeromap_pmd_range(mm, pud, addr, next, prot); + if (err) + break; } while (pud++, addr = next, addr != end); - return 0; + return err; } int zeromap_page_range(struct vm_area_struct *vma, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8d9b19f239c..237107c1b08 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -21,6 +21,7 @@ #include <linux/writeback.h> #include <linux/init.h> #include <linux/backing-dev.h> +#include <linux/task_io_accounting_ops.h> #include <linux/blkdev.h> #include <linux/mpage.h> #include <linux/rmap.h> @@ -761,23 +762,24 @@ int __set_page_dirty_nobuffers(struct page *page) struct address_space *mapping = page_mapping(page); struct address_space *mapping2; - if (mapping) { - write_lock_irq(&mapping->tree_lock); - mapping2 = page_mapping(page); - if (mapping2) { /* Race with truncate? */ - BUG_ON(mapping2 != mapping); - if (mapping_cap_account_dirty(mapping)) - __inc_zone_page_state(page, - NR_FILE_DIRTY); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&mapping->tree_lock); - if (mapping->host) { - /* !PageAnon && !swapper_space */ - __mark_inode_dirty(mapping->host, - I_DIRTY_PAGES); + if (!mapping) + return 1; + + write_lock_irq(&mapping->tree_lock); + mapping2 = page_mapping(page); + if (mapping2) { /* Race with truncate? 
*/ + BUG_ON(mapping2 != mapping); + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + task_io_account_write(PAGE_CACHE_SIZE); } + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&mapping->tree_lock); + if (mapping->host) { + /* !PageAnon && !swapper_space */ + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } return 1; } @@ -851,27 +853,26 @@ int test_clear_page_dirty(struct page *page) struct address_space *mapping = page_mapping(page); unsigned long flags; - if (mapping) { - write_lock_irqsave(&mapping->tree_lock, flags); - if (TestClearPageDirty(page)) { - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - write_unlock_irqrestore(&mapping->tree_lock, flags); - /* - * We can continue to use `mapping' here because the - * page is locked, which pins the address_space - */ - if (mapping_cap_account_dirty(mapping)) { - page_mkclean(page); - dec_zone_page_state(page, NR_FILE_DIRTY); - } - return 1; - } + if (!mapping) + return TestClearPageDirty(page); + + write_lock_irqsave(&mapping->tree_lock, flags); + if (TestClearPageDirty(page)) { + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); - return 0; + /* + * We can continue to use `mapping' here because the + * page is locked, which pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } + return 1; } - return TestClearPageDirty(page); + write_unlock_irqrestore(&mapping->tree_lock, flags); + return 0; } EXPORT_SYMBOL(test_clear_page_dirty); @@ -893,17 +894,17 @@ int clear_page_dirty_for_io(struct page *page) { struct address_space *mapping = page_mapping(page); - if (mapping) { - if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) { - page_mkclean(page); - dec_zone_page_state(page, NR_FILE_DIRTY); - } - return 1; + if (!mapping) + return TestClearPageDirty(page); + + if (TestClearPageDirty(page)) { + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); } - return 0; + return 1; } - return TestClearPageDirty(page); + return 0; } EXPORT_SYMBOL(clear_page_dirty_for_io); diff --git a/mm/readahead.c b/mm/readahead.c index c0df5ed05f6..0f539e8e827 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/task_io_accounting_ops.h> #include <linux/pagevec.h> void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) @@ -151,6 +152,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, put_pages_list(pages); break; } + task_io_account_read(PAGE_CACHE_SIZE); } pagevec_lru_add(&lru_pvec); return ret; diff --git a/mm/slab.c b/mm/slab.c index 56af694c9e6..2c655532f5e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -946,7 +946,8 @@ static void __devinit start_cpu_timer(int cpu) if (keventd_up() && reap_work->work.func == NULL) { init_reap_node(cpu); INIT_DELAYED_WORK(reap_work, cache_reap); - schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); + schedule_delayed_work_on(cpu, reap_work, + __round_jiffies_relative(HZ, cpu)); } } @@ -4006,7 +4007,7 @@ static void cache_reap(struct work_struct *unused) if (!mutex_trylock(&cache_chain_mutex)) { /* Give up. Setup the next iteration. 
*/ schedule_delayed_work(&__get_cpu_var(reap_work), - REAPTIMEOUT_CPUC); + round_jiffies_relative(REAPTIMEOUT_CPUC)); return; } @@ -4052,7 +4053,8 @@ next: next_reap_node(); refresh_cpu_vm_stats(smp_processor_id()); /* Set up the next iteration */ - schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); + schedule_delayed_work(&__get_cpu_var(reap_work), + round_jiffies_relative(REAPTIMEOUT_CPUC)); } #ifdef CONFIG_PROC_FS diff --git a/mm/truncate.c b/mm/truncate.c index e07b1e682c3..9bfb8e85386 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/pagemap.h> #include <linux/pagevec.h> +#include <linux/task_io_accounting_ops.h> #include <linux/buffer_head.h> /* grr. try_to_release_page, do_invalidatepage */ @@ -69,7 +70,8 @@ truncate_complete_page(struct address_space *mapping, struct page *page) if (PagePrivate(page)) do_invalidatepage(page, 0); - clear_page_dirty(page); + if (test_clear_page_dirty(page)) + task_io_account_cancelled_write(PAGE_CACHE_SIZE); ClearPageUptodate(page); ClearPageMappedToDisk(page); remove_from_page_cache(page); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0b9d4c95515..fc6f3c023a5 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -167,8 +167,7 @@ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context); + void __user *newval, size_t newlen); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; @@ -347,8 +346,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { #ifdef CONFIG_DECNET_ROUTER struct net_device *dev = table->extra1; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index e246f054f36..a4065eb1341 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str) static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; __le16 addr; @@ -220,8 +219,7 @@ static int dn_node_address_handler(ctl_table *table, int write, static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; struct net_device *dev; diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index eec1a1dd91d..e3f37fdda65 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -438,7 +438,7 @@ ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) spin_lock_irqsave(&mac->lock, flags); mac->associnfo.associating = 1; - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); spin_unlock_irqrestore(&mac->lock, flags); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2fd899160f8..84bed40273a 100644 --- a/net/ipv4/devinet.c +++ 
b/net/ipv4/devinet.c @@ -1303,8 +1303,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11c167118e8..1aaff0a2e09 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2872,8 +2872,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, - size_t newlen, - void **context) + size_t newlen) { int delay; if (newlen != sizeof(int)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index dfcf47f10f8..fabf69a9108 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -51,8 +51,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, static int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -111,8 +110,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { @@ -122,8 +120,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, - context); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -169,8 +166,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, + size_t newlen) { ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; @@ -180,8 +177,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, - context); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a5e8d207a51..9b0a9064315 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3656,8 +3656,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 56ea9283730..6a9f616de37 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1667,8 +1667,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, 
int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1681,14 +1680,12 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; default: ret = 0; diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 4f5ff19b992..f01f8c07285 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -56,6 +56,9 @@ endef # gcc support functions # See documentation in Documentation/kbuild/makefiles.txt +# output directory for tests below +TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) + # as-option # Usage: cflags-y += $(call as-option, -Wa$(comma)-isa=foo,) @@ -66,9 +69,11 @@ as-option = $(shell if $(CC) $(CFLAGS) $(1) -Wa,-Z -c -o /dev/null \ # as-instr # Usage: cflags-y += $(call as-instr, instr, option1, option2) -as-instr = $(shell if echo -e "$(1)" | $(AS) >/dev/null 2>&1 -W -Z -o astest$$$$.out ; \ - then echo "$(2)"; else echo "$(3)"; fi; \ - rm -f astest$$$$.out) +as-instr = $(shell if echo -e "$(1)" | \ + $(CC) $(AFLAGS) -c -xassembler - \ + -o $(TMPOUT)astest$$$$.out > /dev/null 2>&1; \ + then rm $(TMPOUT)astest$$$$.out; echo "$(2)"; \ + else echo "$(3)"; fi) # cc-option # Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586) @@ -97,10 +102,10 @@ cc-ifversion = $(shell if [ $(call cc-version, $(CC)) $(1) $(2) ]; then \ # ld-option # Usage: ldflags += $(call ld-option, -Wl$(comma)--hash-style=both) -ld-option = $(shell if $(CC) $(1) \ - -nostdlib -o ldtest$$$$.out -xc /dev/null \ - > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi; \ - rm -f ldtest$$$$.out) +ld-option = $(shell if $(CC) $(1) -nostdlib -xc /dev/null \ + -o $(TMPOUT)ldtest$$$$.out > /dev/null 2>&1; \ + then rm $(TMPOUT)ldtest$$$$.out; echo "$(1)"; \ + else echo "$(2)"; fi) ### # Shorthand for $(Q)$(MAKE) -f scripts/Makefile.build obj= diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3753416eb9b..65fb5e8ea94 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1734,7 +1734,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) j++; i = j * __NFDBITS; fdt = files_fdtable(files); - if (i >= fdt->max_fds || i >= fdt->max_fdset) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j]; if (!set) |
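
A large share of the hunks above simply drop the unused "void **context" argument from sysctl strategy routines and from the generic helpers they call. As a rough sketch of the resulting shape for a strategy routine (example_strategy is an invented name, not part of the patch; it just defers to the generic sysctl_intvec() whose new prototype appears above):

	#include <linux/sysctl.h>

	/* Sketch only: a strategy routine using the post-patch prototype. */
	static int example_strategy(ctl_table *table, int __user *name, int nlen,
				    void __user *oldval, size_t __user *oldlenp,
				    void __user *newval, size_t newlen)
	{
		/* any table-specific validation or conversion would go here */
		return sysctl_intvec(table, name, nlen, oldval, oldlenp,
				     newval, newlen);
	}

Callers such as parse_table() in kernel/sysctl.c correspondingly pass seven arguments instead of eight, as the hunks above show.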
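
The round_jiffies()/round_jiffies_relative() helpers added to kernel/timer.c above are meant to be dropped into existing timer-arming code so that imprecise periodic timers fire on whole-second boundaries. A minimal sketch of that usage for an ordinary kernel timer (my_timer and my_timer_fn are invented names, not part of the patch):

	/*
	 * Sketch only: re-arming a low-precision periodic timer with the
	 * new rounding helpers.
	 */
	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static struct timer_list my_timer;

	static void my_timer_fn(unsigned long data)
	{
		/* ... periodic housekeeping that does not need exact timing ... */

		/* re-arm roughly 5 seconds out, snapped to a whole second */
		mod_timer(&my_timer, round_jiffies(jiffies + 5 * HZ));
	}

	static void my_timer_start(void)
	{
		init_timer(&my_timer);
		my_timer.function = my_timer_fn;
		my_timer.data = 0;
		mod_timer(&my_timer, round_jiffies(jiffies + 5 * HZ));
	}

For interfaces that take a relative delay rather than an absolute expiry, round_jiffies_relative() plays the same role, which is exactly how the mm/slab.c hunk reschedules cache_reap(). On CPU 0 (no skew) with HZ=1000, a target 200 ms past a second boundary (rem < HZ/4) is rounded down to that boundary, while one 300 ms past is rounded up to the next full second.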
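
The DEBUG_FS entry moved in lib/Kconfig.debug above only describes debugfs in prose. As a rough sketch of the kind of use its help text has in mind, i.e. a driver putting a debugging file into debugfs (all example_* names are invented and error handling is abbreviated):

	#include <linux/debugfs.h>
	#include <linux/errno.h>

	static u32 example_counter;
	static struct dentry *example_dir, *example_file;

	static int example_debugfs_init(void)
	{
		/* creates a directory under the debugfs mount point */
		example_dir = debugfs_create_dir("example", NULL);
		if (!example_dir)
			return -ENODEV;

		/* exposes example_counter as a read/write decimal file */
		example_file = debugfs_create_u32("counter", 0644, example_dir,
						  &example_counter);
		return 0;
	}

	static void example_debugfs_exit(void)
	{
		debugfs_remove(example_file);
		debugfs_remove(example_dir);
	}

With CONFIG_DEBUG_FS enabled and debugfs mounted (conventionally at /sys/kernel/debug), the counter then appears as a plain text file that can be read and written from userspace.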