Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: lguest: Do not append space to guests kernel command line lguest: Revert 1ce70c4fac3c3954bd48c035f448793867592bc0, fix real problem. lguest: Sanitize the lguest clock. lguest: fix __get_vm_area usage. lguest: make sure cpu is initialized before accessing it
author: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-10 18:03:20 -0700
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-10 18:03:20 -0700
commit: aeb24d2fb08653a39abb50281b1ffa2d2a6879ab (patch)
tree: d7fe503eb64ff1d9b76bf34095819f631a39b660
parent: 5c0dea0959356d77d985ecfb2911e7a9e23b95e3 (diff)
parent: 1ef36fa64e65079de18ff5179a51af58e44d49a6 (diff)
5 files changed, 49 insertions, 45 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 0f23d67f958..bec5a32e409 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[])
 	unsigned int i, len = 0;
 
 	for (i = 0; args[i]; i++) {
+		if (i) {
+			strcat(dst+len, " ");
+			len++;
+		}
 		strcpy(dst+len, args[i]);
-		strcat(dst+len, " ");
-		len += strlen(args[i]) + 1;
+		len += strlen(args[i]);
 	}
 	/* In case it's empty. */
 	dst[len] = '\0';
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cccb38a5965..a104c532ff7 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
 	.blocked_interrupts = { 1 }, /* Block timer interrupts */
 	.syscall_vec = SYSCALL_VECTOR,
 };
-static cycle_t clock_base;
 
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
  * ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
-		*dx &= 0x07808101;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+		*dx &= 0x07808111;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	*pmdp = pmdval;
 	lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
-		   (__pa(pmdp)&(PAGE_SIZE-1)), 0);
+		   (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
 }
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
 	return lguest_data.time.tv_sec;
 }
 
+/* The TSC is a Time Stamp Counter.  The Host tells us what speed it runs at,
+ * or 0 if it's unusable as a reliable clock source.  This matches what we want
+ * here: if we return 0 from this function, the x86 TSC clock will not register
+ * itself. */
+static unsigned long lguest_cpu_khz(void)
+{
+	return lguest_data.tsc_khz;
+}
+
+/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
+ * we read the time value given to us by the Host. */
 static cycle_t lguest_clock_read(void)
 {
 	unsigned long sec, nsec;
 
-	/* If the Host tells the TSC speed, we can trust that. */
-	if (lguest_data.tsc_khz)
-		return native_read_tsc();
-
-	/* If we can't use the TSC, we read the time value written by the Host.
-	 * Since it's in two parts (seconds and nanoseconds), we risk reading
-	 * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
-	 * getting 99 and 0.  As Linux tends to come apart under the stress of
-	 * time travel, we must be careful: */
+	/* Since the time is in two parts (seconds and nanoseconds), we risk
+	 * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
+	 * and getting 99 and 0.  As Linux tends to come apart under the stress
+	 * of time travel, we must be careful: */
 	do {
 		/* First we read the seconds part. */
 		sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
 		/* Now if the seconds part has changed, try again. */
 	} while (unlikely(lguest_data.time.tv_sec != sec));
 
-	/* Our non-TSC clock is in real nanoseconds. */
+	/* Our lguest clock is in real nanoseconds. */
 	return sec*1000000000ULL + nsec;
 }
 
-/* This is what we tell the kernel is our clocksource.  */
+/* This is the fallback clocksource: lower priority than the TSC clocksource. */
 static struct clocksource lguest_clock = {
 	.name		= "lguest",
-	.rating		= 400,
+	.rating		= 200,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
 	.mult		= 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
-	return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
-
 /* We also need a "struct clock_event_device": Linux asks us to set it to go
  * off some time in the future.  Actually, James Morris figured all this out, I
  * just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
 	set_irq_handler(0, lguest_time_irq);
 
-	/* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
-	 * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
-	 * Either way, the "rating" is set so high that it's always chosen over
-	 * any other clocksource. */
-	if (lguest_data.tsc_khz)
-		lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
-							 lguest_clock.shift);
-	clock_base = lguest_clock_read();
 	clocksource_register(&lguest_clock);
 
-	/* Now we've set up our clock, we can use it as the scheduler clock */
-	pv_time_ops.sched_clock = lguest_sched_clock;
-
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
 	lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
 	/* time operations */
 	pv_time_ops.get_wallclock = lguest_get_wallclock;
 	pv_time_ops.time_init = lguest_time_init;
+	pv_time_ops.get_cpu_khz = lguest_cpu_khz;
 
 	/* Now is a good time to look at the implementations of these functions
 	 * before returning to the rest of lguest_init(). */
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 7743d73768d..c632c08cbbd 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -69,11 +69,22 @@ static __init int map_switcher(void)
 		switcher_page[i] = virt_to_page(addr);
 	}
 
+	/* First we check that the Switcher won't overlap the fixmap area at
+	 * the top of memory.  It's currently nowhere near, but it could have
+	 * very strange effects if it ever happened. */
+	if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
+		err = -ENOMEM;
+		printk("lguest: mapping switcher would thwack fixmap\n");
+		goto free_pages;
+	}
+
 	/* Now we reserve the "virtual memory area" we want: 0xFFC00000
 	 * (SWITCHER_ADDR).  We might not get it in theory, but in practice
-	 * it's worked so far. */
+	 * it's worked so far.  The end address needs +1 because __get_vm_area
+	 * allocates an extra guard page, so we need space for that. */
 	switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
-				       VM_ALLOC, SWITCHER_ADDR, VMALLOC_END);
+				     VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
+				     + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
 	if (!switcher_vma) {
 		err = -ENOMEM;
 		printk("lguest: could not map switcher pages high\n");
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 85d42d3d01a..2221485b077 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in,
 		cpu = &lg->cpus[cpu_id];
 		if (!cpu)
 			return -EINVAL;
-	}
 
-	/* Once the Guest is dead, all you can do is read() why it died. */
-	if (lg && lg->dead)
-		return -ENOENT;
+		/* Once the Guest is dead, you can only read() why it died. */
+		if (lg->dead)
+			return -ENOENT;
 
-	/* If you're not the task which owns the Guest, you can only break */
-	if (lg && current != cpu->tsk && req != LHREQ_BREAK)
-		return -EPERM;
+		/* If you're not the task which owns the Guest, all you can do
+		 * is break the Launcher out of running the Guest. */
+		if (current != cpu->tsk && req != LHREQ_BREAK)
+			return -EPERM;
+	}
 
 	switch (req) {
 	case LHREQ_INITIALIZE:
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 275f23c2deb..a7f64a9d67e 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
 {
 	unsigned int i;
 	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].gpgdir == pgtable)
+		if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable)
 			break;
 	return i;
 }
author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-03-10 18:03:20 -0700
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-03-10 18:03:20 -0700
commit	aeb24d2fb08653a39abb50281b1ffa2d2a6879ab (patch)
tree	d7fe503eb64ff1d9b76bf34095819f631a39b660
parent	5c0dea0959356d77d985ecfb2911e7a9e23b95e3 (diff)
parent	1ef36fa64e65079de18ff5179a51af58e44d49a6 (diff)