From ee2fa7435b6dddf1ca119f298ad0100cf50c0397 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 17 Sep 2008 13:47:25 +0200
Subject: AMD IOMMU: set iommu sunc flag after command queuing

The iommu->need_sync flag must be set after the command is queued to
avoid race conditions.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69b4d060b21..a96d8c049a8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -140,6 +140,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 {
 	struct iommu_cmd cmd;
+	int ret;
 
 	BUG_ON(iommu == NULL);
 
@@ -147,9 +148,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
 	cmd.data[0] = devid;
 
+	ret = iommu_queue_command(iommu, &cmd);
+
 	iommu->need_sync = 1;
 
-	return iommu_queue_command(iommu, &cmd);
+	return ret;
 }
 
 /*
@@ -159,6 +162,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 		u64 address, u16 domid, int pde, int s)
 {
 	struct iommu_cmd cmd;
+	int ret;
 
 	memset(&cmd, 0, sizeof(cmd));
 	address &= PAGE_MASK;
@@ -171,9 +175,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 
+	ret = iommu_queue_command(iommu, &cmd);
+
 	iommu->need_sync = 1;
 
-	return iommu_queue_command(iommu, &cmd);
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3


From 7e4f88da7bf1887563f70bd5edbbd0479e31dc12 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 17 Sep 2008 14:19:15 +0200
Subject: AMD IOMMU: protect completion wait loop with iommu lock

The unlocked polling of the ComWaitInt bit in the IOMMU completion wait
path is racy. Protect it with the iommu lock.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a96d8c049a8..042fdc27bc9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret, ready = 0;
+	int ret = 0, ready = 0;
 	unsigned status = 0;
 	struct iommu_cmd cmd;
-	unsigned long i = 0;
+	unsigned long flags, i = 0;
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
@@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	iommu->need_sync = 0;
 
-	ret = iommu_queue_command(iommu, &cmd);
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	ret = __iommu_queue_command(iommu, &cmd);
 
 	if (ret)
-		return ret;
+		goto out;
 
 	while (!ready && (i < EXIT_LOOP_COUNT)) {
 		++i;
@@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
 		printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+out:
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 7ee766d8fba9dfd93bf3eca7a8d84a25404a68dc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 20 Sep 2008 22:00:40 -0700
Subject: sparc64: Fix disappearing PCI devices on e3500.

Based upon a bug report by Meelis Roos.

The OF device layer builds properties by matching bus types and
applying 'range' properties as appropriate, up to the root.

The match for "PCI" busses is looking at the 'device_type' property,
and this does work %99 of the time.

But on an E3500 system with a PCI QFE card, the DEC 21153 bridge
sitting above the QFE network interface devices has a 'name' of "pci",
but it completely lacks a 'device_type' property.  So we don't match
it as a PCI bus, and subsequently we end up with no resource values at
all for the devices sitting under that DEC bridge.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/of_device.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index f845f150f56..100ebd52749 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -169,7 +169,7 @@ static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long fla
 
 static int of_bus_pci_match(struct device_node *np)
 {
-	if (!strcmp(np->type, "pci") || !strcmp(np->type, "pciex")) {
+	if (!strcmp(np->name, "pci")) {
 		const char *model = of_get_property(np, "model", NULL);
 
 		if (model && !strcmp(model, "SUNW,simba"))
@@ -200,7 +200,7 @@ static int of_bus_simba_match(struct device_node *np)
 	/* Treat PCI busses lacking ranges property just like
 	 * simba.
 	 */
-	if (!strcmp(np->type, "pci") || !strcmp(np->type, "pciex")) {
+	if (!strcmp(np->name, "pci")) {
 		if (!of_find_property(np, "ranges", NULL))
 			return 1;
 	}
@@ -429,7 +429,7 @@ static int __init use_1to1_mapping(struct device_node *pp)
 	 * it lacks a ranges property, and this will include
 	 * cases like Simba.
 	 */
-	if (!strcmp(pp->type, "pci") || !strcmp(pp->type, "pciex"))
+	if (!strcmp(pp->name, "pci"))
 		return 0;
 
 	return 1;
@@ -714,8 +714,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 				break;
 			}
 		} else {
-			if (!strcmp(pp->type, "pci") ||
-			    !strcmp(pp->type, "pciex")) {
+			if (!strcmp(pp->name, "pci")) {
 				unsigned int this_orig_irq = irq;
 
 				irq = pci_irq_swizzle(dp, pp, irq);
-- 
cgit v1.2.3


From b61e06f258e50b25c38a73bea782bdb6876f0f70 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Sat, 20 Sep 2008 18:02:27 +0200
Subject: x86, oprofile: BUG scheduling while atomic

nmi_shutdown() calls unregister_die_notifier() from an atomic context
after setting preempt_disable() via get_cpu_var():

[ 1049.404154] BUG: scheduling while atomic: oprofiled/7796/0x00000002
[ 1049.404171] INFO: lockdep is turned off.
[ 1049.404176] Modules linked in: oprofile af_packet rfcomm l2cap kvm_intel kvm i915 drm acpi_cpufreq cpufreq_userspace cpufreq_conservative cpufreq_ondemand cpufreq_powersave freq_table container sbs sbshc dm_mod arc4 ecb cryptomgr aead snd_hda_intel crypto_blkcipher snd_pcm_oss crypto_algapi snd_pcm iwlagn iwlcore snd_timer iTCO_wdt led_class btusb iTCO_vendor_support snd psmouse bluetooth mac80211 soundcore cfg80211 snd_page_alloc intel_agp video output button battery ac dcdbas evdev ext3 jbd mbcache sg sd_mod piix ata_piix libata scsi_mod dock tg3 libphy ehci_hcd uhci_hcd usbcore thermal processor fan fuse
[ 1049.404362] Pid: 7796, comm: oprofiled Not tainted 2.6.27-rc5-mm1 #30
[ 1049.404368] Call Trace:
[ 1049.404384]  [<ffffffff804769fd>] thread_return+0x4a0/0x7d3
[ 1049.404396]  [<ffffffff8026ad92>] generic_exec_single+0x52/0xe0
[ 1049.404405]  [<ffffffff8026ae1a>] generic_exec_single+0xda/0xe0
[ 1049.404414]  [<ffffffff8026aee3>] smp_call_function_single+0x73/0x150
[ 1049.404423]  [<ffffffff804770c5>] schedule_timeout+0x95/0xd0
[ 1049.404430]  [<ffffffff80476083>] wait_for_common+0x43/0x180
[ 1049.404438]  [<ffffffff80476154>] wait_for_common+0x114/0x180
[ 1049.404448]  [<ffffffff80236980>] default_wake_function+0x0/0x10
[ 1049.404457]  [<ffffffff8024f810>] synchronize_rcu+0x30/0x40
[ 1049.404463]  [<ffffffff8024f890>] wakeme_after_rcu+0x0/0x10
[ 1049.404472]  [<ffffffff80479ca0>] _spin_unlock_irqrestore+0x40/0x80
[ 1049.404482]  [<ffffffff80256def>] atomic_notifier_chain_unregister+0x3f/0x60
[ 1049.404501]  [<ffffffffa03d8801>] nmi_shutdown+0x51/0x90 [oprofile]
[ 1049.404517]  [<ffffffffa03d6134>] oprofile_shutdown+0x34/0x70 [oprofile]
[ 1049.404532]  [<ffffffffa03d721e>] event_buffer_release+0xe/0x40 [oprofile]
[ 1049.404543]  [<ffffffff802bdcdd>] __fput+0xcd/0x240
[ 1049.404551]  [<ffffffff802baa74>] filp_close+0x54/0x90
[ 1049.404560]  [<ffffffff8023e1d1>] put_files_struct+0xb1/0xd0
[ 1049.404568]  [<ffffffff8023f82f>] do_exit+0x18f/0x930
[ 1049.404576]  [<ffffffff8020be03>] restore_args+0x0/0x30
[ 1049.404584]  [<ffffffff80240006>] do_group_exit+0x36/0xa0
[ 1049.404592]  [<ffffffff8020b7cb>] system_call_fastpath+0x16/0x1b

This can be easily triggered with 'opcontrol --shutdown'.

Simply move get_cpu_var() above unregister_die_notifier().

Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Acked-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/oprofile/nmi_int.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0227694f7da..8a5f1614a3d 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -295,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy)
 
 static void nmi_shutdown(void)
 {
-	struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
+	struct op_msrs *msrs;
+
 	nmi_enabled = 0;
 	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
 	unregister_die_notifier(&profile_exceptions_nb);
+	msrs = &get_cpu_var(cpu_msrs);
 	model->shutdown(msrs);
 	free_msrs();
 	put_cpu_var(cpu_msrs);
-- 
cgit v1.2.3


From 06f95ea8988513b172080c419e4be8826c55a318 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Tue, 16 Sep 2008 18:25:33 -0600
Subject: [IA64] Ski simulator doesn't need check_sal_cache_flush

Peter Chubb reported that commit 3463a93def55c309f3c0d0a8aaf216be3be42d64
(Update check_sal_cache_flush to use platform_send_ipi()) broke
Ski because it does not implement IPIs.

Tony Luck suggested we just #ifndef out the call (since the simulator
does not have the SAL bug that this code is attempting to detect and
workaround)

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c27d5b2c182..de636b21567 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -616,7 +616,9 @@ setup_arch (char **cmdline_p)
 		ia64_mca_init();
 
 	platform_setup(cmdline_p);
+#ifndef CONFIG_IA64_HP_SIM
 	check_sal_cache_flush();
+#endif
 	paging_init();
 }
 
-- 
cgit v1.2.3


From d3758f87f39c5b072dde74c55bfb988262a3a45c Mon Sep 17 00:00:00 2001
From: Jay Lan <jlan@sgi.com>
Date: Mon, 22 Sep 2008 14:21:19 -0700
Subject: [IA64] kexec fails on systems with blocks of uncached memory

Currently a memory segment in memory map with attribute of EFI_MEMORY_UC
is denoted as "System RAM" in /proc/iomem, while memory of attribute
(EFI_MEMORY_WB|EFI_MEMORY_UC) is also labeled the same.

The kexec utility then includes uncached memory as part of vmcore. The
kdump kernel MCA'ed when it tries to save the vmcore to a disk. A normal
"cached" access may cause MCAs.

This patch would label memory with attribute of EFI_MEMORY_UC only as
"Uncached RAM" so that kexec would know not to include it in the vmcore.
I will submit a separate kexec-tools patch to the kexec list.

Signed-off-by: Jay Lan <jlan@sgi.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/efi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index d45f215bc8f..51b75cea701 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1232,9 +1232,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 				if (md->attribute & EFI_MEMORY_WP) {
 					name = "System ROM";
 					flags |= IORESOURCE_READONLY;
-				} else {
+				} else if (md->attribute == EFI_MEMORY_UC)
+					name = "Uncached RAM";
+				else
 					name = "System RAM";
-				}
 				break;
 
 			case EFI_ACPI_MEMORY_NVS:
-- 
cgit v1.2.3


From 44b50e5a1af13c605d6c3b17a60e42eb0ee48d5f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 Sep 2008 15:42:24 -0700
Subject: sparc64: Fix missing devices due to PCI bridge test in
 of_create_pci_dev().

Just like in the arch/sparc64/kernel/of_device.c code fix commit
071d7f4c3b411beae08d27656e958070c43b78b4 ("sparc64: Fix SMP bootup
with CONFIG_STACK_DEBUG or ftrace.") we have to check the OF device
node name for "pci" instead of relying upon the 'device_type' property
being there on all PCI bridges.

Tested by Meelis Roos, and confirmed to make the PCI QFE devices
reappear on the E3500 system.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 55096195458..80dad76f8b8 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -425,7 +425,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	dev->current_state = 4;		/* unknown power state */
 	dev->error_state = pci_channel_io_normal;
 
-	if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+	if (!strcmp(node->name, "pci")) {
 		/* a PCI-PCI bridge */
 		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
 		dev->rom_base_reg = PCI_ROM_ADDRESS1;
-- 
cgit v1.2.3


From 05e12e1c4c09cd35ac9f4e6af1e42b0036375d72 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai <kiran@scalex86.org>
Date: Mon, 22 Sep 2008 22:58:47 -0700
Subject: x86: fix 27-rc crash on vsmp due to paravirt during module load

27-rc fails to boot up if configured to use modules.

Turns out vsmp_patch was marked __init, and vsmp_patch being the
pvops 'patch' routine for vsmp, a call to vsmp_patch just turns out
to execute a code page with series of 0xcc (POISON_FREE_INITMEM -- int3).

vsmp_patch has been marked with __init ever since pvops, however,
apply_paravirt can be called during module load causing calls to
freed memory location.

Since apply_paravirt can only be called during init/module load, make
vsmp_patch with "__init_or_module"

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsmp_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 0c029e8959c..7766d36983f 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void)
 	native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
 }
 
-static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
 				  unsigned long addr, unsigned len)
 {
 	switch (type) {
-- 
cgit v1.2.3


From 4faac97d44ac27bdbb010a9c3597401a8f89341f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Sep 2008 18:54:29 +0200
Subject: x86: prevent stale state of c1e_mask across CPU offline/online

Impact: hang which happens across CPU offline/online on AMD C1E systems.

When a CPU goes offline then the corresponding bit in the broadcast
mask is cleared. For AMD C1E enabled CPUs we do not reenable the
broadcast when the CPU comes online again as we do not clear the
corresponding bit in the c1e_mask, which keeps track which CPUs
have been switched to broadcast already. So on those !$@#& machines
we never switch back to broadcasting after a CPU offline/online cycle.

Clear the bit when the CPU plays dead.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process.c    | 11 ++++++++---
 arch/x86/kernel/process_32.c |  1 +
 arch/x86/kernel/process_64.c |  2 ++
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a..2e2247117f6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
 	return 1;
 }
 
+static cpumask_t c1e_mask = CPU_MASK_NONE;
+static int c1e_detected;
+
+void c1e_remove_cpu(int cpu)
+{
+	cpu_clear(cpu, c1e_mask);
+}
+
 /*
  * C1E aware idle routine. We check for C1E active in the interrupt
  * pending message MSR. If we detect C1E, then we handle it the same
@@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
  */
 static void c1e_idle(void)
 {
-	static cpumask_t c1e_mask = CPU_MASK_NONE;
-	static int c1e_detected;
-
 	if (need_resched())
 		return;
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0b..4b3cfdf5421 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -88,6 +88,7 @@ static void cpu_exit_clear(void)
 	cpu_clear(cpu, cpu_callin_map);
 
 	numa_remove_cpu(cpu);
+	c1e_remove_cpu(cpu);
 }
 
 /* We don't actually take CPU down, just spin without interrupts. */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2..e12e0e4dd25 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,6 +93,8 @@ DECLARE_PER_CPU(int, cpu_state);
 static inline void play_dead(void)
 {
 	idle_task_exit();
+	c1e_remove_cpu(raw_smp_processor_id());
+
 	mb();
 	/* Ack it */
 	__get_cpu_var(cpu_state) = CPU_DEAD;
-- 
cgit v1.2.3


From a8d6829044901a67732904be5f1eacdf8539604f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Sep 2008 19:02:25 +0200
Subject: x86: prevent C-states hang on AMD C1E enabled machines

Impact: System hang when AMD C1E machines switch into C2/C3

AMD C1E enabled systems do not work with normal ACPI C-states
even if the BIOS is advertising them. Limit the C-states to
C1 for the ACPI processor idle code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2e2247117f6..d8c2a299bfe 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -272,6 +272,7 @@ static void c1e_idle(void)
 			c1e_detected = 1;
 			mark_tsc_unstable("TSC halt in C1E");
 			printk(KERN_INFO "System has C1E enabled\n");
+			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
 		}
 	}
 
-- 
cgit v1.2.3


From 09bfeea13cea843fb03eaa96b5d891fa0abdcc90 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Thu, 18 Sep 2008 21:12:10 +0200
Subject: x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC

Impact: Functional TSC is marked unstable on AMD family 0x10 and 0x11 CPUs.

This would be wrong because for those CPUs "invariant TSC" means:

   "The TSC counts at the same rate in all P-states, all C states, S0,
   or S1"

(See "Processor BIOS and Kernel Developer's Guides" for those CPUs.)

[ tglx: Changed C1E to AMD C1E in the printks to avoid confusion
	with Intel C1E ]

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d8c2a299bfe..876e9189077 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -270,8 +270,9 @@ static void c1e_idle(void)
 		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
 		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
 			c1e_detected = 1;
-			mark_tsc_unstable("TSC halt in C1E");
-			printk(KERN_INFO "System has C1E enabled\n");
+			if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+				mark_tsc_unstable("TSC halt in AMD C1E");
+			printk(KERN_INFO "System has AMD C1E enabled\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
 		}
 	}
-- 
cgit v1.2.3


From 44ce17192a01218b242e032237e1d2e52ef50429 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Tue, 23 Sep 2008 19:48:36 +0200
Subject: [MIPS] au1000: Fix gpio direction

When setting the direction of one GPIO pin we have to keep the state of the
other pins, hence use binary OR. Also gpio_direction_output() wants to set an
initial value, so add that too.

This fixes a problem with the USB power switch on mtx-1 boards.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/au1000/common/gpio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c
index b485d94ce8a..1f058434b72 100644
--- a/arch/mips/au1000/common/gpio.c
+++ b/arch/mips/au1000/common/gpio.c
@@ -61,7 +61,8 @@ static int au1xxx_gpio2_direction_input(unsigned gpio)
 static int au1xxx_gpio2_direction_output(unsigned gpio, int value)
 {
 	gpio -= AU1XXX_GPIO_BASE;
-	gpio2->dir = (0x01 << gpio) | (value << gpio);
+	gpio2->dir |= 0x01 << gpio;
+	gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | (value << gpio);
 	return 0;
 }
 
@@ -90,6 +91,7 @@ static int au1xxx_gpio1_direction_input(unsigned gpio)
 static int au1xxx_gpio1_direction_output(unsigned gpio, int value)
 {
 	gpio1->trioutclr = (0x01 & gpio);
+	au1xxx_gpio1_write(gpio, value);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 1eda81495a49a4ee91d8863b0a441a624375efea Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.c.dionne@gmail.com>
Date: Tue, 23 Sep 2008 22:40:02 -0400
Subject: x86: prevent stale state of c1e_mask across CPU offline/online, fix

Fix build error introduced by commit 4faac97d44ac27 ("x86: prevent stale
state of c1e_mask across CPU offline/online").

process_32.c needs to include idle.h to get the prototype for
c1e_remove_cpu()

Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4b3cfdf5421..31f40b24bf5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
+#include <asm/idle.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
-- 
cgit v1.2.3


From 77a9a768b7374cd23d1f400097eede9f1547f508 Mon Sep 17 00:00:00 2001
From: Jeremy Katz <katzj@redhat.com>
Date: Tue, 23 Sep 2008 21:54:00 -0400
Subject: x86: disable apm on the olpc

The OLPC doesn't support APM but also doesn't have DMI, so we can't detect
and disable it based on DMI data.  So, just disable based on machine_is_olpc()

Signed-off-by: Jeremy Katz <katzj@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apm_32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b..732d1f4e10e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -234,6 +234,7 @@
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/i8253.h>
+#include <asm/olpc.h>
 #include <asm/paravirt.h>
 #include <asm/reboot.h>
 
@@ -2217,7 +2218,7 @@ static int __init apm_init(void)
 
 	dmi_check_system(apm_dmi_table);
 
-	if (apm_info.bios.version == 0 || paravirt_enabled()) {
+	if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
 		printk(KERN_INFO "apm: BIOS not found.\n");
 		return -ENODEV;
 	}
-- 
cgit v1.2.3


From b4f151ff899362fec952c45d166252c9912c041f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Sep 2008 17:48:26 +0100
Subject: MN10300: Move asm-arm/cnt32_to_63.h to include/linux/

Move asm-arm/cnt32_to_63.h to include/linux/ so that MN10300 can make
use of it too.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mach-pxa/time.c       | 2 +-
 arch/arm/mach-sa1100/generic.c | 2 +-
 arch/arm/mach-versatile/core.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 67e18509d7b..b0d6b32654c 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -17,9 +17,9 @@
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
 #include <linux/sched.h>
+#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
-#include <asm/cnt32_to_63.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <mach/pxa-regs.h>
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 1362994c78a..b422526f6d8 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -18,9 +18,9 @@
 #include <linux/ioport.h>
 #include <linux/sched.h>	/* just for sched_clock() - funny that */
 #include <linux/platform_device.h>
+#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
-#include <asm/cnt32_to_63.h>
 #include <mach/hardware.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index d75e795c893..b638f10411e 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -28,8 +28,8 @@
 #include <linux/amba/clcd.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/cnt32_to_63.h>
 
-#include <asm/cnt32_to_63.h>
 #include <asm/system.h>
 #include <mach/hardware.h>
 #include <asm/io.h>
-- 
cgit v1.2.3


From 08ec3c2d45bac41c782acb4e3331ac5881b2c68a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Sep 2008 17:48:31 +0100
Subject: MN10300: Make sched_clock() report time since boot

Make sched_clock() report time since boot rather than time since last
timer interrupt.

Make sched_clock() expand and scale the 32-bit TSC value running at
IOCLK speed (~33MHz) to a 64-bit nanosecond counter, using cnt32_to_63()
acquired from the ARM arch and without using slow DIVU instructions
every call.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/time.c | 52 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index babb7c2ac37..e4606586f94 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -1,6 +1,6 @@
 /* MN10300 Low level time management
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007-2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  * - Derived from arch/i386/kernel/time.c
  *
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/profile.h>
+#include <linux/cnt32_to_63.h>
 #include <asm/irq.h>
 #include <asm/div64.h>
 #include <asm/processor.h>
@@ -40,27 +41,54 @@ static struct irqaction timer_irq = {
 	.name		= "timer",
 };
 
+static unsigned long sched_clock_multiplier;
+
 /*
  * scheduler clock - returns current time in nanosec units.
  */
 unsigned long long sched_clock(void)
 {
 	union {
-		unsigned long long l;
-		u32 w[2];
-	} quot;
+		unsigned long long ll;
+		unsigned l[2];
+	} tsc64, result;
+	unsigned long tsc, tmp;
+	unsigned product[3]; /* 96-bit intermediate value */
+
+	/* read the TSC value
+	 */
+	tsc = 0 - get_cycles(); /* get_cycles() counts down */
 
-	quot.w[0] = mn10300_last_tsc - get_cycles();
-	quot.w[1] = 1000000000;
+	/* expand to 64-bits.
+	 * - sched_clock() must be called once a minute or better or the
+	 *   following will go horribly wrong - see cnt32_to_63()
+	 */
+	tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL;
 
-	asm("mulu %2,%3,%0,%1"
-	    : "=r"(quot.w[1]), "=r"(quot.w[0])
-	    : "0"(quot.w[1]), "1"(quot.w[0])
+	/* scale the 64-bit TSC value to a nanosecond value via a 96-bit
+	 * intermediate
+	 */
+	asm("mulu	%2,%0,%3,%0	\n"	/* LSW * mult ->  0:%3:%0 */
+	    "mulu	%2,%1,%2,%1	\n"	/* MSW * mult -> %2:%1:0 */
+	    "add	%3,%1		\n"
+	    "addc	0,%2		\n"	/* result in %2:%1:%0 */
+	    : "=r"(product[0]), "=r"(product[1]), "=r"(product[2]), "=r"(tmp)
+	    :  "0"(tsc64.l[0]),  "1"(tsc64.l[1]),  "2"(sched_clock_multiplier)
 	    : "cc");
 
-	do_div(quot.l, MN10300_TSCCLK);
+	result.l[0] = product[1] << 16 | product[0] >> 16;
+	result.l[1] = product[2] << 16 | product[1] >> 16;
 
-	return quot.l;
+	return result.ll;
+}
+
+/*
+ * initialise the scheduler clock
+ */
+static void __init mn10300_sched_clock_init(void)
+{
+	sched_clock_multiplier =
+		__muldiv64u(NSEC_PER_SEC, 1 << 16, MN10300_TSCCLK);
 }
 
 /*
@@ -128,4 +156,6 @@ void __init time_init(void)
 	/* start the watchdog timer */
 	watchdog_go();
 #endif
+
+	mn10300_sched_clock_init();
 }
-- 
cgit v1.2.3


From d7161a65341556bacb5e6654e133803f46f51063 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 26 Sep 2008 10:36:41 -0500
Subject: kgdb, x86, arm, mips, powerpc: ignore user space single stepping

On the x86 arch, user space single step exceptions should be ignored
if they occur in the kernel space, such as ptrace stepping through a
system call.

First check if it is kgdb that is executing a single step, then ensure
it is not an accidental traversal into the user space, while in kgdb,
any other time the TIF_SINGLESTEP is set, kgdb should ignore the
exception.

On x86, arm, mips and powerpc, the kgdb_contthread usage was
inconsistent with the way single stepping is implemented in the kgdb
core.  The arch specific stub should always set the
kgdb_cpu_doing_single_step correctly if it is single stepping.  This
allows kgdb to correctly process an instruction steps if ptrace
happens to be requesting an instruction step over a system call.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/arm/kernel/kgdb.c     |  2 --
 arch/mips/kernel/kgdb.c    |  3 +--
 arch/powerpc/kernel/kgdb.c |  5 ++---
 arch/x86/kernel/kgdb.c     | 18 +++++++++++-------
 4 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index aaffaecffcd..ba8ccfede96 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -111,8 +111,6 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
 	case 'D':
 	case 'k':
 	case 'c':
-		kgdb_contthread = NULL;
-
 		/*
 		 * Try to read optional parameter, pc unchanged if no parm.
 		 * If this was a compiled breakpoint, we need to move
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 8f6d58ede33..6e152c80cd4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -236,8 +236,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 
 		atomic_set(&kgdb_cpu_doing_single_step, -1);
 		if (remcom_in_buffer[0] == 's')
-			if (kgdb_contthread)
-				atomic_set(&kgdb_cpu_doing_single_step, cpu);
+			atomic_set(&kgdb_cpu_doing_single_step, cpu);
 
 		return 0;
 	}
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index b4fdf2f2743..fe8f71dd0b3 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -347,9 +347,8 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 			linux_regs->msr |= MSR_SE;
 #endif
 			kgdb_single_step = 1;
-			if (kgdb_contthread)
-				atomic_set(&kgdb_cpu_doing_single_step,
-					   raw_smp_processor_id());
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   raw_smp_processor_id());
 		}
 		return 0;
 	}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f47f0eb886b..00f7896c9a1 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -378,10 +378,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
 		if (remcomInBuffer[0] == 's') {
 			linux_regs->flags |= X86_EFLAGS_TF;
 			kgdb_single_step = 1;
-			if (kgdb_contthread) {
-				atomic_set(&kgdb_cpu_doing_single_step,
-					   raw_smp_processor_id());
-			}
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   raw_smp_processor_id());
 		}
 
 		get_debugreg(dr6, 6);
@@ -466,9 +464,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 
 	case DIE_DEBUG:
 		if (atomic_read(&kgdb_cpu_doing_single_step) ==
-			raw_smp_processor_id() &&
-			user_mode(regs))
-			return single_step_cont(regs, args);
+		    raw_smp_processor_id()) {
+			if (user_mode(regs))
+				return single_step_cont(regs, args);
+			break;
+		} else if (test_thread_flag(TIF_SINGLESTEP))
+			/* This means a user thread is single stepping
+			 * a system call which should be ignored
+			 */
+			return NOTIFY_DONE;
 		/* fall through */
 	default:
 		if (user_mode(regs))
-- 
cgit v1.2.3


From 703a1edcd1534468fc18f733c03bd91a65c8c6f0 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 26 Sep 2008 10:36:42 -0500
Subject: kgdb, x86_64: fix PS CS SS registers in gdb serial

On x86_64 the gdb serial register structure defines the PS (also known
as eflags), CS and SS registers as 4 bytes entities.

This patch splits the x86_64 regnames enum into a 32 and 64 version to
account for the 32 bit entities in the gdb serial packets.

Also the program counter is properly filled in for the sleeping
threads.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 00f7896c9a1..8282a213968 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1;
  */
 void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 {
+#ifndef CONFIG_X86_32
+	u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
 	gdb_regs[GDB_AX]	= regs->ax;
 	gdb_regs[GDB_BX]	= regs->bx;
 	gdb_regs[GDB_CX]	= regs->cx;
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	gdb_regs[GDB_SI]	= regs->si;
 	gdb_regs[GDB_DI]	= regs->di;
 	gdb_regs[GDB_BP]	= regs->bp;
-	gdb_regs[GDB_PS]	= regs->flags;
 	gdb_regs[GDB_PC]	= regs->ip;
 #ifdef CONFIG_X86_32
+	gdb_regs[GDB_PS]	= regs->flags;
 	gdb_regs[GDB_DS]	= regs->ds;
 	gdb_regs[GDB_ES]	= regs->es;
 	gdb_regs[GDB_CS]	= regs->cs;
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	gdb_regs[GDB_R13]	= regs->r13;
 	gdb_regs[GDB_R14]	= regs->r14;
 	gdb_regs[GDB_R15]	= regs->r15;
+	gdb_regs32[GDB_PS]	= regs->flags;
+	gdb_regs32[GDB_CS]	= regs->cs;
+	gdb_regs32[GDB_SS]	= regs->ss;
 #endif
 	gdb_regs[GDB_SP]	= regs->sp;
 }
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
  */
 void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 {
+#ifndef CONFIG_X86_32
+	u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
 	gdb_regs[GDB_AX]	= 0;
 	gdb_regs[GDB_BX]	= 0;
 	gdb_regs[GDB_CX]	= 0;
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs[GDB_FS]	= 0xFFFF;
 	gdb_regs[GDB_GS]	= 0xFFFF;
 #else
-	gdb_regs[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
-	gdb_regs[GDB_PC]	= 0;
+	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
+	gdb_regs32[GDB_CS]	= __KERNEL_CS;
+	gdb_regs32[GDB_SS]	= __KERNEL_DS;
+	gdb_regs[GDB_PC]	= p->thread.ip;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
  */
 void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 {
+#ifndef CONFIG_X86_32
+	u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
 	regs->ax		= gdb_regs[GDB_AX];
 	regs->bx		= gdb_regs[GDB_BX];
 	regs->cx		= gdb_regs[GDB_CX];
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	regs->si		= gdb_regs[GDB_SI];
 	regs->di		= gdb_regs[GDB_DI];
 	regs->bp		= gdb_regs[GDB_BP];
-	regs->flags		= gdb_regs[GDB_PS];
 	regs->ip		= gdb_regs[GDB_PC];
 #ifdef CONFIG_X86_32
+	regs->flags		= gdb_regs[GDB_PS];
 	regs->ds		= gdb_regs[GDB_DS];
 	regs->es		= gdb_regs[GDB_ES];
 	regs->cs		= gdb_regs[GDB_CS];
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	regs->r13		= gdb_regs[GDB_R13];
 	regs->r14		= gdb_regs[GDB_R14];
 	regs->r15		= gdb_regs[GDB_R15];
+	regs->flags		= gdb_regs32[GDB_PS];
+	regs->cs		= gdb_regs32[GDB_CS];
+	regs->ss		= gdb_regs32[GDB_SS];
 #endif
 }
 
-- 
cgit v1.2.3


From bc173c5789e1fc6065fd378edc815914b40ee86b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 26 Sep 2008 16:22:58 +0100
Subject: ARM: Delete ARM's own cnt32_to_63.h

Delete ARM's own cnt32_to_63.h as the copy in include/linux/ should now be
used instead.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/cnt32_to_63.h | 78 --------------------------------------
 1 file changed, 78 deletions(-)
 delete mode 100644 arch/arm/include/asm/cnt32_to_63.h

(limited to 'arch')

diff --git a/arch/arm/include/asm/cnt32_to_63.h b/arch/arm/include/asm/cnt32_to_63.h
deleted file mode 100644
index 480c873fa74..00000000000
--- a/arch/arm/include/asm/cnt32_to_63.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- *  include/asm/cnt32_to_63.h -- extend a 32-bit counter to 63 bits
- *
- *  Author:	Nicolas Pitre
- *  Created:	December 3, 2006
- *  Copyright:	MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- */
-
-#ifndef __INCLUDE_CNT32_TO_63_H__
-#define __INCLUDE_CNT32_TO_63_H__
-
-#include <linux/compiler.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-/*
- * Prototype: u64 cnt32_to_63(u32 cnt)
- * Many hardware clock counters are only 32 bits wide and therefore have
- * a relatively short period making wrap-arounds rather frequent.  This
- * is a problem when implementing sched_clock() for example, where a 64-bit
- * non-wrapping monotonic value is expected to be returned.
- *
- * To overcome that limitation, let's extend a 32-bit counter to 63 bits
- * in a completely lock free fashion. Bits 0 to 31 of the clock are provided
- * by the hardware while bits 32 to 62 are stored in memory.  The top bit in
- * memory is used to synchronize with the hardware clock half-period.  When
- * the top bit of both counters (hardware and in memory) differ then the
- * memory is updated with a new value, incrementing it when the hardware
- * counter wraps around.
- *
- * Because a word store in memory is atomic then the incremented value will
- * always be in synch with the top bit indicating to any potential concurrent
- * reader if the value in memory is up to date or not with regards to the
- * needed increment.  And any race in updating the value in memory is harmless
- * as the same value would simply be stored more than once.
- *
- * The only restriction for the algorithm to work properly is that this
- * code must be executed at least once per each half period of the 32-bit
- * counter to properly update the state bit in memory. This is usually not a
- * problem in practice, but if it is then a kernel timer could be scheduled
- * to manage for this code to be executed often enough.
- *
- * Note that the top bit (bit 63) in the returned value should be considered
- * as garbage.  It is not cleared here because callers are likely to use a
- * multiplier on the returned value which can get rid of the top bit
- * implicitly by making the multiplier even, therefore saving on a runtime
- * clear-bit instruction. Otherwise caller must remember to clear the top
- * bit explicitly.
- */
-
-/* this is used only to give gcc a clue about good code generation */
-typedef union {
-	struct {
-#if defined(__LITTLE_ENDIAN)
-		u32 lo, hi;
-#elif defined(__BIG_ENDIAN)
-		u32 hi, lo;
-#endif
-	};
-	u64 val;
-} cnt32_to_63_t;
-
-#define cnt32_to_63(cnt_lo) \
-({ \
-	static volatile u32 __m_cnt_hi = 0; \
-	cnt32_to_63_t __x; \
-	__x.hi = __m_cnt_hi; \
-	__x.lo = (cnt_lo); \
- 	if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \
-		__m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \
-	__x.val; \
-})
-
-#endif
-- 
cgit v1.2.3


From 1575da9ee4804ca889f32f8567dd7965e6fe4abc Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 24 Sep 2008 14:57:11 +0900
Subject: m32r: remove the unused NOHIGHMEM option

Remove the unused NOHIGHMEM option.

Reviewed-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
---
 arch/m32r/Kconfig | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index a5f864c445b..0266519491a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -216,10 +216,6 @@ config MEMORY_SIZE
 	default "01000000" if PLAT_M32104UT
 	default "00800000" if PLAT_OAKS32R
 
-config NOHIGHMEM
-	bool
-	default y
-
 config ARCH_DISCONTIGMEM_ENABLE
 	bool "Internal RAM Support"
 	depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104
-- 
cgit v1.2.3


From 1b20b093e7f81ff8cb322773684b260d1487b610 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 24 Sep 2008 14:58:54 +0900
Subject: m32r: don't offer CONFIG_ISA

As far as I know no M32R hardware actually has ISA slots.

And ISA drivers don't compile on M32R.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
---
 arch/m32r/Kconfig | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 0266519491a..f57113f1f89 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -406,11 +406,7 @@ config PCI_DIRECT
 source "drivers/pci/Kconfig"
 
 config ISA
-	bool "ISA support"
-	help
-	  Find out whether you have ISA slots on your motherboard.  ISA is the
-	  name of a bus system, i.e. the way the CPU talks to the other stuff
-	  inside your box.  If you have ISA, say Y, otherwise N.
+	bool
 
 source "drivers/pcmcia/Kconfig"
 
-- 
cgit v1.2.3


From affa6b120fa895e2bed0e131282fe89c05da1008 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 24 Sep 2008 14:59:57 +0900
Subject: m32r: export empty_zero_page

ERROR: "empty_zero_page" [fs/ext4/ext4dev.ko] undefined!

Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
---
 arch/m32r/kernel/m32r_ksyms.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 16bcb189a38..22624b51d4d 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -14,6 +14,7 @@
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/tlbflush.h>
+#include <asm/pgtable.h>
 
 /* platform dependent support */
 EXPORT_SYMBOL(boot_cpu_data);
@@ -65,6 +66,7 @@ EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(empty_zero_page);
 
 EXPORT_SYMBOL(_inb);
 EXPORT_SYMBOL(_inw);
-- 
cgit v1.2.3


From 4b65fd41807dec1484003a7618c492477511509e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 24 Sep 2008 15:01:15 +0900
Subject: m32r: export __ndelay

ERROR: "__ndelay" [drivers/spi/spi_bitbang.ko] undefined!

Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
---
 arch/m32r/lib/delay.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/m32r/lib/delay.c b/arch/m32r/lib/delay.c
index 59bfc34e0d9..ced549be80f 100644
--- a/arch/m32r/lib/delay.c
+++ b/arch/m32r/lib/delay.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/param.h>
+#include <linux/module.h>
 #ifdef CONFIG_SMP
 #include <linux/sched.h>
 #include <asm/current.h>
@@ -121,3 +122,4 @@ void __ndelay(unsigned long nsecs)
 {
 	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
 }
+EXPORT_SYMBOL(__ndelay);
-- 
cgit v1.2.3


From 81e4807303c416a0defdce8b23a6204416d33280 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 24 Sep 2008 15:01:47 +0900
Subject: m32r/kernel/: cleanups

This patch contains the following cleanups:
- make the following needlessly global code static:
  - entry.S: resume_userspace
  - process.c: pm_idle
  - process.c: default_idle()
  - smp.c: send_IPI_allbutself()
  - time.c: timer_interrupt()
  - time.c: struct irq0
  - traps.c: set_eit_vector_entries()
  - traps.c: kstack_depth_to_print
  - traps.c: show_trace()
  - traps.c: die_lock
- remove the following unused code:
  - head.S: startup_32
  - process.c: hlt_counter
  - process.c: disable_hlt()
  - process.c: enable_hlt()
  - process.c: dump_task_regs()
- remove the following variables and their usages since they were
  always 0:
  - irq.c: irq_err_count
  - irq.c: irq_mis_count

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
---
 arch/m32r/kernel/entry.S   |  2 +-
 arch/m32r/kernel/head.S    |  1 -
 arch/m32r/kernel/irq.c     |  6 ------
 arch/m32r/kernel/process.c | 30 ++----------------------------
 arch/m32r/kernel/smp.c     |  4 ++--
 arch/m32r/kernel/time.c    |  5 ++---
 arch/m32r/kernel/traps.c   |  8 ++++----
 7 files changed, 11 insertions(+), 45 deletions(-)

(limited to 'arch')

diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index d4eaa2fd181..612d35b082a 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -143,7 +143,7 @@ ret_from_intr:
 	and3	r4, r4, #0x8000		; check BSM bit
 #endif
 	beqz	r4, resume_kernel
-ENTRY(resume_userspace)
+resume_userspace:
 	DISABLE_INTERRUPTS(r4)		; make sure we don't miss an interrupt
 					; setting need_resched or sigpending
 					; between sampling and the iret
diff --git a/arch/m32r/kernel/head.S b/arch/m32r/kernel/head.S
index dab7436d7bb..40180778a5c 100644
--- a/arch/m32r/kernel/head.S
+++ b/arch/m32r/kernel/head.S
@@ -29,7 +29,6 @@ __INITDATA
 	.global _end
 ENTRY(stext)
 ENTRY(_stext)
-ENTRY(startup_32)
 	/* Setup up the stack pointer */
 	LDIMM	(r0, spi_stack_top)
 	LDIMM	(r1, spu_stack_top)
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index d0c5b0b7da2..2aeae467009 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -22,9 +22,6 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
 
-atomic_t irq_err_count;
-atomic_t irq_mis_count;
-
 /*
  * Generic, controller-independent functions:
  */
@@ -63,9 +60,6 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
 	}
 	return 0;
 }
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index a689e2978b6..5be4faaf5b1 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -35,8 +35,6 @@
 
 #include <linux/err.h>
 
-static int hlt_counter=0;
-
 /*
  * Return saved PC of a blocked thread.
  */
@@ -48,31 +46,16 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 /*
  * Powermanagement idle function, if any..
  */
-void (*pm_idle)(void) = NULL;
-EXPORT_SYMBOL(pm_idle);
+static void (*pm_idle)(void) = NULL;
 
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
-void disable_hlt(void)
-{
-	hlt_counter++;
-}
-
-EXPORT_SYMBOL(disable_hlt);
-
-void enable_hlt(void)
-{
-	hlt_counter--;
-}
-
-EXPORT_SYMBOL(enable_hlt);
-
 /*
  * We use this is we don't have any better
  * idle routine..
  */
-void default_idle(void)
+static void default_idle(void)
 {
 	/* M32R_FIXME: Please use "cpu_sleep" mode.  */
 	cpu_relax();
@@ -260,15 +243,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long spu,
 	return 0;
 }
 
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	/* M32R_FIXME */
-	return 1;
-}
-
 asmlinkage int sys_fork(unsigned long r0, unsigned long r1, unsigned long r2,
 	unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6,
 	struct pt_regs regs)
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 7577f971ea4..929e5c9d3ad 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -84,7 +84,7 @@ void smp_send_timer(void);
 void smp_ipi_timer_interrupt(struct pt_regs *);
 void smp_local_timer_interrupt(void);
 
-void send_IPI_allbutself(int, int);
+static void send_IPI_allbutself(int, int);
 static void send_IPI_mask(cpumask_t, int, int);
 unsigned long send_IPI_mask_phys(cpumask_t, int, int);
 
@@ -722,7 +722,7 @@ void smp_local_timer_interrupt(void)
  * ---------- --- --------------------------------------------------------
  *
  *==========================================================================*/
-void send_IPI_allbutself(int ipi_num, int try)
+static void send_IPI_allbutself(int ipi_num, int try)
 {
 	cpumask_t cpumask;
 
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index 994cc155635..6ea017727cc 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -34,7 +34,6 @@
 #include <asm/hw_irq.h>
 
 #ifdef CONFIG_SMP
-extern void send_IPI_allbutself(int, int);
 extern void smp_local_timer_interrupt(void);
 #endif
 
@@ -188,7 +187,7 @@ static long last_rtc_update = 0;
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
  */
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 #ifndef CONFIG_SMP
 	profile_tick(CPU_PROFILING);
@@ -228,7 +227,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-struct irqaction irq0 = {
+static struct irqaction irq0 = {
 	.handler = timer_interrupt,
 	.flags = IRQF_DISABLED,
 	.mask = CPU_MASK_NONE,
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 46159a4e644..03b14e55cd8 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -61,7 +61,7 @@ extern unsigned long	eit_vector[];
 	((unsigned long)func - (unsigned long)eit_vector - entry*4)/4 \
 	+ 0xff000000UL
 
-void	set_eit_vector_entries(void)
+static void set_eit_vector_entries(void)
 {
 	extern void default_eit_handler(void);
 	extern void system_call(void);
@@ -121,9 +121,9 @@ void __init trap_init(void)
 	cpu_init();
 }
 
-int kstack_depth_to_print = 24;
+static int kstack_depth_to_print = 24;
 
-void show_trace(struct task_struct *task, unsigned long *stack)
+static void show_trace(struct task_struct *task, unsigned long *stack)
 {
 	unsigned long addr;
 
@@ -224,7 +224,7 @@ bad:
 	printk("\n");
 }
 
-DEFINE_SPINLOCK(die_lock);
+static DEFINE_SPINLOCK(die_lock);
 
 void die(const char * str, struct pt_regs * regs, long err)
 {
-- 
cgit v1.2.3


From 19506fc51852e859bb08ab5abbdb8fd02b7392f2 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 25 Sep 2008 16:45:10 +0200
Subject: [MIPS] au1000: Make sure GPIO value is zero or one

David Brownell <david-b@pacbell.net> wrote:
>       The problem is that "value" is zero-or-nonzero.
>       This code wrongly assumes it's zero-or-one.
>       Possible fix:  "((!!value) << gpio)".

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/au1000/common/gpio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c
index 1f058434b72..e660ddd611c 100644
--- a/arch/mips/au1000/common/gpio.c
+++ b/arch/mips/au1000/common/gpio.c
@@ -48,7 +48,7 @@ static void au1xxx_gpio2_write(unsigned gpio, int value)
 {
 	gpio -= AU1XXX_GPIO_BASE;
 
-	gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | (value << gpio);
+	gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | ((!!value) << gpio);
 }
 
 static int au1xxx_gpio2_direction_input(unsigned gpio)
@@ -62,7 +62,7 @@ static int au1xxx_gpio2_direction_output(unsigned gpio, int value)
 {
 	gpio -= AU1XXX_GPIO_BASE;
 	gpio2->dir |= 0x01 << gpio;
-	gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | (value << gpio);
+	gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | ((!!value) << gpio);
 	return 0;
 }
 
-- 
cgit v1.2.3


From f4d15f1c877644a604f8c8c3d094c13f9f8bdcf2 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sat, 27 Sep 2008 15:05:06 +0100
Subject: [MIPS] IP27: Switch to dynamic interrupt routing avoding panic on
 error.

pcibios_map_irq is no way of returning an error but on IP27 an interrupt
is possibly not routable when running out of resources.  So do the
interrupt routing at pcibios_enable_device time.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/pci-ip27.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index bd78368c82b..f97ab146101 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -142,26 +142,48 @@ int __cpuinit bridge_probe(nasid_t nasid, int widget_id, int masterwid)
  * on any one of the hubs connected to its xbow.
  */
 int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	return 0;
+}
+
+/* Most MIPS systems have straight-forward swizzling needs.  */
+static inline u8 bridge_swizzle(u8 pin, u8 slot)
+{
+	return (((pin - 1) + slot) % 4) + 1;
+}
+
+static inline struct pci_dev *bridge_root_dev(struct pci_dev *dev)
+{
+	while (dev->bus->parent) {
+		/* Move up the chain of bridges. */
+		dev = dev->bus->self;
+	}
+
+	return dev;
+}
+
+/* Do platform specific device initialization at pci_enable_device() time */
+int pcibios_plat_dev_init(struct pci_dev *dev)
 {
 	struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
-	int irq = bc->pci_int[slot];
+	struct pci_dev *rdev = bridge_root_dev(dev);
+	int slot = PCI_SLOT(rdev->devfn);
+	int irq;
 
+	irq = bc->pci_int[slot];
 	if (irq == -1) {
-		irq = bc->pci_int[slot] = request_bridge_irq(bc);
+		irq = request_bridge_irq(bc);
 		if (irq < 0)
-			panic("Can't allocate interrupt for PCI device %s\n",
-			      pci_name(dev));
+			return irq;
+
+		bc->pci_int[slot] = irq;
 	}
 
 	irq_to_bridge[irq] = bc;
 	irq_to_slot[irq] = slot;
 
-	return irq;
-}
+	dev->irq = irq;
 
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2dc42b47a7886bdc87514990c68ecdf8de61e653 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Sat, 27 Sep 2008 16:06:16 +0200
Subject: [MIPS] BCM47xx: Fix build error due to missing PCI functions

This patch defines pcibios_map_irq() and pcibios_plat_dev_init() for
the BCM47xx platform.

It fixes the regression introduced by commit
aab547ce0d1493d400b6468c521a0137cd8c1edf.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/Makefile      |  1 +
 arch/mips/pci/pci-bcm47xx.c | 60 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 arch/mips/pci/pci-bcm47xx.c

(limited to 'arch')

diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 15e01aec37f..c8c32f417b6 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_SOC_TX3927)	+= ops-tx3927.o
 obj-$(CONFIG_PCI_VR41XX)	+= ops-vr41xx.o pci-vr41xx.o
 obj-$(CONFIG_MARKEINS)		+= ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o
 obj-$(CONFIG_PCI_TX4927)	+= ops-tx4927.o
+obj-$(CONFIG_BCM47XX)		+= pci-bcm47xx.o
 
 #
 # These are still pretty much in the old state, watch, go blind.
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
new file mode 100644
index 00000000000..bea9b6cdfdb
--- /dev/null
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (C) 2008 Aurelien Jarno <aurelien@aurel32.net>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/ssb/ssb.h>
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	return 0;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	int res;
+	u8 slot, pin;
+
+	res = ssb_pcibios_plat_dev_init(dev);
+	if (res < 0) {
+		printk(KERN_ALERT "PCI: Failed to init device %s\n",
+		       pci_name(dev));
+		return res;
+	}
+
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	slot = PCI_SLOT(dev->devfn);
+	res = ssb_pcibios_map_irq(dev, slot, pin);
+
+	/* IRQ-0 and IRQ-1 are software interrupts. */
+	if (res < 2) {
+		printk(KERN_ALERT "PCI: Failed to map IRQ of device %s\n",
+		       pci_name(dev));
+		return res;
+	}
+
+	dev->irq = res;
+	return 0;
+}
+
-- 
cgit v1.2.3


From c459ce8b5a7d933a3bcf6915ab17ac1e036e2ac4 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 29 Sep 2008 16:39:19 -0700
Subject: [IA64] Put the space for cpu0 per-cpu area into .data section

Initial fix for making sure that we can access percpu variables
in all C code (commit: 10617bbe84628eb18ab5f723d3ba35005adde143)
inadvertantly allocated the memory in the "percpu" section of
the vmlinux ELF executable.  This confused kexec/dump.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/include/asm/sections.h | 3 +++
 arch/ia64/kernel/head.S          | 9 +++++----
 arch/ia64/kernel/vmlinux.lds.S   | 8 +++++---
 arch/ia64/mm/contig.c            | 2 +-
 arch/ia64/mm/discontig.c         | 2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index f6679989103..1a873b36a4a 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -11,6 +11,9 @@
 #include <asm-generic/sections.h>
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
+#ifdef	CONFIG_SMP
+extern char __cpu0_per_cpu[];
+#endif
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___rse_patchlist[], __end___rse_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 8bdea8eb62e..66e491d8baa 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -367,16 +367,17 @@ start_ap:
 	;;
 #else
 (isAP)	br.few 2f
-	mov r20=r19
-	sub r19=r19,r18
+	movl r20=__cpu0_per_cpu
 	;;
 	shr.u r18=r18,3
 1:
-	ld8 r21=[r20],8;;
-	st8[r19]=r21,8
+	ld8 r21=[r19],8;;
+	st8[r20]=r21,8
 	adds r18=-1,r18;;
 	cmp4.lt p7,p6=0,r18
 (p7)	br.cond.dptk.few 1b
+	mov r19=r20
+	;;
 2:
 #endif
 	tpa r19=r19
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index de71da811cd..10a7d47e851 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -215,9 +215,6 @@ SECTIONS
   /* Per-cpu data: */
   percpu : { } :percpu
   . = ALIGN(PERCPU_PAGE_SIZE);
-#ifdef	CONFIG_SMP
-  . = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
-#endif
   __phys_per_cpu_start = .;
   .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
 	{
@@ -233,6 +230,11 @@ SECTIONS
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
 	{
+#ifdef	CONFIG_SMP
+  . = ALIGN(PERCPU_PAGE_SIZE);
+		__cpu0_per_cpu = .;
+  . = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
+#endif
 		DATA_DATA
 		*(.data1)
 		*(.gnu.linkonce.d*)
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e566ff43884..0ee085efbe2 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -163,7 +163,7 @@ per_cpu_init (void)
 	 * get_zeroed_page().
 	 */
 	if (first_time) {
-		void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+		void *cpu0_data = __cpu0_per_cpu;
 
 		first_time=0;
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 78026aabaa7..d8c5fcd89e5 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -144,7 +144,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 
 	for_each_possible_early_cpu(cpu) {
 		if (cpu == 0) {
-			void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+			void *cpu0_data = __cpu0_per_cpu;
 			__per_cpu_offset[cpu] = (char*)cpu0_data -
 				__per_cpu_start;
 		} else if (node == node_cpuid[cpu].nid) {
-- 
cgit v1.2.3


From ad611045ce5d059af84a9855b22ca3f7a99d47be Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 24 Sep 2008 16:39:04 +0000
Subject: powerpc: Fix PCI in Holly device tree

The PCI bridge on the Holly board is incorrectly represented in the
device tree.  The current device tree node for the PCI bridge sits
under the tsi-bridge node.  That's not obviously wrong, but the PCI
bridge translates some PCI spaces into CPU address ranges which were
not translated by the "ranges" property in tsi-bridge node.

We used to get away with this problem because the PCI bridge discovery
code was also buggy, assuming incorrectly that PCI host bridge nodes
were always directly under the root bus and treating the translated
addresses as raw CPU addresses, rather than parent bus addresses.
This has since been fixed, thus breaking Holly.

This could be fixed by adding extra translations to the tsi-bridge
node, but this patch instead moves the Holly PCI bridge out of the
tsi-bridge node to the root bus.  This makes the tsi-bridge node
represent only the built-in IO devices in the bridge, with a
more-or-less contiguous address range.  This is the same convention
used on Freescale SoC chips, where the "soc" node represents only the
IMMR region, and the PCI and other bus bridges are separate nodes
under the root bus.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/boot/dts/holly.dts | 106 ++++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 53 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index f87fe7b9ced..c6e11ebeceb 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -133,61 +133,61 @@
 			reg = <0x00007400 0x00000400>;
 			big-endian;
 		};
+	};
 
-		pci@1000 {
-			device_type = "pci";
-			compatible = "tsi109-pci", "tsi108-pci";
-			#interrupt-cells = <1>;
-			#size-cells = <2>;
-			#address-cells = <3>;
-			reg = <0x00001000 0x00001000>;
-			bus-range = <0x0 0x0>;
-			/*----------------------------------------------------+
-			| PCI memory range.
-			| 01 denotes I/O space
-			| 02 denotes 32-bit memory space
-			+----------------------------------------------------*/
-			ranges = <0x02000000 0x00000000 0x40000000 0x40000000 0x00000000 0x10000000
-				  0x01000000 0x00000000 0x00000000 0x7e000000 0x00000000 0x00010000>;
-			clock-frequency = <133333332>;
-			interrupt-parent = <&MPIC>;
+	pci@c0001000 {
+		device_type = "pci";
+		compatible = "tsi109-pci", "tsi108-pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xc0001000 0x00001000>;
+		bus-range = <0x0 0x0>;
+		/*----------------------------------------------------+
+		| PCI memory range.
+		| 01 denotes I/O space
+		| 02 denotes 32-bit memory space
+		+----------------------------------------------------*/
+		ranges = <0x02000000 0x00000000 0x40000000 0x40000000 0x00000000 0x10000000
+			  0x01000000 0x00000000 0x00000000 0x7e000000 0x00000000 0x00010000>;
+		clock-frequency = <133333332>;
+		interrupt-parent = <&MPIC>;
+		interrupts = <0x17 0x2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		/*----------------------------------------------------+
+		| The INTA, INTB, INTC, INTD are shared.
+		+----------------------------------------------------*/
+		interrupt-map = <
+			0x800 0x0 0x0 0x1 &RT0 0x24 0x0
+			0x800 0x0 0x0 0x2 &RT0 0x25 0x0
+			0x800 0x0 0x0 0x3 &RT0 0x26 0x0
+			0x800 0x0 0x0 0x4 &RT0 0x27 0x0
+
+			0x1000 0x0 0x0 0x1 &RT0 0x25 0x0
+			0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
+			0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
+			0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
+
+			0x1800 0x0 0x0 0x1 &RT0 0x26 0x0
+			0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
+			0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
+			0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
+
+			0x2000 0x0 0x0 0x1 &RT0 0x27 0x0
+			0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
+			0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
+			0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
+			>;
+
+		RT0: router@1180 {
+			device_type = "pic-router";
+			interrupt-controller;
+			big-endian;
+			clock-frequency = <0>;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
 			interrupts = <0x17 0x2>;
-			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
-			/*----------------------------------------------------+
-			| The INTA, INTB, INTC, INTD are shared.
-			+----------------------------------------------------*/
-			interrupt-map = <
-				0x800 0x0 0x0 0x1 &RT0 0x24 0x0
-				0x800 0x0 0x0 0x2 &RT0 0x25 0x0
-				0x800 0x0 0x0 0x3 &RT0 0x26 0x0
-				0x800 0x0 0x0 0x4 &RT0 0x27 0x0
-
-				0x1000 0x0 0x0 0x1 &RT0 0x25 0x0
-				0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
-				0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
-				0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
-
-				0x1800 0x0 0x0 0x1 &RT0 0x26 0x0
-				0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
-				0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
-				0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
-
-				0x2000 0x0 0x0 0x1 &RT0 0x27 0x0
-				0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
-				0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
-				0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
-				>;
-
-			RT0: router@1180 {
- 				device_type = "pic-router";
- 				interrupt-controller;
- 				big-endian;
- 				clock-frequency = <0>;
- 				#address-cells = <0>;
- 				#interrupt-cells = <2>;
- 				interrupts = <0x17 0x2>;
-				interrupt-parent = <&MPIC>;
-			};
+			interrupt-parent = <&MPIC>;
 		};
 	};
 
-- 
cgit v1.2.3


From 61e9916eba35dfb76d38013a5aae9a59cc50877a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 24 Sep 2008 22:56:25 +0000
Subject: powerpc: Fix failure to shutdown with CPU hotplug

I tracked down the shutdown regression to CPUs not dying
when being shut down during power-off. This turns out to
be due to the system_state being SYSTEM_POWER_OFF, which
this code doesn't take as a valid state for shutting off
CPUs in.

This has never made sense to me, but when I added hotplug
code to implement hibernate I only "made it work" and did
not question the need to check the system_state. Thomas
Gleixner helped me dig, but the only thing we found is
that it was added with the original commit that added CPU
hotplug support.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Joel Schopp <jschopp@austin.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/idle.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index d308a9f70f1..31982d05d81 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -34,11 +34,7 @@
 #include <asm/smp.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
-/* this is used for software suspend, and that shuts down
- * CPUs even while the system is still booting... */
-#define cpu_should_die()	(cpu_is_offline(smp_processor_id()) && \
-				   (system_state == SYSTEM_RUNNING     \
-				 || system_state == SYSTEM_BOOTING))
+#define cpu_should_die()	cpu_is_offline(smp_processor_id())
 #else
 #define cpu_should_die()	0
 #endif
-- 
cgit v1.2.3


From 3dcd7e269d2223126f6ee9bc893f5a6166e1770d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= <jamagallon@ono.com>
Date: Tue, 30 Sep 2008 10:02:52 +0200
Subject: x86: fix typo in enable_mtrr_cleanup early parameter

Correct typo for 'enable_mtrr_cleanup' early boot param name.

Signed-off-by: J.A. Magallon <jamagallon@ono.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mtrr/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a56..885c8265e6b 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str)
 		enable_mtrr_cleanup = 1;
 	return 0;
 }
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
 struct var_mtrr_state {
 	unsigned long	range_startk;
-- 
cgit v1.2.3


From de59985e3a623d4d5d6207f1777398ca0606ab1c Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Tue, 30 Sep 2008 11:02:12 -0700
Subject: x86: Fix broken LDT access in VMI

After investigating a JRE failure, I found this bug was introduced a
long time ago, and had already managed to survive another bugfix which
occurred on the same line.  The result is a total failure of the JRE due
to LDT selectors not working properly.

This one took a long time to rear up because LDT usage is not very
common, but the bug is quite serious.  It got introduced along with
another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be

Signed-off-by: Zachary Amsden <zach@vmware.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Glauber de Oliveira Costa <gcosta@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/vmi_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db5..edfb09f3047 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
 				const void *desc)
 {
 	u32 *ldt_entry = (u32 *)desc;
-	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
+	vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
 }
 
 static void vmi_load_sp0(struct tss_struct *tss,
-- 
cgit v1.2.3


From dc63b52673d71f9d49b9d72d263a9f32df18c3ee Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Tue, 30 Sep 2008 11:02:12 -0700
Subject: x86, vmi: fix broken LDT access

This one took a long time to rear up because LDT usage is not very
common, but the bug is quite serious.  It got introduced along with
another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be

After investigating a JRE failure, I found this bug was introduced a long time
ago, and had already managed to survive another bugfix which occurred on the
same line.  The result is a total failure of the JRE due to LDT selectors not
working properly.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Cc: Glauber de Oliveira Costa <gcosta@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vmi_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db5..edfb09f3047 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
 				const void *desc)
 {
 	u32 *ldt_entry = (u32 *)desc;
-	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
+	vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
 }
 
 static void vmi_load_sp0(struct tss_struct *tss,
-- 
cgit v1.2.3


From d6478fad430e37148b56f642c87301ba72476675 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 Oct 2008 13:47:06 +0100
Subject: MN10300: Fix IRQ handling

Fix the IRQ handling on the MN10300 arch.

This patch makes a number of significant changes:

 (1) It separates the irq_chip definition for edge-triggered interrupts from
     the one for level-triggered interrupts.

     This is necessary because the MN10300 PIC latches the IRQ channel's
     interrupt request bit (GxICR_REQUEST), even after the device has ceased to
     assert its interrupt line and the interrupt channel has been disabled in
     the PIC.  So for level-triggered interrupts we need to clear this bit when
     we re-enable - which is achieved by setting GxICR_DETECT but not
     GxICR_REQUEST when writing to the register.

     Not doing this results in spurious interrupts occurring because calling
     mask_ack() at the start of handle_level_irq() is insufficient - it fails
     to clear the REQUEST latch because the device that caused the interrupt is
     still asserting its interrupt line at this point.

 (2) IRQ disablement [irq_chip::disable_irq()] shouldn't clear the interrupt
     request flag for edge-triggered interrupts lest it lose an interrupt.

 (3) IRQ unmasking [irq_chip::unmask_irq()] also shouldn't clear the interrupt
     request flag for edge-triggered interrupts lest it lose an interrupt.

 (4) The end() operation is now left to the default (no-operation) as
     __do_IRQ() is compiled out.  This may affect misrouted_irq(), but
     according to Thomas Gleixner it's the correct thing to do.

 (5) handle_level_irq() is used for edge-triggered interrupts rather than
     handle_edge_irq() as the MN10300 PIC latches interrupt events even on
     masked IRQ channels, thus rendering IRQ_PENDING unnecessary.  It is
     sufficient to call mask_ack() at the start and unmask() at the end.

 (6) For level-triggered interrupts, ack() is now NULL as it's not used, and
     there is no effective ACK function on the PIC.  mask_ack() is now the
     same as mask() as the latch continues to latch, even when the channel is
     masked.

Further, the patch discards the disable() op implementation as its now the same
as the mask() op implementation, which is used instead.

It also discards the enable() op implementations as they're now the same as
the unmask() op implementations, which are used instead.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/irq.c             | 71 ++++++++++++++++++++++-------------
 arch/mn10300/unit-asb2303/unit-init.c |  2 +-
 arch/mn10300/unit-asb2305/unit-init.c |  2 +-
 3 files changed, 47 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 761c434a248..56c64ccc9c2 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -20,22 +20,8 @@ EXPORT_SYMBOL(__mn10300_irq_enabled_epsw);
 atomic_t irq_err_count;
 
 /*
- * MN10300 INTC controller operations
+ * MN10300 interrupt controller operations
  */
-static void mn10300_cpupic_disable(unsigned int irq)
-{
-	u16 tmp = GxICR(irq);
-	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_DETECT;
-	tmp = GxICR(irq);
-}
-
-static void mn10300_cpupic_enable(unsigned int irq)
-{
-	u16 tmp = GxICR(irq);
-	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
-	tmp = GxICR(irq);
-}
-
 static void mn10300_cpupic_ack(unsigned int irq)
 {
 	u16 tmp;
@@ -60,26 +46,54 @@ static void mn10300_cpupic_mask_ack(unsigned int irq)
 static void mn10300_cpupic_unmask(unsigned int irq)
 {
 	u16 tmp = GxICR(irq);
-	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT;
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
 	tmp = GxICR(irq);
 }
 
-static void mn10300_cpupic_end(unsigned int irq)
+static void mn10300_cpupic_unmask_clear(unsigned int irq)
 {
+	/* the MN10300 PIC latches its interrupt request bit, even after the
+	 * device has ceased to assert its interrupt line and the interrupt
+	 * channel has been disabled in the PIC, so for level-triggered
+	 * interrupts we need to clear the request bit when we re-enable */
 	u16 tmp = GxICR(irq);
-	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT;
 	tmp = GxICR(irq);
 }
 
-static struct irq_chip mn10300_cpu_pic = {
-	.name		= "cpu",
-	.disable	= mn10300_cpupic_disable,
-	.enable		= mn10300_cpupic_enable,
+/*
+ * MN10300 PIC level-triggered IRQ handling.
+ *
+ * The PIC has no 'ACK' function per se.  It is possible to clear individual
+ * channel latches, but each latch relatches whether or not the channel is
+ * masked, so we need to clear the latch when we unmask the channel.
+ *
+ * Also for this reason, we don't supply an ack() op (it's unused anyway if
+ * mask_ack() is provided), and mask_ack() just masks.
+ */
+static struct irq_chip mn10300_cpu_pic_level = {
+	.name		= "cpu_l",
+	.disable	= mn10300_cpupic_mask,
+	.enable		= mn10300_cpupic_unmask_clear,
+	.ack		= NULL,
+	.mask		= mn10300_cpupic_mask,
+	.mask_ack	= mn10300_cpupic_mask,
+	.unmask		= mn10300_cpupic_unmask_clear,
+};
+
+/*
+ * MN10300 PIC edge-triggered IRQ handling.
+ *
+ * We use the latch clearing function of the PIC as the 'ACK' function.
+ */
+static struct irq_chip mn10300_cpu_pic_edge = {
+	.name		= "cpu_e",
+	.disable	= mn10300_cpupic_mask,
+	.enable		= mn10300_cpupic_unmask,
 	.ack		= mn10300_cpupic_ack,
 	.mask		= mn10300_cpupic_mask,
 	.mask_ack	= mn10300_cpupic_mask_ack,
 	.unmask		= mn10300_cpupic_unmask,
-	.end		= mn10300_cpupic_end,
 };
 
 /*
@@ -114,7 +128,8 @@ void set_intr_level(int irq, u16 level)
  */
 void set_intr_postackable(int irq)
 {
-	set_irq_handler(irq, handle_level_irq);
+	set_irq_chip_and_handler(irq, &mn10300_cpu_pic_level,
+				 handle_level_irq);
 }
 
 /*
@@ -126,8 +141,12 @@ void __init init_IRQ(void)
 
 	for (irq = 0; irq < NR_IRQS; irq++)
 		if (irq_desc[irq].chip == &no_irq_type)
-			set_irq_chip_and_handler(irq, &mn10300_cpu_pic,
-						 handle_edge_irq);
+			/* due to the PIC latching interrupt requests, even
+			 * when the IRQ is disabled, IRQ_PENDING is superfluous
+			 * and we can use handle_level_irq() for edge-triggered
+			 * interrupts */
+			set_irq_chip_and_handler(irq, &mn10300_cpu_pic_edge,
+						 handle_level_irq);
 	unit_init_IRQ();
 }
 
diff --git a/arch/mn10300/unit-asb2303/unit-init.c b/arch/mn10300/unit-asb2303/unit-init.c
index 14b2c817cff..70e8cb4ea26 100644
--- a/arch/mn10300/unit-asb2303/unit-init.c
+++ b/arch/mn10300/unit-asb2303/unit-init.c
@@ -51,7 +51,7 @@ void __init unit_init_IRQ(void)
 		switch (GET_XIRQ_TRIGGER(extnum)) {
 		case XIRQ_TRIGGER_HILEVEL:
 		case XIRQ_TRIGGER_LOWLEVEL:
-			set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq);
+			set_intr_postackable(XIRQ2IRQ(extnum));
 			break;
 		default:
 			break;
diff --git a/arch/mn10300/unit-asb2305/unit-init.c b/arch/mn10300/unit-asb2305/unit-init.c
index 6a352414a35..72812a9439a 100644
--- a/arch/mn10300/unit-asb2305/unit-init.c
+++ b/arch/mn10300/unit-asb2305/unit-init.c
@@ -52,7 +52,7 @@ void __init unit_init_IRQ(void)
 		switch (GET_XIRQ_TRIGGER(extnum)) {
 		case XIRQ_TRIGGER_HILEVEL:
 		case XIRQ_TRIGGER_LOWLEVEL:
-			set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq);
+			set_intr_postackable(XIRQ2IRQ(extnum));
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From 1fce2d01dff65a76cd08c1b145acd9d3c20021d2 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Thu, 2 Oct 2008 00:58:49 -0500
Subject: powerpc: Fix boot hang regression on MPC8544DS

Commit 00c5372d37a78990c1530184a9c792ee60a30067 caused the MPC8544DS
board to hang at boot.  The MPC8544DS is unique in that it doesn't use
the PCI slots on the ULI (unlike the MPC8572DS or MPC8610HPCD).  So
the dummy read at the end of the address space causes us to hang.

We can detect the situation by comparing the bridge's BARs versus
the root complex.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/fsl_uli1575.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index ef74a0763ec..8c619963bec 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -219,11 +219,21 @@ static void __devinit quirk_final_uli5249(struct pci_dev *dev)
 	int i;
 	u8 *dummy;
 	struct pci_bus *bus = dev->bus;
+	resource_size_t end = 0;
+
+	for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) {
+		unsigned long flags = pci_resource_flags(dev, i);
+		if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM)
+			end = pci_resource_end(dev, i);
+	}
 
 	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
 		if ((bus->resource[i]) &&
 			(bus->resource[i]->flags & IORESOURCE_MEM)) {
-			dummy = ioremap(bus->resource[i]->end - 3, 0x4);
+			if (bus->resource[i]->end == end)
+				dummy = ioremap(bus->resource[i]->start, 0x4);
+			else
+				dummy = ioremap(bus->resource[i]->end - 3, 0x4);
 			if (dummy) {
 				in_8(dummy);
 				iounmap(dummy);
-- 
cgit v1.2.3


From b7e4226e4f427b59dc8e9c45a2a1a1ed1353a140 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 1 Oct 2008 21:52:41 +0100
Subject: [MIPS] Build fix: Fix irq flags type

Though from a hardware perspective it would be sensible to use only a
32-bit unsigned int type Linux defines interrupt flags to be stored in
an unsigned long and nothing else.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c | 2 +-
 arch/mips/kernel/smtc.c   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 35234b92b9a..96ffc9c6d19 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
 #ifdef CONFIG_MIPS_MT_SMTC
-			unsigned int irqflags;
+			unsigned long irqflags;
 			unsigned int mtflags;
 #endif /* CONFIG_MIPS_MT_SMTC */
 
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index a516286532a..05f2708a902 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -556,7 +556,7 @@ void mipsmt_prepare_cpus(void)
 void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 {
 	extern u32 kernelsp[NR_CPUS];
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	LOCK_MT_PRA();
@@ -753,7 +753,7 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 {
 	int tcstatus;
 	struct smtc_ipi *pipi;
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	if (cpu == smp_processor_id()) {
@@ -975,7 +975,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm)
 	struct smtc_ipi *pipi;
 	unsigned long tcstatus;
 	int sent;
-	long flags;
+	unsigned long flags;
 	unsigned int mtflags;
 	unsigned int vpflags;
 
-- 
cgit v1.2.3


From 498a863fdfb3e60d2e0e964cd202c2030bda1005 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 1 Oct 2008 22:23:52 +0100
Subject: [MIPS] SMTC: Build fix: Fix filename in Makefile

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mti-malta/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index 3b7dd722c32..cef2db8d222 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile
@@ -15,6 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK)	+= malta-console.o
 obj-$(CONFIG_PCI)		+= malta-pci.o
 
 # FIXME FIXME FIXME
-obj-$(CONFIG_MIPS_MT_SMTC)	+= malta_smtc.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= malta-smtc.o
 
 EXTRA_CFLAGS += -Werror
-- 
cgit v1.2.3


From 9cc123631b6630948eb5059d218f44424888daa7 Mon Sep 17 00:00:00 2001
From: "Kevin D. Kissell" <kevink@paralogos.com>
Date: Tue, 9 Sep 2008 21:33:36 +0200
Subject: [MIPS] SMTC: Fix holes in SMTC and FPU affinity support.

Signed-off-by: Kevin D. Kissell <kevink@paralogos.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips-mt-fpaff.c |  2 +-
 arch/mips/kernel/process.c       | 19 +++++++++----------
 arch/mips/kernel/traps.c         |  6 ++++--
 3 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index df4d3f2f740..dc9eb72ed9d 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -159,7 +159,7 @@ __setup("fpaff=", fpaff_thresh);
 /*
  * FPU Use Factor empirically derived from experiments on 34K
  */
-#define FPUSEFACTOR 333
+#define FPUSEFACTOR 2000
 
 static __init int mt_fp_affinity_init(void)
 {
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ce7684335a4..22fc19bbe87 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -55,7 +55,7 @@ void __noreturn cpu_idle(void)
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
+#ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
 
 			smtc_idle_loop_hook();
@@ -145,19 +145,18 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
 	 */
 	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC restores TCStatus after Status, and the CU bits
+	 * are aliased there.
+	 */
+	childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
+#endif
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
 #ifdef CONFIG_MIPS_MT_FPAFF
 	clear_tsk_thread_flag(p, TIF_FPUBOUND);
-
-	/*
-	 * FPU affinity support is cleaner if we track the
-	 * user-visible CPU affinity from the very beginning.
-	 * The generic cpus_allowed mask will already have
-	 * been copied from the parent before copy_thread
-	 * is invoked.
-	 */
-	p->thread.user_cpus_allowed = p->cpus_allowed;
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	if (clone_flags & CLONE_SETTLS)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5fd0cd020af..b602ac6eb47 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -825,8 +825,10 @@ static void mt_ase_fp_affinity(void)
 		if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) {
 			cpumask_t tmask;
 
-			cpus_and(tmask, current->thread.user_cpus_allowed,
-			         mt_fpu_cpumask);
+			current->thread.user_cpus_allowed
+				= current->cpus_allowed;
+			cpus_and(tmask, current->cpus_allowed,
+				mt_fpu_cpumask);
 			set_cpus_allowed(current, tmask);
 			set_thread_flag(TIF_FPUBOUND);
 		}
-- 
cgit v1.2.3


From d2bb01b042a38219fbddaafc214c5beb96248d2f Mon Sep 17 00:00:00 2001
From: "Kevin D. Kissell" <kevink@paralogos.com>
Date: Tue, 9 Sep 2008 21:35:01 +0200
Subject: [MIPS] SMTC: Close tiny holes in the SMTC IPI replay system.

Signed-off-by: Kevin D. Kissell <kevink@paralogos.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/entry.S | 10 +++++-----
 arch/mips/kernel/smtc.c  |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e29598ae939..ffa331029e0 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,11 +79,6 @@ FEXPORT(syscall_exit)
 
 FEXPORT(restore_all)			# restore full frame
 #ifdef CONFIG_MIPS_MT_SMTC
-/* Detect and execute deferred IPI "interrupts" */
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	jal	deferred_smtc_ipi
-	LONG_S	s0, TI_REGS($28)
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 /* Re-arm any temporarily masked interrupts not explicitly "acked" */
 	mfc0	v0, CP0_TCSTATUS
@@ -112,6 +107,11 @@ FEXPORT(restore_all)			# restore full frame
 	xor	t0, t0, t3
 	mtc0	t0, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
+/* Detect and execute deferred IPI "interrupts" */
+	LONG_L	s0, TI_REGS($28)
+	LONG_S	sp, TI_REGS($28)
+	jal	deferred_smtc_ipi
+	LONG_S	s0, TI_REGS($28)
 #endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 05f2708a902..39b491b9ad8 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -70,7 +70,7 @@ static atomic_t ipi_timer_latch[NR_CPUS];
 
 #define IPIBUF_PER_CPU 4
 
-static struct smtc_ipi_q IPIQ[NR_CPUS];
+struct smtc_ipi_q IPIQ[NR_CPUS];
 static struct smtc_ipi_q freeIPIq;
 
 
-- 
cgit v1.2.3


From 8531a35e5e275b17c57c39b7911bc2b37025f28c Mon Sep 17 00:00:00 2001
From: "Kevin D. Kissell" <kevink@paralogos.com>
Date: Tue, 9 Sep 2008 21:48:52 +0200
Subject: [MIPS] SMTC: Fix SMTC dyntick support.

Rework of SMTC support to make it work with the new clock event system,
allowing "tickless" operation, and to make it compatible with the use of
the "wait_irqoff" idle loop.  The new clocking scheme means that the
previously optional IPI instant replay mechanism is now required, and has
been made more robust.

Signed-off-by: Kevin D. Kissell <kevink@paralogos.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                |  26 +---
 arch/mips/kernel/Makefile        |   1 +
 arch/mips/kernel/cevt-r4k.c      | 173 ++++++---------------
 arch/mips/kernel/cevt-smtc.c     | 321 +++++++++++++++++++++++++++++++++++++++
 arch/mips/kernel/cpu-probe.c     |  10 +-
 arch/mips/kernel/genex.S         |   4 +-
 arch/mips/kernel/smtc.c          | 252 +++++++++++++++---------------
 arch/mips/mti-malta/malta-smtc.c |   9 +-
 8 files changed, 523 insertions(+), 273 deletions(-)
 create mode 100644 arch/mips/kernel/cevt-smtc.c

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 49896a2a1d7..c930b8ceb41 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1403,7 +1403,6 @@ config MIPS_MT_SMTC
 	depends on CPU_MIPS32_R2
 	#depends on CPU_MIPS64_R2		# once there is hardware ...
 	depends on SYS_SUPPORTS_MULTITHREADING
-	select GENERIC_CLOCKEVENTS_BROADCAST
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select MIPS_MT
@@ -1451,32 +1450,17 @@ config MIPS_VPE_LOADER
 	  Includes a loader for loading an elf relocatable object
 	  onto another VPE and running it.
 
-config MIPS_MT_SMTC_INSTANT_REPLAY
-	bool "Low-latency Dispatch of Deferred SMTC IPIs"
-	depends on MIPS_MT_SMTC && !PREEMPT
-	default y
-	help
-	  SMTC pseudo-interrupts between TCs are deferred and queued
-	  if the target TC is interrupt-inhibited (IXMT). In the first
-	  SMTC prototypes, these queued IPIs were serviced on return
-	  to user mode, or on entry into the kernel idle loop. The
-	  INSTANT_REPLAY option dispatches them as part of local_irq_restore()
-	  processing, which adds runtime overhead (hence the option to turn
-	  it off), but ensures that IPIs are handled promptly even under
-	  heavy I/O interrupt load.
-
 config MIPS_MT_SMTC_IM_BACKSTOP
 	bool "Use per-TC register bits as backstop for inhibited IM bits"
 	depends on MIPS_MT_SMTC
-	default y
+	default n
 	help
 	  To support multiple TC microthreads acting as "CPUs" within
 	  a VPE, VPE-wide interrupt mask bits must be specially manipulated
 	  during interrupt handling. To support legacy drivers and interrupt
 	  controller management code, SMTC has a "backstop" to track and
 	  if necessary restore the interrupt mask. This has some performance
-	  impact on interrupt service overhead. Disable it only if you know
-	  what you are doing.
+	  impact on interrupt service overhead.
 
 config MIPS_MT_SMTC_IRQAFF
 	bool "Support IRQ affinity API"
@@ -1486,10 +1470,8 @@ config MIPS_MT_SMTC_IRQAFF
 	  Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
 	  for SMTC Linux kernel. Requires platform support, of which
 	  an example can be found in the MIPS kernel i8259 and Malta
-	  platform code.  It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY
-	  be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to
-	  interrupt dispatch, and should be used only if you know what
-	  you are doing.
+	  platform code.  Adds some overhead to interrupt dispatch, and
+	  should be used only if you know what you are doing.
 
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 706f9397479..25775cb5400 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= cevt-smtc.o
 obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
 obj-$(CONFIG_CEVT_SB1250)	+= cevt-sb1250.o
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 24a2d907aa0..4a4c59f2737 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -12,6 +12,14 @@
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * The SMTC Kernel for the 34K, 1004K, et. al. replaces several
+ * of these routines with SMTC-specific variants.
+ */
+
+#ifndef CONFIG_MIPS_MT_SMTC
 
 static int mips_next_event(unsigned long delta,
                            struct clock_event_device *evt)
@@ -19,60 +27,27 @@ static int mips_next_event(unsigned long delta,
 	unsigned int cnt;
 	int res;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	{
-	unsigned long flags, vpflags;
-	local_irq_save(flags);
-	vpflags = dvpe();
-#endif
 	cnt = read_c0_count();
 	cnt += delta;
 	write_c0_compare(cnt);
 	res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
-#ifdef CONFIG_MIPS_MT_SMTC
-	evpe(vpflags);
-	local_irq_restore(flags);
-	}
-#endif
 	return res;
 }
 
-static void mips_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
+#endif /* CONFIG_MIPS_MT_SMTC */
+
+void mips_set_clock_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
 {
 	/* Nothing to do ...  */
 }
 
-static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
-static int cp0_timer_irq_installed;
+DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+int cp0_timer_irq_installed;
 
-/*
- * Timer ack for an R4k-compatible timer of a known frequency.
- */
-static void c0_timer_ack(void)
-{
-	write_c0_compare(read_c0_compare());
-}
+#ifndef CONFIG_MIPS_MT_SMTC
 
-/*
- * Possibly handle a performance counter interrupt.
- * Return true if the timer interrupt should not be checked
- */
-static inline int handle_perf_irq(int r2)
-{
-	/*
-	 * The performance counter overflow interrupt may be shared with the
-	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
-	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
-	 * and we can't reliably determine if a counter interrupt has also
-	 * happened (!r2) then don't check for a timer interrupt.
-	 */
-	return (cp0_perfcount_irq < 0) &&
-		perf_irq() == IRQ_HANDLED &&
-		!r2;
-}
-
-static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2;
 	struct clock_event_device *cd;
@@ -93,12 +68,8 @@ static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 	 * interrupt.  Being the paranoiacs we are we check anyway.
 	 */
 	if (!r2 || (read_c0_cause() & (1 << 30))) {
-		c0_timer_ack();
-#ifdef CONFIG_MIPS_MT_SMTC
-		if (cpu_data[cpu].vpe_id)
-			goto out;
-		cpu = 0;
-#endif
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
 		cd->event_handler(cd);
 	}
@@ -107,65 +78,16 @@ out:
 	return IRQ_HANDLED;
 }
 
-static struct irqaction c0_compare_irqaction = {
+#endif /* Not CONFIG_MIPS_MT_SMTC */
+
+struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
-#ifdef CONFIG_MIPS_MT_SMTC
-	.flags = IRQF_DISABLED,
-#else
 	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#endif
 	.name = "timer",
 };
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
-
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
-}
-
-static void mips_broadcast(cpumask_t mask)
-{
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-}
-
-static void setup_smtc_dummy_clockevent_device(void)
-{
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
 
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
-}
-#endif
-
-static void mips_event_handler(struct clock_event_device *dev)
+void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
@@ -177,7 +99,23 @@ static int c0_compare_int_pending(void)
 	return (read_c0_cause() >> cp0_compare_irq) & 0x100;
 }
 
-static int c0_compare_int_usable(void)
+/*
+ * Compare interrupt can be routed and latched outside the core,
+ * so a single execution hazard barrier may not be enough to give
+ * it time to clear as seen in the Cause register.  4 time the
+ * pipeline depth seems reasonably conservative, and empirically
+ * works better in configurations with high CPU/bus clock ratios.
+ */
+
+#define compare_change_hazard() \
+	do { \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+	} while (0)
+
+int c0_compare_int_usable(void)
 {
 	unsigned int delta;
 	unsigned int cnt;
@@ -187,7 +125,7 @@ static int c0_compare_int_usable(void)
 	 */
 	if (c0_compare_int_pending()) {
 		write_c0_compare(read_c0_count());
-		irq_disable_hazard();
+		compare_change_hazard();
 		if (c0_compare_int_pending())
 			return 0;
 	}
@@ -196,7 +134,7 @@ static int c0_compare_int_usable(void)
 		cnt = read_c0_count();
 		cnt += delta;
 		write_c0_compare(cnt);
-		irq_disable_hazard();
+		compare_change_hazard();
 		if ((int)(read_c0_count() - cnt) < 0)
 		    break;
 		/* increase delta if the timer was already expired */
@@ -205,11 +143,12 @@ static int c0_compare_int_usable(void)
 	while ((int)(read_c0_count() - cnt) <= 0)
 		;	/* Wait for expiry  */
 
+	compare_change_hazard();
 	if (!c0_compare_int_pending())
 		return 0;
 
 	write_c0_compare(read_c0_count());
-	irq_disable_hazard();
+	compare_change_hazard();
 	if (c0_compare_int_pending())
 		return 0;
 
@@ -219,6 +158,8 @@ static int c0_compare_int_usable(void)
 	return 1;
 }
 
+#ifndef CONFIG_MIPS_MT_SMTC
+
 int __cpuinit mips_clockevent_init(void)
 {
 	uint64_t mips_freq = mips_hpt_frequency;
@@ -229,17 +170,6 @@ int __cpuinit mips_clockevent_init(void)
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	setup_smtc_dummy_clockevent_device();
-
-	/*
-	 * On SMTC we only register VPE0's compare interrupt as clockevent
-	 * device.
-	 */
-	if (cpu)
-		return 0;
-#endif
-
 	if (!c0_compare_int_usable())
 		return -ENXIO;
 
@@ -265,13 +195,9 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-#ifdef CONFIG_MIPS_MT_SMTC
-	cd->cpumask		= CPU_MASK_ALL;
-#else
 	cd->cpumask		= cpumask_of_cpu(cpu);
-#endif
 	cd->set_next_event	= mips_next_event;
-	cd->set_mode		= mips_set_mode;
+	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
 
 	clockevents_register_device(cd);
@@ -281,12 +207,9 @@ int __cpuinit mips_clockevent_init(void)
 
 	cp0_timer_irq_installed = 1;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq)
-	setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT);
-#else
 	setup_irq(irq, &c0_compare_irqaction);
-#endif
 
 	return 0;
 }
+
+#endif /* Not CONFIG_MIPS_MT_SMTC */
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
new file mode 100644
index 00000000000..5162fe4b595
--- /dev/null
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -0,0 +1,321 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
+ */
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+
+#include <asm/smtc_ipi.h>
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * Variant clock event timer support for SMTC on MIPS 34K, 1004K
+ * or other MIPS MT cores.
+ *
+ * Notes on SMTC Support:
+ *
+ * SMTC has multiple microthread TCs pretending to be Linux CPUs.
+ * But there's only one Count/Compare pair per VPE, and Compare
+ * interrupts are taken opportunisitically by available TCs
+ * bound to the VPE with the Count register.  The new timer
+ * framework provides for global broadcasts, but we really
+ * want VPE-level multicasts for best behavior. So instead
+ * of invoking the high-level clock-event broadcast code,
+ * this version of SMTC support uses the historical SMTC
+ * multicast mechanisms "under the hood", appearing to the
+ * generic clock layer as if the interrupts are per-CPU.
+ *
+ * The approach taken here is to maintain a set of NR_CPUS
+ * virtual timers, and track which "CPU" needs to be alerted
+ * at each event.
+ *
+ * It's unlikely that we'll see a MIPS MT core with more than
+ * 2 VPEs, but we *know* that we won't need to handle more
+ * VPEs than we have "CPUs".  So NCPUs arrays of NCPUs elements
+ * is always going to be overkill, but always going to be enough.
+ */
+
+unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
+static int smtc_nextinvpe[NR_CPUS];
+
+/*
+ * Timestamps stored are absolute values to be programmed
+ * into Count register.  Valid timestamps will never be zero.
+ * If a Zero Count value is actually calculated, it is converted
+ * to be a 1, which will introduce 1 or two CPU cycles of error
+ * roughly once every four billion events, which at 1000 HZ means
+ * about once every 50 days.  If that's actually a problem, one
+ * could alternate squashing 0 to 1 and to -1.
+ */
+
+#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
+#define ISVALID(x) ((x) != 0L)
+
+/*
+ * Time comparison is subtle, as it's really truncated
+ * modular arithmetic.
+ */
+
+#define IS_SOONER(a, b, reference) \
+    (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
+
+/*
+ * CATCHUP_INCREMENT, used when the function falls behind the counter.
+ * Could be an increasing function instead of a constant;
+ */
+
+#define CATCHUP_INCREMENT 64
+
+static int mips_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	unsigned long timestamp, reference, previous;
+	unsigned long nextcomp = 0L;
+	int vpe = current_cpu_data.vpe_id;
+	int cpu = smp_processor_id();
+	local_irq_save(flags);
+	mtflags = dmt();
+
+	/*
+	 * Maintain the per-TC virtual timer
+	 * and program the per-VPE shared Count register
+	 * as appropriate here...
+	 */
+	reference = (unsigned long)read_c0_count();
+	timestamp = MAKEVALID(reference + delta);
+	/*
+	 * To really model the clock, we have to catch the case
+	 * where the current next-in-VPE timestamp is the old
+	 * timestamp for the calling CPE, but the new value is
+	 * in fact later.  In that case, we have to do a full
+	 * scan and discover the new next-in-VPE CPU id and
+	 * timestamp.
+	 */
+	previous = smtc_nexttime[vpe][cpu];
+	if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
+	    && IS_SOONER(previous, timestamp, reference)) {
+		int i;
+		int soonest = cpu;
+
+		/*
+		 * Update timestamp array here, so that new
+		 * value gets considered along with those of
+		 * other virtual CPUs on the VPE.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+		for_each_online_cpu(i) {
+			if (ISVALID(smtc_nexttime[vpe][i])
+			    && IS_SOONER(smtc_nexttime[vpe][i],
+				smtc_nexttime[vpe][soonest], reference)) {
+				    soonest = i;
+			}
+		}
+		smtc_nextinvpe[vpe] = soonest;
+		nextcomp = smtc_nexttime[vpe][soonest];
+	/*
+	 * Otherwise, we don't have to process the whole array rank,
+	 * we just have to see if the event horizon has gotten closer.
+	 */
+	} else {
+		if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
+		    IS_SOONER(timestamp,
+			smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
+			    smtc_nextinvpe[vpe] = cpu;
+			    nextcomp = timestamp;
+		}
+		/*
+		 * Since next-in-VPE may me the same as the executing
+		 * virtual CPU, we update the array *after* checking
+		 * its value.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+	}
+
+	/*
+	 * It may be that, in fact, we don't need to update Compare,
+	 * but if we do, we want to make sure we didn't fall into
+	 * a crack just behind Count.
+	 */
+	if (ISVALID(nextcomp)) {
+		write_c0_compare(nextcomp);
+		ehb();
+		/*
+		 * We never return an error, we just make sure
+		 * that we trigger the handlers as quickly as
+		 * we can if we fell behind.
+		 */
+		while ((nextcomp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX) {
+			nextcomp += CATCHUP_INCREMENT;
+			write_c0_compare(nextcomp);
+			ehb();
+		}
+	}
+	emt(mtflags);
+	local_irq_restore(flags);
+	return 0;
+}
+
+
+void smtc_distribute_timer(int vpe)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	int cpu;
+	struct clock_event_device *cd;
+	unsigned long nextstamp = 0L;
+	unsigned long reference;
+
+
+repeat:
+	for_each_online_cpu(cpu) {
+	    /*
+	     * Find virtual CPUs within the current VPE who have
+	     * unserviced timer requests whose time is now past.
+	     */
+	    local_irq_save(flags);
+	    mtflags = dmt();
+	    if (cpu_data[cpu].vpe_id == vpe &&
+		ISVALID(smtc_nexttime[vpe][cpu])) {
+		reference = (unsigned long)read_c0_count();
+		if ((smtc_nexttime[vpe][cpu] - reference)
+			 > (unsigned long)LONG_MAX) {
+			    smtc_nexttime[vpe][cpu] = 0L;
+			    emt(mtflags);
+			    local_irq_restore(flags);
+			    /*
+			     * We don't send IPIs to ourself.
+			     */
+			    if (cpu != smp_processor_id()) {
+				smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
+			    } else {
+				cd = &per_cpu(mips_clockevent_device, cpu);
+				cd->event_handler(cd);
+			    }
+		} else {
+			/* Local to VPE but Valid Time not yet reached. */
+			if (!ISVALID(nextstamp) ||
+			    IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
+			    reference)) {
+				smtc_nextinvpe[vpe] = cpu;
+				nextstamp = smtc_nexttime[vpe][cpu];
+			}
+			emt(mtflags);
+			local_irq_restore(flags);
+		}
+	    } else {
+		emt(mtflags);
+		local_irq_restore(flags);
+
+	    }
+	}
+	/* Reprogram for interrupt at next soonest timestamp for VPE */
+	if (ISVALID(nextstamp)) {
+		write_c0_compare(nextstamp);
+		ehb();
+		if ((nextstamp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX)
+				goto repeat;
+	}
+}
+
+
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	/* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
+	handle_perf_irq(1);
+
+	if (read_c0_cause() & (1 << 30)) {
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
+		smtc_distribute_timer(cpu_data[cpu].vpe_id);
+	}
+	return IRQ_HANDLED;
+}
+
+
+int __cpuinit mips_clockevent_init(void)
+{
+	uint64_t mips_freq = mips_hpt_frequency;
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd;
+	unsigned int irq;
+	int i;
+	int j;
+
+	if (!cpu_has_counter || !mips_hpt_frequency)
+		return -ENXIO;
+	if (cpu == 0) {
+		for (i = 0; i < num_possible_cpus(); i++) {
+			smtc_nextinvpe[i] = 0;
+			for (j = 0; j < num_possible_cpus(); j++)
+				smtc_nexttime[i][j] = 0L;
+		}
+		/*
+		 * SMTC also can't have the usablility test
+		 * run by secondary TCs once Compare is in use.
+		 */
+		if (!c0_compare_int_usable())
+			return -ENXIO;
+	}
+
+	/*
+	 * With vectored interrupts things are getting platform specific.
+	 * get_c0_compare_int is a hook to allow a platform to return the
+	 * interrupt number of it's liking.
+	 */
+	irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
+	if (get_c0_compare_int)
+		irq = get_c0_compare_int();
+
+	cd = &per_cpu(mips_clockevent_device, cpu);
+
+	cd->name		= "MIPS";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+
+	/* Calculate the min / max delta */
+	cd->mult	= div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
+	cd->shift		= 32;
+	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+
+	cd->rating		= 300;
+	cd->irq			= irq;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= mips_next_event;
+	cd->set_mode		= mips_set_clock_mode;
+	cd->event_handler	= mips_event_handler;
+
+	clockevents_register_device(cd);
+
+	/*
+	 * On SMTC we only want to do the data structure
+	 * initialization and IRQ setup once.
+	 */
+	if (cpu)
+		return 0;
+	/*
+	 * And we need the hwmask associated with the c0_compare
+	 * vector to be initialized.
+	 */
+	irq_hwmask[irq] = (0x100 << cp0_compare_irq);
+	if (cp0_timer_irq_installed)
+		return 0;
+
+	cp0_timer_irq_installed = 1;
+
+	setup_irq(irq, &c0_compare_irqaction);
+
+	return 0;
+}
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 11c92dc5379..e621fda8ab3 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -54,14 +54,18 @@ extern void r4k_wait(void);
  * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes
  * using this version a gamble.
  */
-static void r4k_wait_irqoff(void)
+void r4k_wait_irqoff(void)
 {
 	local_irq_disable();
 	if (!need_resched())
-		__asm__("	.set	mips3		\n"
+		__asm__("	.set	push		\n"
+			"	.set	mips3		\n"
 			"	wait			\n"
-			"	.set	mips0		\n");
+			"	.set	pop		\n");
 	local_irq_enable();
+	__asm__(" 	.globl __pastwait	\n"
+		"__pastwait:			\n");
+	return;
 }
 
 /*
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index f886dd7f708..01dcbe38fa0 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -282,8 +282,8 @@ NESTED(except_vec_vi_handler, 0, sp)
 	and	t0, a0, t1
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 	mfc0	t2, CP0_TCCONTEXT
-	or	t0, t0, t2
-	mtc0	t0, CP0_TCCONTEXT
+	or	t2, t0, t2
+	mtc0	t2, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
 	xor	t1, t1, t0
 	mtc0	t1, CP0_STATUS
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 39b491b9ad8..897fb2b4751 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -1,4 +1,21 @@
-/* Copyright (C) 2004 Mips Technologies, Inc */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) 2004 Mips Technologies, Inc
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
 
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
@@ -21,7 +38,6 @@
 #include <asm/time.h>
 #include <asm/addrspace.h>
 #include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
 #include <asm/smtc_proc.h>
 
 /*
@@ -58,11 +74,6 @@ unsigned long irq_hwmask[NR_IRQS];
 
 asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
 
-/*
- * Clock interrupt "latch" buffers, per "CPU"
- */
-
-static atomic_t ipi_timer_latch[NR_CPUS];
 
 /*
  * Number of InterProcessor Interrupt (IPI) message buffers to allocate
@@ -282,7 +293,7 @@ static void smtc_configure_tlb(void)
  * phys_cpu_present_map and the logical/physical mappings.
  */
 
-int __init mipsmt_build_cpu_map(int start_cpu_slot)
+int __init smtc_build_cpu_map(int start_cpu_slot)
 {
 	int i, ntcs;
 
@@ -325,7 +336,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
 			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
 			| TCSTATUS_A);
-	write_tc_c0_tccontext(0);
+	/*
+	 * TCContext gets an offset from the base of the IPIQ array
+	 * to be used in low-level code to detect the presence of
+	 * an active IPI queue
+	 */
+	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
 	/* In general, all TCs should have the same cpu_data indications */
@@ -336,10 +352,18 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
 	cpu_data[cpu].vpe_id = vpe;
 	cpu_data[cpu].tc_id = tc;
+	/* Multi-core SMTC hasn't been tested, but be prepared */
+	cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
 }
 
+/*
+ * Tweak to get Count registes in as close a sync as possible.
+ * Value seems good for 34K-class cores.
+ */
+
+#define CP0_SKEW 8
 
-void mipsmt_prepare_cpus(void)
+void smtc_prepare_cpus(int cpus)
 {
 	int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
 	unsigned long flags;
@@ -363,13 +387,13 @@ void mipsmt_prepare_cpus(void)
 		IPIQ[i].head = IPIQ[i].tail = NULL;
 		spin_lock_init(&IPIQ[i].lock);
 		IPIQ[i].depth = 0;
-		atomic_set(&ipi_timer_latch[i], 0);
 	}
 
 	/* cpu_data index starts at zero */
 	cpu = 0;
 	cpu_data[cpu].vpe_id = 0;
 	cpu_data[cpu].tc_id = 0;
+	cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
 	cpu++;
 
 	/* Report on boot-time options */
@@ -484,7 +508,8 @@ void mipsmt_prepare_cpus(void)
 			write_vpe_c0_compare(0);
 			/* Propagate Config7 */
 			write_vpe_c0_config7(read_c0_config7());
-			write_vpe_c0_count(read_c0_count());
+			write_vpe_c0_count(read_c0_count() + CP0_SKEW);
+			ehb();
 		}
 		/* enable multi-threading within VPE */
 		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
@@ -585,24 +610,22 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 
 void smtc_init_secondary(void)
 {
-	/*
-	 * Start timer on secondary VPEs if necessary.
-	 * plat_timer_setup has already have been invoked by init/main
-	 * on "boot" TC.  Like per_cpu_trap_init() hack, this assumes that
-	 * SMTC init code assigns TCs consdecutively and in ascending order
-	 * to across available VPEs.
-	 */
-	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
-	    ((read_c0_tcbind() & TCBIND_CURVPE)
-	    != cpu_data[smp_processor_id() - 1].vpe_id)){
-		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
-	}
-
 	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
 {
+	int cpu = smp_processor_id();
+
+	/*
+	 * Lowest-numbered CPU per VPE starts a clock tick.
+	 * Like per_cpu_trap_init() hack, this assumes that
+	 * SMTC init code assigns TCs consdecutively and
+	 * in ascending order across available VPEs.
+	 */
+	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
+		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
@@ -755,6 +778,8 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 	struct smtc_ipi *pipi;
 	unsigned long flags;
 	int mtflags;
+	unsigned long tcrestart;
+	extern void r4k_wait_irqoff(void), __pastwait(void);
 
 	if (cpu == smp_processor_id()) {
 		printk("Cannot Send IPI to self!\n");
@@ -771,8 +796,6 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 	pipi->arg = (void *)action;
 	pipi->dest = cpu;
 	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		if (type == SMTC_CLOCK_TICK)
-			atomic_inc(&ipi_timer_latch[cpu]);
 		/* If not on same VPE, enqueue and send cross-VPE interrupt */
 		smtc_ipi_nq(&IPIQ[cpu], pipi);
 		LOCK_CORE_PRA();
@@ -800,22 +823,29 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 
 		if ((tcstatus & TCSTATUS_IXMT) != 0) {
 			/*
-			 * Spin-waiting here can deadlock,
-			 * so we queue the message for the target TC.
+			 * If we're in the the irq-off version of the wait
+			 * loop, we need to force exit from the wait and
+			 * do a direct post of the IPI.
+			 */
+			if (cpu_wait == r4k_wait_irqoff) {
+				tcrestart = read_tc_c0_tcrestart();
+				if (tcrestart >= (unsigned long)r4k_wait_irqoff
+				    && tcrestart < (unsigned long)__pastwait) {
+					write_tc_c0_tcrestart(__pastwait);
+					tcstatus &= ~TCSTATUS_IXMT;
+					write_tc_c0_tcstatus(tcstatus);
+					goto postdirect;
+				}
+			}
+			/*
+			 * Otherwise we queue the message for the target TC
+			 * to pick up when he does a local_irq_restore()
 			 */
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
-			/* Try to reduce redundant timer interrupt messages */
-			if (type == SMTC_CLOCK_TICK) {
-			    if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){
-				smtc_ipi_nq(&freeIPIq, pipi);
-				return;
-			    }
-			}
 			smtc_ipi_nq(&IPIQ[cpu], pipi);
 		} else {
-			if (type == SMTC_CLOCK_TICK)
-				atomic_inc(&ipi_timer_latch[cpu]);
+postdirect:
 			post_direct_ipi(cpu, pipi);
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
@@ -883,7 +913,7 @@ static void ipi_call_interrupt(void)
 	smp_call_function_interrupt();
 }
 
-DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void ipi_decode(struct smtc_ipi *pipi)
 {
@@ -891,20 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi)
 	struct clock_event_device *cd;
 	void *arg_copy = pipi->arg;
 	int type_copy = pipi->type;
-	int ticks;
-
 	smtc_ipi_nq(&freeIPIq, pipi);
 	switch (type_copy) {
 	case SMTC_CLOCK_TICK:
 		irq_enter();
 		kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
-		cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-		ticks = atomic_read(&ipi_timer_latch[cpu]);
-		atomic_sub(ticks, &ipi_timer_latch[cpu]);
-		while (ticks) {
-			cd->event_handler(cd);
-			ticks--;
-		}
+		cd = &per_cpu(mips_clockevent_device, cpu);
+		cd->event_handler(cd);
 		irq_exit();
 		break;
 
@@ -937,24 +960,48 @@ void ipi_decode(struct smtc_ipi *pipi)
 	}
 }
 
+/*
+ * Similar to smtc_ipi_replay(), but invoked from context restore,
+ * so it reuses the current exception frame rather than set up a
+ * new one with self_ipi.
+ */
+
 void deferred_smtc_ipi(void)
 {
-	struct smtc_ipi *pipi;
-	unsigned long flags;
-/* DEBUG */
-	int q = smp_processor_id();
+	int cpu = smp_processor_id();
 
 	/*
 	 * Test is not atomic, but much faster than a dequeue,
 	 * and the vast majority of invocations will have a null queue.
+	 * If irq_disabled when this was called, then any IPIs queued
+	 * after we test last will be taken on the next irq_enable/restore.
+	 * If interrupts were enabled, then any IPIs added after the
+	 * last test will be taken directly.
 	 */
-	if (IPIQ[q].head != NULL) {
-		while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) {
-			/* ipi_decode() should be called with interrupts off */
-			local_irq_save(flags);
+
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
+
+		/*
+		 * It may be possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		if (pipi != NULL)
 			ipi_decode(pipi);
-			local_irq_restore(flags);
-		}
+		/*
+		 * The use of the __raw_local restore isn't
+		 * as obviously necessary here as in smtc_ipi_replay(),
+		 * but it's more efficient, given that we're already
+		 * running down the IPI queue.
+		 */
+		__raw_local_irq_restore(flags);
 	}
 }
 
@@ -1066,55 +1113,53 @@ static void setup_cross_vpe_interrupts(unsigned int nvpe)
 
 /*
  * SMTC-specific hacks invoked from elsewhere in the kernel.
- *
- * smtc_ipi_replay is called from raw_local_irq_restore which is only ever
- * called with interrupts disabled.  We do rely on interrupts being disabled
- * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would
- * result in a recursive call to raw_local_irq_restore().
  */
 
-static void __smtc_ipi_replay(void)
+ /*
+  * smtc_ipi_replay is called from raw_local_irq_restore
+  */
+
+void smtc_ipi_replay(void)
 {
 	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * To the extent that we've ever turned interrupts off,
 	 * we may have accumulated deferred IPIs.  This is subtle.
-	 * If we use the smtc_ipi_qdepth() macro, we'll get an
-	 * exact number - but we'll also disable interrupts
-	 * and create a window of failure where a new IPI gets
-	 * queued after we test the depth but before we re-enable
-	 * interrupts. So long as IXMT never gets set, however,
 	 * we should be OK:  If we pick up something and dispatch
 	 * it here, that's great. If we see nothing, but concurrent
 	 * with this operation, another TC sends us an IPI, IXMT
 	 * is clear, and we'll handle it as a real pseudo-interrupt
-	 * and not a pseudo-pseudo interrupt.
+	 * and not a pseudo-pseudo interrupt.  The important thing
+	 * is to do the last check for queued message *after* the
+	 * re-enabling of interrupts.
 	 */
-	if (IPIQ[cpu].depth > 0) {
-		while (1) {
-			struct smtc_ipi_q *q = &IPIQ[cpu];
-			struct smtc_ipi *pipi;
-			extern void self_ipi(struct smtc_ipi *);
-
-			spin_lock(&q->lock);
-			pipi = __smtc_ipi_dq(q);
-			spin_unlock(&q->lock);
-			if (!pipi)
-				break;
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
 
+		/*
+		 * It's just possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		/*
+		 ** But use a raw restore here to avoid recursion.
+		 */
+		__raw_local_irq_restore(flags);
+
+		if (pipi) {
 			self_ipi(pipi);
 			smtc_cpu_stats[cpu].selfipis++;
 		}
 	}
 }
 
-void smtc_ipi_replay(void)
-{
-	raw_local_irq_disable();
-	__smtc_ipi_replay();
-}
-
 EXPORT_SYMBOL(smtc_ipi_replay);
 
 void smtc_idle_loop_hook(void)
@@ -1193,40 +1238,13 @@ void smtc_idle_loop_hook(void)
 		}
 	}
 
-	/*
-	 * Now that we limit outstanding timer IPIs, check for hung TC
-	 */
-	for (tc = 0; tc < NR_CPUS; tc++) {
-		/* Don't check ourself - we'll dequeue IPIs just below */
-		if ((tc != smp_processor_id()) &&
-		    atomic_read(&ipi_timer_latch[tc]) > timerq_limit) {
-		    if (clock_hang_reported[tc] == 0) {
-			pdb_msg += sprintf(pdb_msg,
-				"TC %d looks hung with timer latch at %d\n",
-				tc, atomic_read(&ipi_timer_latch[tc]));
-			clock_hang_reported[tc]++;
-			}
-		}
-	}
 	emt(mtflags);
 	local_irq_restore(flags);
 	if (pdb_msg != &id_ho_db_msg[0])
 		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
 #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
 
-	/*
-	 * Replay any accumulated deferred IPIs. If "Instant Replay"
-	 * is in use, there should never be any.
-	 */
-#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
-	{
-		unsigned long flags;
-
-		local_irq_save(flags);
-		__smtc_ipi_replay();
-		local_irq_restore(flags);
-	}
-#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */
+	smtc_ipi_replay();
 }
 
 void smtc_soft_dump(void)
@@ -1242,10 +1260,6 @@ void smtc_soft_dump(void)
 		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
 	}
 	smtc_ipi_qdump();
-	printk("Timer IPI Backlogs:\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i]));
-	}
 	printk("%d Recoveries of \"stolen\" FPU\n",
 	       atomic_read(&smtc_fpu_recoveries));
 }
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ea705e4945..f84a46a8ae6 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -84,12 +84,17 @@ static void msmtc_cpus_done(void)
 
 static void __init msmtc_smp_setup(void)
 {
-	mipsmt_build_cpu_map(0);
+	/*
+	 * we won't get the definitive value until
+	 * we've run smtc_prepare_cpus later, but
+	 * we would appear to need an upper bound now.
+	 */
+	smp_num_siblings = smtc_build_cpu_map(0);
 }
 
 static void __init msmtc_prepare_cpus(unsigned int max_cpus)
 {
-	mipsmt_prepare_cpus();
+	smtc_prepare_cpus(max_cpus);
 }
 
 struct plat_smp_ops msmtc_smp_ops = {
-- 
cgit v1.2.3


From d3d238c7744d08c36a114a59cb537d4c0c6c9a86 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 3 Oct 2008 21:54:59 +0200
Subject: [S390] nohz: Fix __udelay.

This fixes a regression that came with 934b2857cc576ae53c92a66e63fce7ddcfa74691
("[S390] nohz/sclp: disable timer on synchronous waits.").
If udelay() gets called from a disabled context it sets the clock comparator
to a value where it expects the next interrupt. When the interrupt happens
the clock comparator gets not reset and therefore the interrupt condition
doesn't get cleared. The result is an endless timer interrupt loop.

In addition this patch fixes also the following:

rcutorture reveals that our __udelay implementation is still buggy,
since it might schedule tasklets, but prevents their execution:

NOHZ: local_softirq_pending 42
NOHZ: local_softirq_pending 02
NOHZ: local_softirq_pending 142
NOHZ: local_softirq_pending 02

To fix this we make sure that only the clock comparator interrupt
is enabled when the enabled wait psw is loaded.
Also no code gets called anymore which might schedule tasklets.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/time.c |  2 ++
 arch/s390/lib/delay.c   | 88 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 56 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ca114fe46ff..06acb1a18bb 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -169,6 +169,8 @@ void init_cpu_timer(void)
 
 static void clock_comparator_interrupt(__u16 code)
 {
+	if (S390_lowcore.clock_comparator == -1ULL)
+		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
 static void etr_timing_alert(struct etr_irq_parm *);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index fc6ab6094df..0953cee05ef 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -1,14 +1,9 @@
 /*
- *  arch/s390/lib/delay.c
  *    Precise Delay Loops for S390
  *
- *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- *  Derived from "arch/i386/lib/delay.c"
- *    Copyright (C) 1993 Linus Torvalds
- *    Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *    Copyright IBM Corp. 1999,2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
  */
 
 #include <linux/sched.h>
@@ -29,30 +24,31 @@ void __delay(unsigned long loops)
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
-/*
- * Waits for 'usecs' microseconds using the TOD clock comparator.
- */
-void __udelay(unsigned long usecs)
+static void __udelay_disabled(unsigned long usecs)
 {
-	u64 end, time, old_cc = 0;
-	unsigned long flags, cr0, mask, dummy;
-	int irq_context;
+	unsigned long mask, cr0, cr0_saved;
+	u64 clock_saved;
 
-	irq_context = in_interrupt();
-	if (!irq_context)
-		local_bh_disable();
-	local_irq_save(flags);
-	if (raw_irqs_disabled_flags(flags)) {
-		old_cc = local_tick_disable();
-		S390_lowcore.clock_comparator = -1ULL;
-		__ctl_store(cr0, 0, 0);
-		dummy = (cr0 & 0xffff00e0) | 0x00000800;
-		__ctl_load(dummy , 0, 0);
-		mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
-	} else
-		mask = psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_EXT | PSW_MASK_IO;
+	clock_saved = local_tick_disable();
+	set_clock_comparator(get_clock() + ((u64) usecs << 12));
+	__ctl_store(cr0_saved, 0, 0);
+	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
+	__ctl_load(cr0 , 0, 0);
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	trace_hardirqs_on();
+	__load_psw_mask(mask);
+	local_irq_disable();
+	__ctl_load(cr0_saved, 0, 0);
+	local_tick_enable(clock_saved);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
+static void __udelay_enabled(unsigned long usecs)
+{
+	unsigned long mask;
+	u64 end, time;
+
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
 	end = get_clock() + ((u64) usecs << 12);
 	do {
 		time = end < S390_lowcore.clock_comparator ?
@@ -62,13 +58,37 @@ void __udelay(unsigned long usecs)
 		__load_psw_mask(mask);
 		local_irq_disable();
 	} while (get_clock() < end);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
-	if (raw_irqs_disabled_flags(flags)) {
-		__ctl_load(cr0, 0, 0);
-		local_tick_enable(old_cc);
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long usecs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (in_irq()) {
+		__udelay_disabled(usecs);
+		goto out;
+	}
+	if (in_softirq()) {
+		if (raw_irqs_disabled_flags(flags))
+			__udelay_disabled(usecs);
+		else
+			__udelay_enabled(usecs);
+		goto out;
 	}
-	if (!irq_context)
+	if (raw_irqs_disabled_flags(flags)) {
+		local_bh_disable();
+		__udelay_disabled(usecs);
 		_local_bh_enable();
-	set_clock_comparator(S390_lowcore.clock_comparator);
+		goto out;
+	}
+	__udelay_enabled(usecs);
+out:
 	local_irq_restore(flags);
+	preempt_enable();
 }
-- 
cgit v1.2.3


From cc65f1ec192dc54de57483194502e9fa00934c39 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 3 Oct 2008 13:00:56 -0700
Subject: x86 setup: correct segfault in generation of 32-bit reloc kernel

Impact: segfault on build of a 32-bit relocatable kernel

When converting arch/x86/boot/compressed/relocs.c to support unlimited
sections, the computation of sym_strtab in walk_relocs() was done
incorrectly.  This causes a segfault for some people when building the
relocatable 32-bit kernel.

Pointed out by Anonymous <pageexec@freemail.hu>.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/compressed/relocs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c52fc0..857e492c571 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
 			continue;
 		}
 		sh_symtab = sec_symtab->symtab;
-		sym_strtab = sec->link->strtab;
+		sym_strtab = sec_symtab->link->strtab;
 		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
 			Elf32_Rel *rel;
 			Elf32_Sym *sym;
-- 
cgit v1.2.3


From 6b918657b7431e4c5c953b8222ae2f4fc1b2576a Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Thu, 7 Aug 2008 14:08:49 +0200
Subject: atmel-mci: Platform code for supporting multiple mmc slots

Add the necessary platform infrastructure to support multiple mmc/sdcard
slots all at once through a single controller. Currently, the driver
will use the first valid slot it finds and stick with that, but later
patches will add support for switching between several slots on the fly.

Extend the platform data structure with per-slot information: MMC/SDcard
bus width and card detect/write protect pins. This will affect the pin
muxing as well as the capabilities announced to the mmc core.

Note that board code is now required to supply a mci_platform_data
struct to at32_add_device_mci().

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/boards/atngw100/setup.c      |  7 +++-
 arch/avr32/boards/atstk1000/atstk1002.c | 18 +++++----
 arch/avr32/boards/atstk1000/atstk1003.c | 12 +++++-
 arch/avr32/boards/atstk1000/atstk1004.c | 12 +++++-
 arch/avr32/include/asm/atmel-mci.h      | 22 +++++++++-
 arch/avr32/mach-at32ap/at32ap700x.c     | 72 ++++++++++++++++++++++++---------
 6 files changed, 113 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index b8286f1ce85..f3085208959 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -53,8 +53,11 @@ static struct spi_board_info spi0_board_info[] __initdata = {
 };
 
 static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(25),
-	.wp_pin		= GPIO_PIN_PE(0),
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= GPIO_PIN_PC(25),
+		.wp_pin		= GPIO_PIN_PE(0),
+	},
 };
 
 /*
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index dfc3443e23a..4fedbc4488d 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -264,16 +264,20 @@ void __init setup_board(void)
 
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
 
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+
 /* MMC card detect requires MACB0 *NOT* be used */
 #ifdef CONFIG_BOARD_ATSTK1002_SW6_CUSTOM
-static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(14),	/* gpio30/sdcd */
-	.wp_pin		= GPIO_PIN_PC(15),	/* gpio31/sdwp */
-};
-#define MCI_PDATA	&mci0_data
+		.detect_pin	= GPIO_PIN_PC(14), /* gpio30/sdcd */
+		.wp_pin		= GPIO_PIN_PC(15), /* gpio31/sdwp */
 #else
-#define MCI_PDATA	NULL
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
 #endif	/* SW6 for sd{cd,wp} routing */
+	},
+};
 
 #endif	/* SW2 for MMC signal routing */
 
@@ -326,7 +330,7 @@ static int __init atstk1002_init(void)
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, MCI_PDATA);
+	at32_add_device_mci(0, &mci0_pdata);
 #endif
 #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM
 	set_hw_addr(at32_add_device_eth(1, &eth_data[1]));
diff --git a/arch/avr32/boards/atstk1000/atstk1003.c b/arch/avr32/boards/atstk1000/atstk1003.c
index 0cf664174c1..acc61235b89 100644
--- a/arch/avr32/boards/atstk1000/atstk1003.c
+++ b/arch/avr32/boards/atstk1000/atstk1003.c
@@ -66,6 +66,16 @@ static struct spi_board_info spi1_board_info[] __initdata = { {
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1003_setup_extdac(void)
 {
@@ -154,7 +164,7 @@ static int __init atstk1003_init(void)
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_usba(0, NULL);
 #ifndef CONFIG_BOARD_ATSTK100X_SW3_CUSTOM
diff --git a/arch/avr32/boards/atstk1000/atstk1004.c b/arch/avr32/boards/atstk1000/atstk1004.c
index 50a5273e591..d6a2d02f032 100644
--- a/arch/avr32/boards/atstk1000/atstk1004.c
+++ b/arch/avr32/boards/atstk1000/atstk1004.c
@@ -71,6 +71,16 @@ static struct spi_board_info spi1_board_info[] __initdata = { {
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1004_setup_extdac(void)
 {
@@ -137,7 +147,7 @@ static int __init atstk1004_init(void)
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_lcdc(0, &atstk1000_lcdc_data,
 			     fbmem_start, fbmem_size, 0);
diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
index c2ea6e1c9aa..d38c64ca41e 100644
--- a/arch/avr32/include/asm/atmel-mci.h
+++ b/arch/avr32/include/asm/atmel-mci.h
@@ -1,9 +1,29 @@
 #ifndef __ASM_AVR32_ATMEL_MCI_H
 #define __ASM_AVR32_ATMEL_MCI_H
 
-struct mci_platform_data {
+/**
+ * struct mci_slot_pdata - board-specific per-slot configuration
+ * @bus_width: Number of data lines wired up the slot
+ * @detect_pin: GPIO pin wired to the card detect switch
+ * @wp_pin: GPIO pin wired to the write protect sensor
+ *
+ * If a given slot is not present on the board, @bus_width should be
+ * set to 0. The other fields are ignored in this case.
+ *
+ * Any pins that aren't available should be set to a negative value.
+ */
+struct mci_slot_pdata {
+	unsigned int		bus_width;
 	int			detect_pin;
 	int			wp_pin;
 };
 
+/**
+ * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @slot: Per-slot configuration data.
+ */
+struct mci_platform_data {
+	struct mci_slot_pdata	slot[2];
+};
+
 #endif /* __ASM_AVR32_ATMEL_MCI_H */
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index e01dbe4ebb4..9967d5a3b6e 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1272,10 +1272,13 @@ static struct clk atmel_mci0_pclk = {
 struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
-	struct mci_platform_data	_data;
 	struct platform_device		*pdev;
 
-	if (id != 0)
+	if (id != 0 || !data)
+		return NULL;
+
+	/* Must have at least one usable slot */
+	if (!data->slot[0].bus_width && !data->slot[1].bus_width)
 		return NULL;
 
 	pdev = platform_device_alloc("atmel_mci", id);
@@ -1286,28 +1289,61 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
-	if (!data) {
-		data = &_data;
-		memset(data, -1, sizeof(struct mci_platform_data));
-		data->detect_pin = GPIO_PIN_NONE;
-		data->wp_pin = GPIO_PIN_NONE;
-	}
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
 		goto fail;
 
-	select_peripheral(PA(10), PERIPH_A, 0);	/* CLK	 */
-	select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
-	select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
-	select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
-	select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
-	select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+	/* CLK line is common to both slots */
+	select_peripheral(PA(10), PERIPH_A, 0);
 
-	if (gpio_is_valid(data->detect_pin))
-		at32_select_gpio(data->detect_pin, 0);
-	if (gpio_is_valid(data->wp_pin))
-		at32_select_gpio(data->wp_pin, 0);
+	switch (data->slot[0].bus_width) {
+	case 4:
+		select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
+		select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
+		select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
+		select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
+
+		if (gpio_is_valid(data->slot[0].detect_pin))
+			at32_select_gpio(data->slot[0].detect_pin, 0);
+		if (gpio_is_valid(data->slot[0].wp_pin))
+			at32_select_gpio(data->slot[0].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		goto fail;
+	}
+
+	switch (data->slot[1].bus_width) {
+	case 4:
+		select_peripheral(PB(8),  PERIPH_B, 0);	/* DATA1 */
+		select_peripheral(PB(9),  PERIPH_B, 0);	/* DATA2 */
+		select_peripheral(PB(10), PERIPH_B, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PB(6),  PERIPH_B, 0); /* CMD	 */
+		select_peripheral(PB(7),  PERIPH_B, 0); /* DATA0 */
+
+		if (gpio_is_valid(data->slot[1].detect_pin))
+			at32_select_gpio(data->slot[1].detect_pin, 0);
+		if (gpio_is_valid(data->slot[1].wp_pin))
+			at32_select_gpio(data->slot[1].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		if (!data->slot[0].bus_width)
+			goto fail;
+
+		data->slot[1].bus_width = 0;
+		break;
+	}
 
 	atmel_mci0_pclk.dev = &pdev->dev;
 
-- 
cgit v1.2.3


From 965ebf33ea5afb6386f5b57cc71e6572253746b3 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Wed, 17 Sep 2008 20:53:55 +0200
Subject: atmel-mci: support multiple mmc slots

The Atmel MCI controller can drive multiple cards through separate sets
of pins, but only one at a time. This patch adds support for
multiplexing access to the controller so that multiple card slots can be
used as if they were hooked up to separate mmc controllers.

The atmel-mci driver registers each slot as a separate mmc_host. Both
access the same common controller state, but they also have some state
on their own for card detection/write protect handling, and separate
shadows of the MR and SDCR registers.

When one of the slots receives a request from the mmc core, the common
controller state is checked. If it's idle, the request is submitted
immediately. If not, the request is added to a queue. When a request is
done, the queue is checked and if there is a queued request, it is
submitted before the completion callback is called.

This patch also includes a few cleanups and fixes, including a locking
overhaul. I had to change the locking extensively in any case, so I
might as well try to get it right. The driver no longer takes any
irq-safe locks, which may or may not improve the overall system
performance.

This patch also adds a bit of documentation of the internal data
structures.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/include/asm/atmel-mci.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
index d38c64ca41e..5d5ae1295cf 100644
--- a/arch/avr32/include/asm/atmel-mci.h
+++ b/arch/avr32/include/asm/atmel-mci.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_AVR32_ATMEL_MCI_H
 #define __ASM_AVR32_ATMEL_MCI_H
 
+#define ATMEL_MCI_MAX_NR_SLOTS	2
+
 /**
  * struct mci_slot_pdata - board-specific per-slot configuration
  * @bus_width: Number of data lines wired up the slot
@@ -11,6 +13,10 @@
  * set to 0. The other fields are ignored in this case.
  *
  * Any pins that aren't available should be set to a negative value.
+ *
+ * Note that support for multiple slots is experimental -- some cards
+ * might get upset if we don't get the clock management exactly right.
+ * But in most cases, it should work just fine.
  */
 struct mci_slot_pdata {
 	unsigned int		bus_width;
@@ -23,7 +29,7 @@ struct mci_slot_pdata {
  * @slot: Per-slot configuration data.
  */
 struct mci_platform_data {
-	struct mci_slot_pdata	slot[2];
+	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
 };
 
 #endif /* __ASM_AVR32_ATMEL_MCI_H */
-- 
cgit v1.2.3


From 65e8b083fc8ec303499baa1924ae032d46d29990 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Wed, 30 Jul 2008 20:29:03 +0200
Subject: atmel-mci: Add experimental DMA support

This adds support for DMA transfers through the generic DMA engine
framework with the DMA slave extensions.

The driver has been tested using mmc-block and ext3fs on several SD,
SDHC and MMC+ cards. Reads and writes work fine, with read transfer
rates up to 7.5 MiB/s on fast cards with debugging disabled.

Unfortunately, the driver has been known to lock up from time to time
with DMA enabled, so DMA support is currently optional and marked
EXPERIMENTAL. However, I didn't see any problems while testing 13
different cards (MMC, SD and SDHC of different brands and sizes), so I
suspect the "Initialize BLKR before sending data transfer command" fix
that was posted earlier fixed this as well.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/include/asm/atmel-mci.h  |  4 ++++
 arch/avr32/mach-at32ap/at32ap700x.c | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'arch')

diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
index 5d5ae1295cf..59f3fadd0b6 100644
--- a/arch/avr32/include/asm/atmel-mci.h
+++ b/arch/avr32/include/asm/atmel-mci.h
@@ -3,6 +3,8 @@
 
 #define ATMEL_MCI_MAX_NR_SLOTS	2
 
+struct dma_slave;
+
 /**
  * struct mci_slot_pdata - board-specific per-slot configuration
  * @bus_width: Number of data lines wired up the slot
@@ -26,9 +28,11 @@ struct mci_slot_pdata {
 
 /**
  * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @dma_slave: DMA slave interface to use in data transfers, or NULL.
  * @slot: Per-slot configuration data.
  */
 struct mci_platform_data {
+	struct dma_slave	*dma_slave;
 	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
 };
 
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 9967d5a3b6e..f1b9a3ac273 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1273,6 +1273,7 @@ struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
 	struct platform_device		*pdev;
+	struct dw_dma_slave		*dws;
 
 	if (id != 0 || !data)
 		return NULL;
@@ -1289,6 +1290,21 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
+	if (data->dma_slave)
+		dws = kmemdup(to_dw_dma_slave(data->dma_slave),
+				sizeof(struct dw_dma_slave), GFP_KERNEL);
+	else
+		dws = kzalloc(sizeof(struct dw_dma_slave), GFP_KERNEL);
+
+	dws->slave.dev = &pdev->dev;
+	dws->slave.dma_dev = &dw_dmac0_device.dev;
+	dws->slave.reg_width = DMA_SLAVE_WIDTH_32BIT;
+	dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
+				| DWC_CFGH_DST_PER(1));
+	dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
+				| DWC_CFGL_HS_SRC_POL);
+
+	data->dma_slave = &dws->slave;
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
-- 
cgit v1.2.3