aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-01-31 00:40:09 +1100
committerLinus Torvalds <torvalds@linux-foundation.org>2008-01-31 00:40:09 +1100
commitdd430ca20c40ecccd6954a7efd13d4398f507728 (patch)
treeb65089436d17b2bcc6054ede2e335a821b50007f
parent60e233172eabdd1f831bd48631b9626ce2279d9b (diff)
parentafadcd788f37bfa62d92662e54a720c26c91becf (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (890 commits) x86: fix nodemap_size according to nodeid bits x86: fix overlap between pagetable with bss section x86: add PCI IDs to k8topology_64.c x86: fix early_ioremap pagetable ops x86: use the same pgd_list for PAE and 64-bit x86: defer cr3 reload when doing pud_clear() x86: early boot debugging via FireWire (ohci1394_dma=early) x86: don't special-case pmd allocations as much x86: shrink some ifdefs in fault.c x86: ignore spurious faults x86: remove nx_enabled from fault.c x86: unify fault_32|64.c x86: unify fault_32|64.c with ifdefs x86: unify fault_32|64.c by ifdef'd function bodies x86: arch/x86/mm/init_32.c printk fixes x86: arch/x86/mm/init_32.c cleanup x86: arch/x86/mm/init_64.c printk fixes x86: unify ioremap x86: fixes some bugs about EFI memory map handling x86: use reboot_type on EFI 32 ...
-rw-r--r--Documentation/debugging-via-ohci1394.txt179
-rw-r--r--Documentation/kernel-parameters.txt51
-rw-r--r--Documentation/x86_64/boot-options.txt8
-rw-r--r--Documentation/x86_64/uefi.txt9
-rw-r--r--arch/arm/Kconfig5
-rw-r--r--arch/ia64/Kconfig8
-rw-r--r--arch/ia64/ia32/binfmt_elf32.c3
-rw-r--r--arch/ia64/kernel/module.c2
-rw-r--r--arch/m32r/Kconfig5
-rw-r--r--arch/mips/Kconfig5
-rw-r--r--arch/mips/kernel/i8253.c12
-rw-r--r--arch/parisc/Kconfig5
-rw-r--r--arch/powerpc/Kconfig8
-rw-r--r--arch/powerpc/kernel/ptrace.c52
-rw-r--r--arch/sparc64/Kconfig8
-rw-r--r--arch/um/kernel/ksyms.c4
-rw-r--r--arch/um/sys-i386/signal.c50
-rw-r--r--arch/um/sys-x86_64/signal.c70
-rw-r--r--arch/x86/Kconfig311
-rw-r--r--arch/x86/Kconfig.cpu65
-rw-r--r--arch/x86/Kconfig.debug129
-rw-r--r--arch/x86/Makefile247
-rw-r--r--arch/x86/Makefile_32175
-rw-r--r--arch/x86/Makefile_64144
-rw-r--r--arch/x86/boot/Makefile10
-rw-r--r--arch/x86/boot/apm.c3
-rw-r--r--arch/x86/boot/boot.h17
-rw-r--r--arch/x86/boot/cmdline.c65
-rw-r--r--arch/x86/boot/compressed/Makefile62
-rw-r--r--arch/x86/boot/compressed/Makefile_3250
-rw-r--r--arch/x86/boot/compressed/Makefile_6430
-rw-r--r--arch/x86/boot/compressed/misc.c (renamed from arch/x86/boot/compressed/misc_32.c)77
-rw-r--r--arch/x86/boot/compressed/misc_64.c371
-rw-r--r--arch/x86/boot/compressed/relocs.c7
-rw-r--r--arch/x86/boot/compressed/vmlinux.scr (renamed from arch/x86/boot/compressed/vmlinux_64.scr)2
-rw-r--r--arch/x86/boot/compressed/vmlinux_32.lds10
-rw-r--r--arch/x86/boot/compressed/vmlinux_32.scr10
-rw-r--r--arch/x86/boot/compressed/vmlinux_64.lds12
-rw-r--r--arch/x86/boot/edd.c13
-rw-r--r--arch/x86/boot/header.S5
-rw-r--r--arch/x86/boot/main.c31
-rw-r--r--arch/x86/boot/pm.c6
-rw-r--r--arch/x86/boot/pmjump.S54
-rw-r--r--arch/x86/boot/video-bios.c3
-rw-r--r--arch/x86/boot/video-vesa.c26
-rw-r--r--arch/x86/boot/video-vga.c20
-rw-r--r--arch/x86/boot/video.c33
-rw-r--r--arch/x86/boot/video.h3
-rw-r--r--arch/x86/boot/voyager.c4
-rw-r--r--arch/x86/configs/i386_defconfig4
-rw-r--r--arch/x86/configs/x86_64_defconfig9
-rw-r--r--arch/x86/ia32/Makefile41
-rw-r--r--arch/x86/ia32/audit.c2
-rw-r--r--arch/x86/ia32/fpu32.c183
-rw-r--r--arch/x86/ia32/ia32_aout.c246
-rw-r--r--arch/x86/ia32/ia32_binfmt.c285
-rw-r--r--arch/x86/ia32/ia32_signal.c472
-rw-r--r--arch/x86/ia32/ia32entry.S11
-rw-r--r--arch/x86/ia32/ipc32.c30
-rw-r--r--arch/x86/ia32/mmap32.c79
-rw-r--r--arch/x86/ia32/ptrace32.c404
-rw-r--r--arch/x86/ia32/sys_ia32.c504
-rw-r--r--arch/x86/ia32/syscall32.c83
-rw-r--r--arch/x86/ia32/syscall32_syscall.S17
-rw-r--r--arch/x86/ia32/tls32.c163
-rw-r--r--arch/x86/ia32/vsyscall-sigreturn.S143
-rw-r--r--arch/x86/ia32/vsyscall-sysenter.S95
-rw-r--r--arch/x86/ia32/vsyscall.lds80
-rw-r--r--arch/x86/kernel/Makefile96
-rw-r--r--arch/x86/kernel/Makefile_3288
-rw-r--r--arch/x86/kernel/Makefile_6445
-rw-r--r--arch/x86/kernel/acpi/Makefile2
-rw-r--r--arch/x86/kernel/acpi/sleep.c87
-rw-r--r--arch/x86/kernel/acpi/sleep_32.c70
-rw-r--r--arch/x86/kernel/acpi/sleep_64.c117
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S32
-rw-r--r--arch/x86/kernel/alternative.c40
-rw-r--r--arch/x86/kernel/aperture_64.c374
-rw-r--r--arch/x86/kernel/apic_32.c156
-rw-r--r--arch/x86/kernel/apic_64.c1257
-rw-r--r--arch/x86/kernel/apm_32.c379
-rw-r--r--arch/x86/kernel/asm-offsets_32.c65
-rw-r--r--arch/x86/kernel/asm-offsets_64.c56
-rw-r--r--arch/x86/kernel/bootflag.c50
-rw-r--r--arch/x86/kernel/bugs_64.c1
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c23
-rw-r--r--arch/x86/kernel/cpu/bugs.c5
-rw-r--r--arch/x86/kernel/cpu/common.c179
-rw-r--r--arch/x86/kernel/cpu/cpu.h3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c25
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c12
-rw-r--r--arch/x86/kernel/cpu/cyrix.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c39
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c25
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_32.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c45
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c21
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c35
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c23
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c27
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c23
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c147
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h9
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c3
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c1
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/cpuid.c2
-rw-r--r--arch/x86/kernel/doublefault_32.c19
-rw-r--r--arch/x86/kernel/ds.c464
-rw-r--r--arch/x86/kernel/e820_32.c241
-rw-r--r--arch/x86/kernel/e820_64.c428
-rw-r--r--arch/x86/kernel/early-quirks.c127
-rw-r--r--arch/x86/kernel/efi.c512
-rw-r--r--arch/x86/kernel/efi_32.c618
-rw-r--r--arch/x86/kernel/efi_64.c134
-rw-r--r--arch/x86/kernel/efi_stub_64.S109
-rw-r--r--arch/x86/kernel/entry_32.S26
-rw-r--r--arch/x86/kernel/entry_64.S101
-rw-r--r--arch/x86/kernel/genapic_64.c15
-rw-r--r--arch/x86/kernel/geode_32.c48
-rw-r--r--arch/x86/kernel/head64.c63
-rw-r--r--arch/x86/kernel/head_32.S17
-rw-r--r--arch/x86/kernel/head_64.S48
-rw-r--r--arch/x86/kernel/hpet.c60
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c7
-rw-r--r--arch/x86/kernel/i387.c479
-rw-r--r--arch/x86/kernel/i387_32.c544
-rw-r--r--arch/x86/kernel/i387_64.c150
-rw-r--r--arch/x86/kernel/i8253.c72
-rw-r--r--arch/x86/kernel/i8259_32.c24
-rw-r--r--arch/x86/kernel/i8259_64.c160
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic_32.c13
-rw-r--r--arch/x86/kernel/io_apic_64.c112
-rw-r--r--arch/x86/kernel/io_delay.c114
-rw-r--r--arch/x86/kernel/ioport.c (renamed from arch/x86/kernel/ioport_32.c)85
-rw-r--r--arch/x86/kernel/ioport_64.c117
-rw-r--r--arch/x86/kernel/irq_32.c22
-rw-r--r--arch/x86/kernel/irq_64.c30
-rw-r--r--arch/x86/kernel/kdebugfs.c65
-rw-r--r--arch/x86/kernel/kprobes.c1066
-rw-r--r--arch/x86/kernel/kprobes_32.c756
-rw-r--r--arch/x86/kernel/kprobes_64.c749
-rw-r--r--arch/x86/kernel/ldt.c (renamed from arch/x86/kernel/ldt_32.c)112
-rw-r--r--arch/x86/kernel/ldt_64.c250
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c5
-rw-r--r--arch/x86/kernel/mfgpt_32.c15
-rw-r--r--arch/x86/kernel/microcode.c12
-rw-r--r--arch/x86/kernel/mpparse_32.c39
-rw-r--r--arch/x86/kernel/mpparse_64.c28
-rw-r--r--arch/x86/kernel/nmi_32.c14
-rw-r--r--arch/x86/kernel/nmi_64.c99
-rw-r--r--arch/x86/kernel/numaq_32.c2
-rw-r--r--arch/x86/kernel/paravirt.c (renamed from arch/x86/kernel/paravirt_32.c)96
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c49
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c57
-rw-r--r--arch/x86/kernel/pci-calgary_64.c5
-rw-r--r--arch/x86/kernel/pci-dma_64.c3
-rw-r--r--arch/x86/kernel/pci-gart_64.c510
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c1
-rw-r--r--arch/x86/kernel/pmtimer_64.c4
-rw-r--r--arch/x86/kernel/process_32.c419
-rw-r--r--arch/x86/kernel/process_64.c342
-rw-r--r--arch/x86/kernel/ptrace.c1545
-rw-r--r--arch/x86/kernel/ptrace_32.c717
-rw-r--r--arch/x86/kernel/ptrace_64.c621
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c (renamed from arch/x86/kernel/reboot_32.c)284
-rw-r--r--arch/x86/kernel/reboot_64.c176
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c14
-rw-r--r--arch/x86/kernel/rtc.c204
-rw-r--r--arch/x86/kernel/setup64.c59
-rw-r--r--arch/x86/kernel/setup_32.c285
-rw-r--r--arch/x86/kernel/setup_64.c555
-rw-r--r--arch/x86/kernel/signal_32.c225
-rw-r--r--arch/x86/kernel/signal_64.c133
-rw-r--r--arch/x86/kernel/smp_32.c15
-rw-r--r--arch/x86/kernel/smp_64.c91
-rw-r--r--arch/x86/kernel/smpboot_32.c61
-rw-r--r--arch/x86/kernel/smpboot_64.c79
-rw-r--r--arch/x86/kernel/smpcommon_32.c7
-rw-r--r--arch/x86/kernel/srat_32.c8
-rw-r--r--arch/x86/kernel/stacktrace.c12
-rw-r--r--arch/x86/kernel/step.c203
-rw-r--r--arch/x86/kernel/suspend_64.c30
-rw-r--r--arch/x86/kernel/suspend_asm_64.S32
-rw-r--r--arch/x86/kernel/sys_x86_64.c98
-rw-r--r--arch/x86/kernel/test_nx.c176
-rw-r--r--arch/x86/kernel/test_rodata.c86
-rw-r--r--arch/x86/kernel/time_32.c114
-rw-r--r--arch/x86/kernel/time_64.c187
-rw-r--r--arch/x86/kernel/tls.c213
-rw-r--r--arch/x86/kernel/tls.h21
-rw-r--r--arch/x86/kernel/topology.c23
-rw-r--r--arch/x86/kernel/traps_32.c341
-rw-r--r--arch/x86/kernel/traps_64.c367
-rw-r--r--arch/x86/kernel/tsc_32.c62
-rw-r--r--arch/x86/kernel/tsc_64.c100
-rw-r--r--arch/x86/kernel/tsc_sync.c30
-rw-r--r--arch/x86/kernel/vm86_32.c115
-rw-r--r--arch/x86/kernel/vmi_32.c126
-rw-r--r--arch/x86/kernel/vmiclock_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S23
-rw-r--r--arch/x86/kernel/vsmp_64.c11
-rw-r--r--arch/x86/kernel/vsyscall_32.S15
-rw-r--r--arch/x86/kernel/vsyscall_32.lds.S67
-rw-r--r--arch/x86/kernel/vsyscall_64.c11
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c13
-rw-r--r--arch/x86/lguest/Kconfig1
-rw-r--r--arch/x86/lguest/boot.c51
-rw-r--r--arch/x86/lib/Makefile26
-rw-r--r--arch/x86/lib/Makefile_3211
-rw-r--r--arch/x86/lib/Makefile_6413
-rw-r--r--arch/x86/lib/memcpy_32.c4
-rw-r--r--arch/x86/lib/memmove_64.c4
-rw-r--r--arch/x86/lib/semaphore_32.S22
-rw-r--r--arch/x86/lib/thunk_64.S2
-rw-r--r--arch/x86/mach-rdc321x/Makefile5
-rw-r--r--arch/x86/mach-rdc321x/gpio.c91
-rw-r--r--arch/x86/mach-rdc321x/platform.c68
-rw-r--r--arch/x86/mach-rdc321x/wdt.c275
-rw-r--r--arch/x86/mach-visws/mpparse.c16
-rw-r--r--arch/x86/mach-voyager/setup.c34
-rw-r--r--arch/x86/mach-voyager/voyager_basic.c132
-rw-r--r--arch/x86/mach-voyager/voyager_cat.c601
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c690
-rw-r--r--arch/x86/mach-voyager/voyager_thread.c52
-rw-r--r--arch/x86/math-emu/errors.c882
-rw-r--r--arch/x86/math-emu/exception.h9
-rw-r--r--arch/x86/math-emu/fpu_arith.c150
-rw-r--r--arch/x86/math-emu/fpu_asm.h1
-rw-r--r--arch/x86/math-emu/fpu_aux.c211
-rw-r--r--arch/x86/math-emu/fpu_emu.h67
-rw-r--r--arch/x86/math-emu/fpu_entry.c1230
-rw-r--r--arch/x86/math-emu/fpu_etc.c185
-rw-r--r--arch/x86/math-emu/fpu_proto.h28
-rw-r--r--arch/x86/math-emu/fpu_tags.c94
-rw-r--r--arch/x86/math-emu/fpu_trig.c2884
-rw-r--r--arch/x86/math-emu/get_address.c650
-rw-r--r--arch/x86/math-emu/load_store.c448
-rw-r--r--arch/x86/math-emu/poly.h69
-rw-r--r--arch/x86/math-emu/poly_2xm1.c199
-rw-r--r--arch/x86/math-emu/poly_atan.c353
-rw-r--r--arch/x86/math-emu/poly_l2.c376
-rw-r--r--arch/x86/math-emu/poly_sin.c599
-rw-r--r--arch/x86/math-emu/poly_tan.c338
-rw-r--r--arch/x86/math-emu/reg_add_sub.c563
-rw-r--r--arch/x86/math-emu/reg_compare.c567
-rw-r--r--arch/x86/math-emu/reg_constant.c73
-rw-r--r--arch/x86/math-emu/reg_convert.c57
-rw-r--r--arch/x86/math-emu/reg_divide.c301
-rw-r--r--arch/x86/math-emu/reg_ld_str.c2147
-rw-r--r--arch/x86/math-emu/reg_mul.c163
-rw-r--r--arch/x86/math-emu/status_w.h8
-rw-r--r--arch/x86/mm/Makefile_323
-rw-r--r--arch/x86/mm/Makefile_643
-rw-r--r--arch/x86/mm/boot_ioremap_32.c100
-rw-r--r--arch/x86/mm/discontig_32.c110
-rw-r--r--arch/x86/mm/extable.c62
-rw-r--r--arch/x86/mm/extable_32.c35
-rw-r--r--arch/x86/mm/extable_64.c34
-rw-r--r--arch/x86/mm/fault.c986
-rw-r--r--arch/x86/mm/fault_32.c659
-rw-r--r--arch/x86/mm/fault_64.c623
-rw-r--r--arch/x86/mm/highmem_32.c47
-rw-r--r--arch/x86/mm/hugetlbpage.c3
-rw-r--r--arch/x86/mm/init_32.c425
-rw-r--r--arch/x86/mm/init_64.c418
-rw-r--r--arch/x86/mm/ioremap.c501
-rw-r--r--arch/x86/mm/ioremap_32.c274
-rw-r--r--arch/x86/mm/ioremap_64.c210
-rw-r--r--arch/x86/mm/k8topology_64.c173
-rw-r--r--arch/x86/mm/mmap.c (renamed from arch/x86/mm/mmap_32.c)86
-rw-r--r--arch/x86/mm/mmap_64.c29
-rw-r--r--arch/x86/mm/numa_64.c274
-rw-r--r--arch/x86/mm/pageattr-test.c224
-rw-r--r--arch/x86/mm/pageattr.c564
-rw-r--r--arch/x86/mm/pageattr_32.c278
-rw-r--r--arch/x86/mm/pageattr_64.c255
-rw-r--r--arch/x86/mm/pgtable_32.c145
-rw-r--r--arch/x86/mm/srat_64.c95
-rw-r--r--arch/x86/oprofile/backtrace.c12
-rw-r--r--arch/x86/oprofile/nmi_int.c212
-rw-r--r--arch/x86/pci/common.c17
-rw-r--r--arch/x86/pci/fixup.c30
-rw-r--r--arch/x86/pci/irq.c20
-rw-r--r--arch/x86/power/cpu.c18
-rw-r--r--arch/x86/vdso/.gitignore5
-rw-r--r--arch/x86/vdso/Makefile132
-rw-r--r--arch/x86/vdso/vclock_gettime.c1
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S64
-rw-r--r--arch/x86/vdso/vdso-start.S2
-rw-r--r--arch/x86/vdso/vdso.lds.S94
-rw-r--r--arch/x86/vdso/vdso32-setup.c (renamed from arch/x86/kernel/sysenter_32.c)164
-rw-r--r--arch/x86/vdso/vdso32.S19
-rw-r--r--arch/x86/vdso/vdso32/.gitignore1
-rw-r--r--arch/x86/vdso/vdso32/int80.S (renamed from arch/x86/kernel/vsyscall-int80_32.S)21
-rw-r--r--arch/x86/vdso/vdso32/note.S (renamed from arch/x86/kernel/vsyscall-note_32.S)5
-rw-r--r--arch/x86/vdso/vdso32/sigreturn.S (renamed from arch/x86/kernel/vsyscall-sigreturn_32.S)87
-rw-r--r--arch/x86/vdso/vdso32/syscall.S (renamed from arch/x86/ia32/vsyscall-syscall.S)22
-rw-r--r--arch/x86/vdso/vdso32/sysenter.S (renamed from arch/x86/kernel/vsyscall-sysenter_32.S)42
-rw-r--r--arch/x86/vdso/vdso32/vdso32.lds.S37
-rw-r--r--arch/x86/vdso/vgetcpu.c4
-rw-r--r--arch/x86/vdso/vma.c18
-rw-r--r--arch/x86/vdso/voffset.h1
-rw-r--r--arch/x86/xen/Kconfig1
-rw-r--r--arch/x86/xen/enlighten.c102
-rw-r--r--arch/x86/xen/events.c4
-rw-r--r--arch/x86/xen/mmu.c23
-rw-r--r--arch/x86/xen/setup.c9
-rw-r--r--arch/x86/xen/smp.c8
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--arch/x86/xen/xen-head.S6
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/acpi/processor_idle.c34
-rw-r--r--drivers/char/agp/ali-agp.c2
-rw-r--r--drivers/char/agp/backend.c3
-rw-r--r--drivers/char/agp/generic.c3
-rw-r--r--drivers/char/agp/i460-agp.c2
-rw-r--r--drivers/char/agp/intel-agp.c11
-rw-r--r--drivers/char/hpet.c126
-rw-r--r--drivers/char/rtc.c253
-rw-r--r--drivers/cpufreq/cpufreq.c2
-rw-r--r--drivers/firmware/dmi_scan.c26
-rw-r--r--drivers/ieee1394/Makefile1
-rw-r--r--drivers/ieee1394/init_ohci1394_dma.c285
-rw-r--r--drivers/input/mouse/pc110pad.c7
-rw-r--r--drivers/kvm/svm.c2
-rw-r--r--drivers/kvm/vmx.c8
-rw-r--r--drivers/lguest/x86/core.c4
-rw-r--r--drivers/pnp/pnpbios/bioscalls.c5
-rw-r--r--drivers/video/vermilion/vermilion.c15
-rw-r--r--fs/Kconfig.binfmt4
-rw-r--r--fs/Makefile1
-rw-r--r--fs/aio.c2
-rw-r--r--fs/binfmt_elf.c677
-rw-r--r--fs/compat_binfmt_elf.c131
-rw-r--r--fs/jbd/checkpoint.c3
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd2/checkpoint.c3
-rw-r--r--fs/jbd2/commit.c2
-rw-r--r--include/acpi/reboot.h9
-rw-r--r--include/asm-alpha/agp.h1
-rw-r--r--include/asm-generic/bug.h17
-rw-r--r--include/asm-generic/percpu.h97
-rw-r--r--include/asm-generic/tlb.h1
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/asm-ia64/acpi.h2
-rw-r--r--include/asm-ia64/agp.h1
-rw-r--r--include/asm-ia64/percpu.h24
-rw-r--r--include/asm-m32r/signal.h2
-rw-r--r--include/asm-parisc/agp.h1
-rw-r--r--include/asm-powerpc/agp.h1
-rw-r--r--include/asm-powerpc/percpu.h17
-rw-r--r--include/asm-powerpc/ptrace.h7
-rw-r--r--include/asm-s390/percpu.h20
-rw-r--r--include/asm-sparc64/agp.h1
-rw-r--r--include/asm-sparc64/percpu.h16
-rw-r--r--include/asm-um/asm.h6
-rw-r--r--include/asm-um/linkage.h1
-rw-r--r--include/asm-um/nops.h6
-rw-r--r--include/asm-x86/Kbuild4
-rw-r--r--include/asm-x86/acpi.h151
-rw-r--r--include/asm-x86/acpi_32.h143
-rw-r--r--include/asm-x86/acpi_64.h153
-rw-r--r--include/asm-x86/agp.h9
-rw-r--r--include/asm-x86/alternative.h162
-rw-r--r--include/asm-x86/alternative_32.h154
-rw-r--r--include/asm-x86/alternative_64.h159
-rw-r--r--include/asm-x86/apic.h141
-rw-r--r--include/asm-x86/apic_32.h127
-rw-r--r--include/asm-x86/apic_64.h102
-rw-r--r--include/asm-x86/apicdef.h412
-rw-r--r--include/asm-x86/apicdef_32.h375
-rw-r--r--include/asm-x86/apicdef_64.h392
-rw-r--r--include/asm-x86/arch_hooks.h5
-rw-r--r--include/asm-x86/asm.h32
-rw-r--r--include/asm-x86/bitops.h316
-rw-r--r--include/asm-x86/bitops_32.h324
-rw-r--r--include/asm-x86/bitops_64.h297
-rw-r--r--include/asm-x86/bootparam.h5
-rw-r--r--include/asm-x86/bug.h3
-rw-r--r--include/asm-x86/bugs.h3
-rw-r--r--include/asm-x86/cacheflush.h35
-rw-r--r--include/asm-x86/calling.h194
-rw-r--r--include/asm-x86/checksum_64.h2
-rw-r--r--include/asm-x86/cmpxchg_32.h122
-rw-r--r--include/asm-x86/compat.h2
-rw-r--r--include/asm-x86/cpu.h2
-rw-r--r--include/asm-x86/cpufeature.h208
-rw-r--r--include/asm-x86/cpufeature_32.h176
-rw-r--r--include/asm-x86/cpufeature_64.h30
-rw-r--r--include/asm-x86/desc.h380
-rw-r--r--include/asm-x86/desc_32.h244
-rw-r--r--include/asm-x86/desc_64.h203
-rw-r--r--include/asm-x86/desc_defs.h47
-rw-r--r--include/asm-x86/dma.h318
-rw-r--r--include/asm-x86/dma_32.h297
-rw-r--r--include/asm-x86/dma_64.h304
-rw-r--r--include/asm-x86/dmi.h10
-rw-r--r--include/asm-x86/ds.h72
-rw-r--r--include/asm-x86/e820.h6
-rw-r--r--include/asm-x86/e820_32.h9
-rw-r--r--include/asm-x86/e820_64.h16
-rw-r--r--include/asm-x86/efi.h97
-rw-r--r--include/asm-x86/elf.h212
-rw-r--r--include/asm-x86/emergency-restart.h12
-rw-r--r--include/asm-x86/fixmap_32.h24
-rw-r--r--include/asm-x86/fixmap_64.h6
-rw-r--r--include/asm-x86/fpu32.h10
-rw-r--r--include/asm-x86/futex.h138
-rw-r--r--include/asm-x86/futex_32.h135
-rw-r--r--include/asm-x86/futex_64.h125
-rw-r--r--include/asm-x86/gart.h5
-rw-r--r--include/asm-x86/geode.h12
-rw-r--r--include/asm-x86/gpio.h6
-rw-r--r--include/asm-x86/hpet.h8
-rw-r--r--include/asm-x86/hw_irq_32.h16
-rw-r--r--include/asm-x86/hw_irq_64.h2
-rw-r--r--include/asm-x86/i387.h361
-rw-r--r--include/asm-x86/i387_32.h151
-rw-r--r--include/asm-x86/i387_64.h214
-rw-r--r--include/asm-x86/i8253.h3
-rw-r--r--include/asm-x86/i8259.h20
-rw-r--r--include/asm-x86/ia32.h6
-rw-r--r--include/asm-x86/ia32_unistd.h2
-rw-r--r--include/asm-x86/ide.h2
-rw-r--r--include/asm-x86/idle.h1
-rw-r--r--include/asm-x86/io_32.h35
-rw-r--r--include/asm-x86/io_64.h57
-rw-r--r--include/asm-x86/io_apic.h158
-rw-r--r--include/asm-x86/io_apic_32.h155
-rw-r--r--include/asm-x86/io_apic_64.h138
-rw-r--r--include/asm-x86/irqflags.h246
-rw-r--r--include/asm-x86/irqflags_32.h197
-rw-r--r--include/asm-x86/irqflags_64.h176
-rw-r--r--include/asm-x86/k8.h1
-rw-r--r--include/asm-x86/kdebug.h11
-rw-r--r--include/asm-x86/kexec.h169
-rw-r--r--include/asm-x86/kexec_32.h99
-rw-r--r--include/asm-x86/kexec_64.h94
-rw-r--r--include/asm-x86/kprobes.h103
-rw-r--r--include/asm-x86/kprobes_32.h94
-rw-r--r--include/asm-x86/kprobes_64.h90
-rw-r--r--include/asm-x86/lguest.h14
-rw-r--r--include/asm-x86/linkage.h26
-rw-r--r--include/asm-x86/linkage_32.h15
-rw-r--r--include/asm-x86/linkage_64.h6
-rw-r--r--include/asm-x86/local.h243
-rw-r--r--include/asm-x86/local_32.h233
-rw-r--r--include/asm-x86/local_64.h222
-rw-r--r--include/asm-x86/mach-bigsmp/mach_apic.h12
-rw-r--r--include/asm-x86/mach-default/apm.h2
-rw-r--r--include/asm-x86/mach-default/io_ports.h25
-rw-r--r--include/asm-x86/mach-default/mach_apic.h18
-rw-r--r--include/asm-x86/mach-default/mach_time.h111
-rw-r--r--include/asm-x86/mach-default/mach_timer.h2
-rw-r--r--include/asm-x86/mach-default/mach_traps.h2
-rw-r--r--include/asm-x86/mach-es7000/mach_apic.h10
-rw-r--r--include/asm-x86/mach-generic/gpio.h15
-rw-r--r--include/asm-x86/mach-numaq/mach_apic.h10
-rw-r--r--include/asm-x86/mach-rdc321x/gpio.h56
-rw-r--r--include/asm-x86/mach-rdc321x/rdc321x_defs.h6
-rw-r--r--include/asm-x86/mach-summit/mach_apic.h18
-rw-r--r--include/asm-x86/math_emu.h5
-rw-r--r--include/asm-x86/mc146818rtc.h101
-rw-r--r--include/asm-x86/mc146818rtc_32.h97
-rw-r--r--include/asm-x86/mc146818rtc_64.h29
-rw-r--r--include/asm-x86/mce.h18
-rw-r--r--include/asm-x86/mmsegment.h8
-rw-r--r--include/asm-x86/mmu.h8
-rw-r--r--include/asm-x86/mmu_context_32.h2
-rw-r--r--include/asm-x86/mmu_context_64.h13
-rw-r--r--include/asm-x86/mmzone_32.h3
-rw-r--r--include/asm-x86/mmzone_64.h12
-rw-r--r--include/asm-x86/module.h81
-rw-r--r--include/asm-x86/module_32.h75
-rw-r--r--include/asm-x86/module_64.h10
-rw-r--r--include/asm-x86/mpspec.h116
-rw-r--r--include/asm-x86/mpspec_32.h81
-rw-r--r--include/asm-x86/mpspec_64.h233
-rw-r--r--include/asm-x86/mpspec_def.h87
-rw-r--r--include/asm-x86/msr-index.h15
-rw-r--r--include/asm-x86/msr.h299
-rw-r--r--include/asm-x86/mtrr.h14
-rw-r--r--include/asm-x86/mutex_32.h7
-rw-r--r--include/asm-x86/nmi_32.h3
-rw-r--r--include/asm-x86/nmi_64.h5
-rw-r--r--include/asm-x86/nops.h90
-rw-r--r--include/asm-x86/numa_32.h14
-rw-r--r--include/asm-x86/numa_64.h12
-rw-r--r--include/asm-x86/page.h190
-rw-r--r--include/asm-x86/page_32.h247
-rw-r--r--include/asm-x86/page_64.h110
-rw-r--r--include/asm-x86/paravirt.h615
-rw-r--r--include/asm-x86/pci.h17
-rw-r--r--include/asm-x86/pci_64.h1
-rw-r--r--include/asm-x86/pda.h73
-rw-r--r--include/asm-x86/percpu.h145
-rw-r--r--include/asm-x86/percpu_32.h154
-rw-r--r--include/asm-x86/percpu_64.h68
-rw-r--r--include/asm-x86/pgalloc_32.h85
-rw-r--r--include/asm-x86/pgtable-2level.h45
-rw-r--r--include/asm-x86/pgtable-3level.h88
-rw-r--r--include/asm-x86/pgtable.h359
-rw-r--r--include/asm-x86/pgtable_32.h291
-rw-r--r--include/asm-x86/pgtable_64.h260
-rw-r--r--include/asm-x86/processor.h841
-rw-r--r--include/asm-x86/processor_32.h786
-rw-r--r--include/asm-x86/processor_64.h452
-rw-r--r--include/asm-x86/proto.h69
-rw-r--r--include/asm-x86/ptrace-abi.h62
-rw-r--r--include/asm-x86/ptrace.h220
-rw-r--r--include/asm-x86/resume-trace.h23
-rw-r--r--include/asm-x86/resume-trace_32.h13
-rw-r--r--include/asm-x86/resume-trace_64.h13
-rw-r--r--include/asm-x86/rio.h4
-rw-r--r--include/asm-x86/rwlock.h1
-rw-r--r--include/asm-x86/rwsem.h14
-rw-r--r--include/asm-x86/scatterlist.h34
-rw-r--r--include/asm-x86/scatterlist_32.h28
-rw-r--r--include/asm-x86/scatterlist_64.h29
-rw-r--r--include/asm-x86/segment.h203
-rw-r--r--include/asm-x86/segment_32.h148
-rw-r--r--include/asm-x86/segment_64.h53
-rw-r--r--include/asm-x86/semaphore_32.h8
-rw-r--r--include/asm-x86/setup.h11
-rw-r--r--include/asm-x86/sigcontext.h42
-rw-r--r--include/asm-x86/sigcontext32.h22
-rw-r--r--include/asm-x86/signal.h11
-rw-r--r--include/asm-x86/smp_32.h119
-rw-r--r--include/asm-x86/smp_64.h137
-rw-r--r--include/asm-x86/sparsemem.h35
-rw-r--r--include/asm-x86/sparsemem_32.h31
-rw-r--r--include/asm-x86/sparsemem_64.h26
-rw-r--r--include/asm-x86/spinlock.h295
-rw-r--r--include/asm-x86/spinlock_32.h221
-rw-r--r--include/asm-x86/spinlock_64.h167
-rw-r--r--include/asm-x86/spinlock_types.h2
-rw-r--r--include/asm-x86/stacktrace.h5
-rw-r--r--include/asm-x86/suspend_32.h4
-rw-r--r--include/asm-x86/suspend_64.h11
-rw-r--r--include/asm-x86/system.h413
-rw-r--r--include/asm-x86/system_32.h320
-rw-r--r--include/asm-x86/system_64.h163
-rw-r--r--include/asm-x86/thread_info_32.h16
-rw-r--r--include/asm-x86/thread_info_64.h34
-rw-r--r--include/asm-x86/time.h26
-rw-r--r--include/asm-x86/timer.h23
-rw-r--r--include/asm-x86/timex.h2
-rw-r--r--include/asm-x86/tlbflush.h157
-rw-r--r--include/asm-x86/tlbflush_32.h168
-rw-r--r--include/asm-x86/tlbflush_64.h100
-rw-r--r--include/asm-x86/topology.h187
-rw-r--r--include/asm-x86/topology_32.h121
-rw-r--r--include/asm-x86/topology_64.h71
-rw-r--r--include/asm-x86/tsc.h40
-rw-r--r--include/asm-x86/uaccess_64.h2
-rw-r--r--include/asm-x86/unistd_32.h2
-rw-r--r--include/asm-x86/user_32.h24
-rw-r--r--include/asm-x86/user_64.h41
-rw-r--r--include/asm-x86/vdso.h28
-rw-r--r--include/asm-x86/vsyscall.h2
-rw-r--r--include/asm-x86/vsyscall32.h20
-rw-r--r--include/asm-x86/xor_32.h2
-rw-r--r--include/asm-x86/xor_64.h2
-rw-r--r--include/linux/acpi_pmtmr.h2
-rw-r--r--include/linux/clocksource.h3
-rw-r--r--include/linux/compat.h15
-rw-r--r--include/linux/const.h5
-rw-r--r--include/linux/cpumask.h4
-rw-r--r--include/linux/elf.h1
-rw-r--r--include/linux/hpet.h5
-rw-r--r--include/linux/init_ohci1394_dma.h4
-rw-r--r--include/linux/ioport.h2
-rw-r--r--include/linux/kernel.h3
-rw-r--r--include/linux/kprobes.h10
-rw-r--r--include/linux/linkage.h8
-rw-r--r--include/linux/mm.h15
-rw-r--r--include/linux/pci_ids.h7
-rw-r--r--include/linux/percpu.h24
-rw-r--r--include/linux/ptrace.h75
-rw-r--r--include/linux/regset.h368
-rw-r--r--include/linux/sched.h21
-rw-r--r--include/linux/smp.h2
-rw-r--r--include/linux/spinlock.h6
-rw-r--r--include/linux/spinlock_types.h4
-rw-r--r--include/linux/spinlock_up.h2
-rw-r--r--include/linux/suspend.h3
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/thread_info.h10
-rw-r--r--include/linux/tick.h6
-rw-r--r--include/linux/timer.h6
-rw-r--r--include/xen/page.h6
-rw-r--r--init/main.c25
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/backtracetest.c48
-rw-r--r--kernel/irq/manage.c3
-rw-r--r--kernel/irq/proc.c21
-rw-r--r--kernel/irq/spurious.c5
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/module.c8
-rw-r--r--kernel/panic.c29
-rw-r--r--kernel/printk.c7
-rw-r--r--kernel/ptrace.c165
-rw-r--r--kernel/sched.c16
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/softirq.c11
-rw-r--r--kernel/spinlock.c3
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/test_kprobes.c216
-rw-r--r--kernel/time/clockevents.c13
-rw-r--r--kernel/time/clocksource.c31
-rw-r--r--kernel/time/tick-broadcast.c7
-rw-r--r--kernel/time/tick-internal.h2
-rw-r--r--kernel/time/tick-sched.c76
-rw-r--r--kernel/time/timekeeping.c28
-rw-r--r--kernel/time/timer_stats.c2
-rw-r--r--kernel/timer.c82
-rw-r--r--lib/Kconfig.debug52
-rw-r--r--lib/rwsem.c8
-rw-r--r--mm/memory.c39
-rw-r--r--mm/mmap.c3
-rw-r--r--net/sunrpc/svc.c1
-rw-r--r--sound/pci/intel8x0.c8
635 files changed, 36098 insertions, 36866 deletions
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
new file mode 100644
index 00000000000..de4804e8b39
--- /dev/null
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -0,0 +1,179 @@
+
+ Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
+ ---------------------------------------------------------------------------
+
+Introduction
+------------
+
+Basically all FireWire controllers which are in use today are compliant
+to the OHCI-1394 specification which defines the controller to be a PCI
+bus master which uses DMA to offload data transfers from the CPU and has
+a "Physical Response Unit" which executes specific requests by employing
+PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver.
+
+Once properly configured, remote machines can send these requests to
+ask the OHCI-1394 controller to perform read and write requests on
+physical system memory and, for read requests, send the result of
+the physical memory read back to the requester.
+
+With that, it is possible to debug issues by reading interesting memory
+locations such as buffers like the printk buffer or the process table.
+
+Retrieving a full system memory dump is also possible over the FireWire,
+using data transfer rates in the order of 10MB/s or more.
+
+Memory access is currently limited to the low 4G of physical address
+space which can be a problem on IA64 machines where memory is located
+mostly above that limit, but it is rarely a problem on more common
+hardware such as hardware based on x86, x86-64 and PowerPC.
+
+Together with a early initialization of the OHCI-1394 controller for debugging,
+this facility proved most useful for examining long debugs logs in the printk
+buffer on to debug early boot problems in areas like ACPI where the system
+fails to boot and other means for debugging (serial port) are either not
+available (notebooks) or too slow for extensive debug information (like ACPI).
+
+Drivers
+-------
+
+The OHCI-1394 drivers in drivers/firewire and drivers/ieee1394 initialize
+the OHCI-1394 controllers to a working state and can be used to enable
+physical DMA. By default you only have to load the driver, and physical
+DMA access will be granted to all remote nodes, but it can be turned off
+when using the ohci1394 driver.
+
+Because these drivers depend on the PCI enumeration to be completed, an
+initialization routine which can runs pretty early (long before console_init(),
+which makes the printk buffer appear on the console can be called) was written.
+
+To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
+Provide code for enabling DMA over FireWire early on boot) and pass the
+parameter "ohci1394_dma=early" to the recompiled kernel on boot.
+
+Tools
+-----
+
+firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported
+it from PowerPC to x86 and x86_64 and added functionality, firescope can now
+be used to view the printk buffer of a remote machine, even with live update.
+
+Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
+from 32-bit firescope and vice versa:
+- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2
+
+and he implemented fast system dump (alpha version - read README.txt):
+- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2
+
+There is also a gdb proxy for firewire which allows to use gdb to access
+data which can be referenced from symbols found by gdb in vmlinux:
+- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2
+
+The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
+yet stable) with kgdb over an memory-based communication module (kgdbom).
+
+Getting Started
+---------------
+
+The OHCI-1394 specification regulates that the OHCI-1394 controller must
+disable all physical DMA on each bus reset.
+
+This means that if you want to debug an issue in a system state where
+interrupts are disabled and where no polling of the OHCI-1394 controller
+for bus resets takes place, you have to establish any FireWire cable
+connections and fully initialize all FireWire hardware __before__ the
+system enters such state.
+
+Step-by-step instructions for using firescope with early OHCI initialization:
+
+1) Verify that your hardware is supported:
+
+ Load the ohci1394 or the fw-ohci module and check your kernel logs.
+ You should see a line similar to
+
+ ohci1394: fw-host0: OHCI-1394 1.1 (PCI): IRQ=[18] MMIO=[fe9ff800-fe9fffff]
+ ... Max Packet=[2048] IR/IT contexts=[4/8]
+
+ when loading the driver. If you have no supported controller, many PCI,
+ CardBus and even some Express cards which are fully compliant to OHCI-1394
+ specification are available. If it requires no driver for Windows operating
+ systems, it most likely is. Only specialized shops have cards which are not
+ compliant, they are based on TI PCILynx chips and require drivers for Win-
+ dows operating systems.
+
+2) Establish a working FireWire cable connection:
+
+ Any FireWire cable, as long at it provides electrically and mechanically
+ stable connection and has matching connectors (there are small 4-pin and
+ large 6-pin FireWire ports) will do.
+
+ If an driver is running on both machines you should see a line like
+
+ ieee1394: Node added: ID:BUS[0-01:1023] GUID[0090270001b84bba]
+
+ on both machines in the kernel log when the cable is plugged in
+ and connects the two machines.
+
+3) Test physical DMA using firescope:
+
+ On the debug host,
+ - load the raw1394 module,
+ - make sure that /dev/raw1394 is accessible,
+ then start firescope:
+
+ $ firescope
+ Port 0 (ohci1394) opened, 2 nodes detected
+
+ FireScope
+ ---------
+ Target : <unspecified>
+ Gen : 1
+ [Ctrl-T] choose target
+ [Ctrl-H] this menu
+ [Ctrl-Q] quit
+
+ ------> Press Ctrl-T now, the output should be similar to:
+
+ 2 nodes available, local node is: 0
+ 0: ffc0, uuid: 00000000 00000000 [LOCAL]
+ 1: ffc1, uuid: 00279000 ba4bb801
+
+ Besides the [LOCAL] node, it must show another node without error message.
+
+4) Prepare for debugging with early OHCI-1394 initialization:
+
+ 4.1) Kernel compilation and installation on debug target
+
+ Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ (Kernel hacking: Provide code for enabling DMA over FireWire early on boot)
+ enabled and install it on the machine to be debugged (debug target).
+
+ 4.2) Transfer the System.map of the debugged kernel to the debug host
+
+ Copy the System.map of the kernel be debugged to the debug host (the host
+ which is connected to the debugged machine over the FireWire cable).
+
+5) Retrieving the printk buffer contents:
+
+ With the FireWire cable connected, the OHCI-1394 driver on the debugging
+ host loaded, reboot the debugged machine, booting the kernel which has
+ CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
+
+ Then, on the debugging host, run firescope, for example by using -A:
+
+ firescope -A System.map-of-debug-target-kernel
+
+ Note: -A automatically attaches to the first non-local node. It only works
+ reliably if only connected two machines are connected using FireWire.
+
+ After having attached to the debug target, press Ctrl-D to view the
+ complete printk buffer or Ctrl-U to enter auto update mode and get an
+ updated live view of recent kernel messages logged on the debug target.
+
+ Call "firescope -h" to get more information on firescope's options.
+
+Notes
+-----
+Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs
+
+FireWire is a trademark of Apple Inc. - for more information please refer to:
+http://en.wikipedia.org/wiki/FireWire
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 880f882160e..5d171b7b839 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -416,8 +416,21 @@ and is between 256 and 4096 characters. It is defined in the file
[SPARC64] tick
[X86-64] hpet,tsc
- code_bytes [IA32] How many bytes of object code to print in an
- oops report.
+ clearcpuid=BITNUM [X86]
+ Disable CPUID feature X for the kernel. See
+ include/asm-x86/cpufeature.h for the valid bit numbers.
+ Note the Linux specific bits are not necessarily
+ stable over kernel options, but the vendor specific
+ ones should be.
+ Also note that user programs calling CPUID directly
+ or using the feature without checking anything
+ will still see it. This just prevents it from
+ being used by the kernel or shown in /proc/cpuinfo.
+ Also note the kernel might malfunction if you disable
+ some critical bits.
+
+ code_bytes [IA32/X86_64] How many bytes of object code to print
+ in an oops report.
Range: 0 - 8192
Default: 64
@@ -570,6 +583,12 @@ and is between 256 and 4096 characters. It is defined in the file
See drivers/char/README.epca and
Documentation/digiepca.txt.
+ disable_mtrr_trim [X86, Intel and AMD only]
+ By default the kernel will trim any uncacheable
+ memory out of your available memory pool based on
+ MTRR settings. This parameter disables that behavior,
+ possibly causing your machine to run very slowly.
+
dmasound= [HW,OSS] Sound subsystem buffers
dscc4.setup= [NET]
@@ -660,6 +679,10 @@ and is between 256 and 4096 characters. It is defined in the file
gamma= [HW,DRM]
+ gart_fix_e820= [X86_64] disable the fix e820 for K8 GART
+ Format: off | on
+ default: on
+
gdth= [HW,SCSI]
See header of drivers/scsi/gdth.c.
@@ -794,6 +817,16 @@ and is between 256 and 4096 characters. It is defined in the file
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
+ udelay
+ Simple two microseconds delay
+ none
+ No delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
@@ -1059,6 +1092,11 @@ and is between 256 and 4096 characters. It is defined in the file
Multi-Function General Purpose Timers on AMD Geode
platforms.
+ mfgptfix [X86-32] Fix MFGPT timers on AMD Geode platforms when
+ the BIOS has incorrectly applied a workaround. TinyBIOS
+ version 0.98 is known to be affected, 0.99 fixes the
+ problem by letting the user disable the workaround.
+
mga= [HW,DRM]
mousedev.tap_time=
@@ -1159,6 +1197,8 @@ and is between 256 and 4096 characters. It is defined in the file
nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
+ noefi [X86-32,X86-64] Disable EFI runtime services support.
+
noexec [IA-64]
noexec [X86-32,X86-64]
@@ -1169,6 +1209,8 @@ and is between 256 and 4096 characters. It is defined in the file
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
+ noclflush [BUGS=X86] Don't use the CLFLUSH instruction
+
nohlt [BUGS=ARM]
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
@@ -1978,6 +2020,11 @@ and is between 256 and 4096 characters. It is defined in the file
vdso=1: enable VDSO (default)
vdso=0: disable VDSO mapping
+ vdso32= [X86-32,X86-64]
+ vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
+ vdso32=1: enable 32-bit VDSO (default)
+ vdso32=0: disable 32-bit VDSO mapping
+
vector= [IA-64,SMP]
vector=percpu: enable percpu vector domain
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 945311840a1..34abae4e944 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -110,12 +110,18 @@ Idle loop
Rebooting
- reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old]
+ reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
bios Use the CPU reboot vector for warm reset
warm Don't set the cold reboot flag
cold Set the cold reboot flag
triple Force a triple fault (init)
kbd Use the keyboard controller. cold reset (default)
+ acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
+ ACPI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
+ efi Use efi reset_system runtime service. If EFI is not configured or the
+ EFI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
Using warm reset will be much faster especially on big memory
systems because the BIOS will not go through the memory check.
diff --git a/Documentation/x86_64/uefi.txt b/Documentation/x86_64/uefi.txt
index 91a98edfb58..7d77120a518 100644
--- a/Documentation/x86_64/uefi.txt
+++ b/Documentation/x86_64/uefi.txt
@@ -19,6 +19,10 @@ Mechanics:
- Build the kernel with the following configuration.
CONFIG_FB_EFI=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+ If EFI runtime services are expected, the following configuration should
+ be selected.
+ CONFIG_EFI=y
+ CONFIG_EFI_VARS=y or m # optional
- Create a VFAT partition on the disk
- Copy the following to the VFAT partition:
elilo bootloader with x86_64 support, elilo configuration file,
@@ -27,3 +31,8 @@ Mechanics:
can be found in the elilo sourceforge project.
- Boot to EFI shell and invoke elilo choosing the kernel image built
in first step.
+- If some or all EFI runtime services don't work, you can try following
+ kernel command line parameters to turn off some or all EFI runtime
+ services.
+ noefi turn off all EFI runtime services
+ reboot_type=k turn off EFI reboot runtime service
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index de211ac3853..77201d3f747 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -91,6 +91,11 @@ config GENERIC_IRQ_PROBE
bool
default y
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bef47725d4a..5a41e75ae1f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,6 +42,11 @@ config MMU
config SWIOTLB
bool
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_XCHGADD_ALGORITHM
bool
default y
@@ -75,6 +80,9 @@ config GENERIC_TIME_VSYSCALL
bool
default y
+config ARCH_SETS_UP_PER_CPU_AREA
+ def_bool y
+
config DMI
bool
default y
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 3e35987af45..4f0c30c38e9 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -222,7 +222,8 @@ elf32_set_personality (void)
}
static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt,
+ int prot, int type, unsigned long unused)
{
unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 196287928ba..e699eb6c44b 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -947,7 +947,7 @@ percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
{
unsigned int i;
for_each_possible_cpu(i) {
- memcpy(pcpudst + __per_cpu_offset[i], src, size);
+ memcpy(pcpudst + per_cpu_offset(i), src, size);
}
}
#endif /* CONFIG_SMP */
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index ab9a264cb19..f7237c5f531 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -235,6 +235,11 @@ config IRAM_SIZE
# Define implied options from the CPU selection here
#
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_GENERIC_SPINLOCK
bool
depends on M32R
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6b0f85f02c7..4fad0a34b99 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -694,6 +694,11 @@ source "arch/mips/vr41xx/Kconfig"
endmenu
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index c2d497ceffd..fc4aa07b6d3 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -24,9 +24,7 @@ DEFINE_SPINLOCK(i8253_lock);
static void init_pit_timer(enum clock_event_mode mode,
struct clock_event_device *evt)
{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
+ spin_lock(&i8253_lock);
switch(mode) {
case CLOCK_EVT_MODE_PERIODIC:
@@ -55,7 +53,7 @@ static void init_pit_timer(enum clock_event_mode mode,
/* Nothing to do here */
break;
}
- spin_unlock_irqrestore(&i8253_lock, flags);
+ spin_unlock(&i8253_lock);
}
/*
@@ -65,12 +63,10 @@ static void init_pit_timer(enum clock_event_mode mode,
*/
static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
+ spin_lock(&i8253_lock);
outb_p(delta & 0xff , PIT_CH0); /* LSB */
outb(delta >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
+ spin_unlock(&i8253_lock);
return 0;
}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b8ef1787a19..2b649c46631 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -19,6 +19,11 @@ config MMU
config STACK_GROWSUP
def_bool y
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 232c298c933..fb85f6b72fc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -42,6 +42,9 @@ config GENERIC_HARDIRQS
bool
default y
+config ARCH_SETS_UP_PER_CPU_AREA
+ def_bool PPC64
+
config IRQ_PER_CPU
bool
default y
@@ -53,6 +56,11 @@ config RWSEM_XCHGADD_ALGORITHM
bool
default y
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config ARCH_HAS_ILOG2_U32
bool
default y
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 3e17d154d0d..8b056d2295c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -256,7 +256,7 @@ static int set_evrregs(struct task_struct *task, unsigned long *data)
#endif /* CONFIG_SPE */
-static void set_single_step(struct task_struct *task)
+void user_enable_single_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
@@ -271,7 +271,7 @@ static void set_single_step(struct task_struct *task)
set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
-static void clear_single_step(struct task_struct *task)
+void user_disable_single_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
@@ -313,7 +313,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
- clear_single_step(child);
+ user_disable_single_step(child);
}
/*
@@ -445,52 +445,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
}
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT: { /* restart after signal. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- if (request == PTRACE_SYSCALL)
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- else
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- child->exit_code = data;
- /* make sure the single step bit is not set. */
- clear_single_step(child);
- wake_up_process(child);
- ret = 0;
- break;
- }
-
-/*
- * make the child exit. Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
- case PTRACE_KILL: {
- ret = 0;
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- break;
- child->exit_code = SIGKILL;
- /* make sure the single step bit is not set. */
- clear_single_step(child);
- wake_up_process(child);
- break;
- }
-
- case PTRACE_SINGLESTEP: { /* set the trap flag. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- set_single_step(child);
- child->exit_code = data;
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- break;
- }
-
case PTRACE_GET_DEBUGREG: {
ret = -EINVAL;
/* We only support one DABR and no IABRS at the moment */
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 10b212a1f9f..26f5791baa3 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -66,6 +66,9 @@ config AUDIT_ARCH
bool
default y
+config ARCH_SETS_UP_PER_CPU_AREA
+ def_bool y
+
config ARCH_NO_VIRT_TO_BUS
def_bool y
@@ -200,6 +203,11 @@ config US2E_FREQ
If in doubt, say N.
# Global things across all Sun machines.
+config GENERIC_LOCKBREAK
+ bool
+ default y
+ depends on SMP && PREEMPT
+
config RWSEM_GENERIC_SPINLOCK
bool
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 1b388b41d95..7c7142ba3bd 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -71,10 +71,10 @@ EXPORT_SYMBOL(dump_thread);
/* required for SMP */
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void __write_lock_failed(rwlock_t *rw);
EXPORT_SYMBOL(__write_lock_failed);
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+extern void __read_lock_failed(rwlock_t *rw);
EXPORT_SYMBOL(__read_lock_failed);
#endif
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 0147227ce18..19053d46cb6 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -3,10 +3,10 @@
* Licensed under the GPL
*/
-#include "linux/ptrace.h"
-#include "asm/unistd.h"
-#include "asm/uaccess.h"
-#include "asm/ucontext.h"
+#include <linux/ptrace.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
#include "frame_kern.h"
#include "skas.h"
@@ -18,17 +18,17 @@ void copy_sc(struct uml_pt_regs *regs, void *from)
REGS_FS(regs->gp) = sc->fs;
REGS_ES(regs->gp) = sc->es;
REGS_DS(regs->gp) = sc->ds;
- REGS_EDI(regs->gp) = sc->edi;
- REGS_ESI(regs->gp) = sc->esi;
- REGS_EBP(regs->gp) = sc->ebp;
- REGS_SP(regs->gp) = sc->esp;
- REGS_EBX(regs->gp) = sc->ebx;
- REGS_EDX(regs->gp) = sc->edx;
- REGS_ECX(regs->gp) = sc->ecx;
- REGS_EAX(regs->gp) = sc->eax;
- REGS_IP(regs->gp) = sc->eip;
+ REGS_EDI(regs->gp) = sc->di;
+ REGS_ESI(regs->gp) = sc->si;
+ REGS_EBP(regs->gp) = sc->bp;
+ REGS_SP(regs->gp) = sc->sp;
+ REGS_EBX(regs->gp) = sc->bx;
+ REGS_EDX(regs->gp) = sc->dx;
+ REGS_ECX(regs->gp) = sc->cx;
+ REGS_EAX(regs->gp) = sc->ax;
+ REGS_IP(regs->gp) = sc->ip;
REGS_CS(regs->gp) = sc->cs;
- REGS_EFLAGS(regs->gp) = sc->eflags;
+ REGS_EFLAGS(regs->gp) = sc->flags;
REGS_SS(regs->gp) = sc->ss;
}
@@ -229,18 +229,18 @@ static int copy_sc_to_user(struct sigcontext __user *to,
sc.fs = REGS_FS(regs->regs.gp);
sc.es = REGS_ES(regs->regs.gp);
sc.ds = REGS_DS(regs->regs.gp);
- sc.edi = REGS_EDI(regs->regs.gp);
- sc.esi = REGS_ESI(regs->regs.gp);
- sc.ebp = REGS_EBP(regs->regs.gp);
- sc.esp = sp;
- sc.ebx = REGS_EBX(regs->regs.gp);
- sc.edx = REGS_EDX(regs->regs.gp);
- sc.ecx = REGS_ECX(regs->regs.gp);
- sc.eax = REGS_EAX(regs->regs.gp);
- sc.eip = REGS_IP(regs->regs.gp);
+ sc.di = REGS_EDI(regs->regs.gp);
+ sc.si = REGS_ESI(regs->regs.gp);
+ sc.bp = REGS_EBP(regs->regs.gp);
+ sc.sp = sp;
+ sc.bx = REGS_EBX(regs->regs.gp);
+ sc.dx = REGS_EDX(regs->regs.gp);
+ sc.cx = REGS_ECX(regs->regs.gp);
+ sc.ax = REGS_EAX(regs->regs.gp);
+ sc.ip = REGS_IP(regs->regs.gp);
sc.cs = REGS_CS(regs->regs.gp);
- sc.eflags = REGS_EFLAGS(regs->regs.gp);
- sc.esp_at_signal = regs->regs.gp[UESP];
+ sc.flags = REGS_EFLAGS(regs->regs.gp);
+ sc.sp_at_signal = regs->regs.gp[UESP];
sc.ss = regs->regs.gp[SS];
sc.cr2 = fi->cr2;
sc.err = fi->error_code;
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index 1778d33808f..7457436b433 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -4,11 +4,11 @@
* Licensed under the GPL
*/
-#include "linux/personality.h"
-#include "linux/ptrace.h"
-#include "asm/unistd.h"
-#include "asm/uaccess.h"
-#include "asm/ucontext.h"
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
#include "frame_kern.h"
#include "skas.h"
@@ -27,16 +27,16 @@ void copy_sc(struct uml_pt_regs *regs, void *from)
GETREG(regs, R13, sc, r13);
GETREG(regs, R14, sc, r14);
GETREG(regs, R15, sc, r15);
- GETREG(regs, RDI, sc, rdi);
- GETREG(regs, RSI, sc, rsi);
- GETREG(regs, RBP, sc, rbp);
- GETREG(regs, RBX, sc, rbx);
- GETREG(regs, RDX, sc, rdx);
- GETREG(regs, RAX, sc, rax);
- GETREG(regs, RCX, sc, rcx);
- GETREG(regs, RSP, sc, rsp);
- GETREG(regs, RIP, sc, rip);
- GETREG(regs, EFLAGS, sc, eflags);
+ GETREG(regs, RDI, sc, di);
+ GETREG(regs, RSI, sc, si);
+ GETREG(regs, RBP, sc, bp);
+ GETREG(regs, RBX, sc, bx);
+ GETREG(regs, RDX, sc, dx);
+ GETREG(regs, RAX, sc, ax);
+ GETREG(regs, RCX, sc, cx);
+ GETREG(regs, RSP, sc, sp);
+ GETREG(regs, RIP, sc, ip);
+ GETREG(regs, EFLAGS, sc, flags);
GETREG(regs, CS, sc, cs);
#undef GETREG
@@ -61,16 +61,16 @@ static int copy_sc_from_user(struct pt_regs *regs,
err |= GETREG(regs, R13, from, r13);
err |= GETREG(regs, R14, from, r14);
err |= GETREG(regs, R15, from, r15);
- err |= GETREG(regs, RDI, from, rdi);
- err |= GETREG(regs, RSI, from, rsi);
- err |= GETREG(regs, RBP, from, rbp);
- err |= GETREG(regs, RBX, from, rbx);
- err |= GETREG(regs, RDX, from, rdx);
- err |= GETREG(regs, RAX, from, rax);
- err |= GETREG(regs, RCX, from, rcx);
- err |= GETREG(regs, RSP, from, rsp);
- err |= GETREG(regs, RIP, from, rip);
- err |= GETREG(regs, EFLAGS, from, eflags);
+ err |= GETREG(regs, RDI, from, di);
+ err |= GETREG(regs, RSI, from, si);
+ err |= GETREG(regs, RBP, from, bp);
+ err |= GETREG(regs, RBX, from, bx);
+ err |= GETREG(regs, RDX, from, dx);
+ err |= GETREG(regs, RAX, from, ax);
+ err |= GETREG(regs, RCX, from, cx);
+ err |= GETREG(regs, RSP, from, sp);
+ err |= GETREG(regs, RIP, from, ip);
+ err |= GETREG(regs, EFLAGS, from, flags);
err |= GETREG(regs, CS, from, cs);
if (err)
return 1;
@@ -108,19 +108,19 @@ static int copy_sc_to_user(struct sigcontext __user *to,
__put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \
&(sc)->regname)
- err |= PUTREG(regs, RDI, to, rdi);
- err |= PUTREG(regs, RSI, to, rsi);
- err |= PUTREG(regs, RBP, to, rbp);
+ err |= PUTREG(regs, RDI, to, di);
+ err |= PUTREG(regs, RSI, to, si);
+ err |= PUTREG(regs, RBP, to, bp);
/*
* Must use orignal RSP, which is passed in, rather than what's in
* the pt_regs, because that's already been updated to point at the
* signal frame.
*/
- err |= __put_user(sp, &to->rsp);
- err |= PUTREG(regs, RBX, to, rbx);
- err |= PUTREG(regs, RDX, to, rdx);
- err |= PUTREG(regs, RCX, to, rcx);
- err |= PUTREG(regs, RAX, to, rax);
+ err |= __put_user(sp, &to->sp);
+ err |= PUTREG(regs, RBX, to, bx);
+ err |= PUTREG(regs, RDX, to, dx);
+ err |= PUTREG(regs, RCX, to, cx);
+ err |= PUTREG(regs, RAX, to, ax);
err |= PUTREG(regs, R8, to, r8);
err |= PUTREG(regs, R9, to, r9);
err |= PUTREG(regs, R10, to, r10);
@@ -135,8 +135,8 @@ static int copy_sc_to_user(struct sigcontext __user *to,
err |= __put_user(fi->error_code, &to->err);
err |= __put_user(fi->trap_no, &to->trapno);
- err |= PUTREG(regs, RIP, to, rip);
- err |= PUTREG(regs, EFLAGS, to, eflags);
+ err |= PUTREG(regs, RIP, to, ip);
+ err |= PUTREG(regs, EFLAGS, to, flags);
#undef PUTREG
err |= __put_user(mask, &to->oldmask);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80b7ba4056d..fb3eea3e38e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,81 +17,69 @@ config X86_64
### Arch settings
config X86
- bool
- default y
+ def_bool y
+
+config GENERIC_LOCKBREAK
+ def_bool n
config GENERIC_TIME
- bool
- default y
+ def_bool y
config GENERIC_CMOS_UPDATE
- bool
- default y
+ def_bool y
config CLOCKSOURCE_WATCHDOG
- bool
- default y
+ def_bool y
config GENERIC_CLOCKEVENTS
- bool
- default y
+ def_bool y
config GENERIC_CLOCKEVENTS_BROADCAST
- bool
- default y
+ def_bool y
depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
config LOCKDEP_SUPPORT
- bool
- default y
+ def_bool y
config STACKTRACE_SUPPORT
- bool
- default y
+ def_bool y
config SEMAPHORE_SLEEPERS
- bool
- default y
+ def_bool y
config MMU
- bool
- default y
+ def_bool y
config ZONE_DMA
- bool
- default y
+ def_bool y
config QUICKLIST
- bool
- default X86_32
+ def_bool X86_32
config SBUS
bool
config GENERIC_ISA_DMA
- bool
- default y
+ def_bool y
config GENERIC_IOMAP
- bool
- default y
+ def_bool y
config GENERIC_BUG
- bool
- default y
+ def_bool y
depends on BUG
config GENERIC_HWEIGHT
- bool
- default y
+ def_bool y
+
+config GENERIC_GPIO
+ def_bool n
config ARCH_MAY_HAVE_PC_FDC
- bool
- default y
+ def_bool y
config DMI
- bool
- default y
+ def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool !X86_XADD
@@ -112,6 +100,9 @@ config GENERIC_TIME_VSYSCALL
bool
default X86_64
+config HAVE_SETUP_PER_CPU_AREA
+ def_bool X86_64
+
config ARCH_SUPPORTS_OPROFILE
bool
default y
@@ -144,9 +135,17 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
- depends on X86_32 && SMP && !X86_VOYAGER
+ depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
default y
+config X86_32_SMP
+ def_bool y
+ depends on X86_32 && SMP
+
+config X86_64_SMP
+ def_bool y
+ depends on X86_64 && SMP
+
config X86_HT
bool
depends on SMP
@@ -292,6 +291,18 @@ config X86_ES7000
Only choose this option if you have such a system, otherwise you
should say N here.
+config X86_RDC321X
+ bool "RDC R-321x SoC"
+ depends on X86_32
+ select M486
+ select X86_REBOOTFIXUPS
+ select GENERIC_GPIO
+ select LEDS_GPIO
+ help
+ This option is needed for RDC R-321x system-on-chip, also known
+ as R-8610-(G).
+ If you don't have one of these chips, you should say N here.
+
config X86_VSMP
bool "Support for ScaleMP vSMP"
depends on X86_64 && PCI
@@ -303,8 +314,8 @@ config X86_VSMP
endchoice
config SCHED_NO_NO_OMIT_FRAME_POINTER
- bool "Single-depth WCHAN output"
- default y
+ def_bool y
+ prompt "Single-depth WCHAN output"
depends on X86_32
help
Calculate simpler /proc/<PID>/wchan values. If this option
@@ -314,18 +325,8 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
If in doubt, say "Y".
-config PARAVIRT
- bool
- depends on X86_32 && !(X86_VISWS || X86_VOYAGER)
- help
- This changes the kernel so it can modify itself when it is run
- under a hypervisor, potentially improving performance significantly
- over full virtualization. However, when run without a hypervisor
- the kernel is theoretically slower and slightly larger.
-
menuconfig PARAVIRT_GUEST
bool "Paravirtualized guest support"
- depends on X86_32
help
Say Y here to get to see options related to running Linux under
various hypervisors. This option alone does not add any kernel code.
@@ -339,6 +340,7 @@ source "arch/x86/xen/Kconfig"
config VMI
bool "VMI Guest support"
select PARAVIRT
+ depends on X86_32
depends on !(X86_VISWS || X86_VOYAGER)
help
VMI provides a paravirtualized interface to the VMware ESX server
@@ -348,40 +350,43 @@ config VMI
source "arch/x86/lguest/Kconfig"
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on !(X86_VISWS || X86_VOYAGER)
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization. However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
endif
config ACPI_SRAT
- bool
- default y
+ def_bool y
depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
select ACPI_NUMA
config HAVE_ARCH_PARSE_SRAT
- bool
- default y
- depends on ACPI_SRAT
+ def_bool y
+ depends on ACPI_SRAT
config X86_SUMMIT_NUMA
- bool
- default y
+ def_bool y
depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
config X86_CYCLONE_TIMER
- bool
- default y
+ def_bool y
depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
config ES7000_CLUSTERED_APIC
- bool
- default y
+ def_bool y
depends on SMP && X86_ES7000 && MPENTIUMIII
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
- bool
+ def_bool X86_64
prompt "HPET Timer Support" if X86_32
- default X86_64
help
Use the IA-PC HPET (High Precision Event Timer) to manage
time in preference to the PIT and RTC, if a HPET is
@@ -399,9 +404,8 @@ config HPET_TIMER
Choose N to continue using the legacy 8254 timer.
config HPET_EMULATE_RTC
- bool
- depends on HPET_TIMER && RTC=y
- default y
+ def_bool y
+ depends on HPET_TIMER && (RTC=y || RTC=m)
# Mark as embedded because too many people got it wrong.
# The code disables itself when not needed.
@@ -441,8 +445,8 @@ config CALGARY_IOMMU
If unsure, say Y.
config CALGARY_IOMMU_ENABLED_BY_DEFAULT
- bool "Should Calgary be enabled by default?"
- default y
+ def_bool y
+ prompt "Should Calgary be enabled by default?"
depends on CALGARY_IOMMU
help
Should Calgary be enabled by default? if you choose 'y', Calgary
@@ -486,9 +490,9 @@ config SCHED_SMT
N here.
config SCHED_MC
- bool "Multi-core scheduler support"
+ def_bool y
+ prompt "Multi-core scheduler support"
depends on (X86_64 && SMP) || (X86_32 && X86_HT)
- default y
help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
@@ -522,19 +526,16 @@ config X86_UP_IOAPIC
an IO-APIC, then the kernel will still run with no slowdown at all.
config X86_LOCAL_APIC
- bool
+ def_bool y
depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
- default y
config X86_IO_APIC
- bool
+ def_bool y
depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
- default y
config X86_VISWS_APIC
- bool
+ def_bool y
depends on X86_32 && X86_VISWS
- default y
config X86_MCE
bool "Machine Check Exception"
@@ -554,17 +555,17 @@ config X86_MCE
the 386 and 486, so nearly everyone can say Y here.
config X86_MCE_INTEL
- bool "Intel MCE features"
+ def_bool y
+ prompt "Intel MCE features"
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
- default y
help
Additional support for intel specific MCE features such as
the thermal monitor.
config X86_MCE_AMD
- bool "AMD MCE features"
+ def_bool y
+ prompt "AMD MCE features"
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
- default y
help
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
@@ -637,9 +638,9 @@ config I8K
Say N otherwise.
config X86_REBOOTFIXUPS
- bool "Enable X86 board specific fixups for reboot"
+ def_bool n
+ prompt "Enable X86 board specific fixups for reboot"
depends on X86_32 && X86
- default n
---help---
This enables chipset and/or board specific fixups to be done
in order to get reboot to work correctly. This is only needed on
@@ -648,7 +649,7 @@ config X86_REBOOTFIXUPS
system.
Currently, the only fixup is for the Geode machines using
- CS5530A and CS5536 chipsets.
+ CS5530A and CS5536 chipsets and the RDC R-321x SoC.
Say Y if you want to enable the fixup. Currently, it's safe to
enable this option even if you don't need it.
@@ -672,9 +673,8 @@ config MICROCODE
module will be called microcode.
config MICROCODE_OLD_INTERFACE
- bool
+ def_bool y
depends on MICROCODE
- default y
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -798,13 +798,12 @@ config PAGE_OFFSET
depends on X86_32
config HIGHMEM
- bool
+ def_bool y
depends on X86_32 && (HIGHMEM64G || HIGHMEM4G)
- default y
config X86_PAE
- bool "PAE (Physical Address Extension) Support"
- default n
+ def_bool n
+ prompt "PAE (Physical Address Extension) Support"
depends on X86_32 && !HIGHMEM4G
select RESOURCES_64BIT
help
@@ -836,10 +835,10 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
config K8_NUMA
- bool "Old style AMD Opteron NUMA detection"
- depends on X86_64 && NUMA && PCI
- default y
- help
+ def_bool y
+ prompt "Old style AMD Opteron NUMA detection"
+ depends on X86_64 && NUMA && PCI
+ help
Enable K8 NUMA node topology detection. You should say Y here if
you have a multi processor AMD K8 system. This uses an old
method to read the NUMA configuration directly from the builtin
@@ -847,10 +846,10 @@ config K8_NUMA
instead, which also takes priority if both are compiled in.
config X86_64_ACPI_NUMA
- bool "ACPI NUMA detection"
+ def_bool y
+ prompt "ACPI NUMA detection"
depends on X86_64 && NUMA && ACPI && PCI
select ACPI_NUMA
- default y
help
Enable ACPI SRAT based node topology detection.
@@ -864,52 +863,53 @@ config NUMA_EMU
config NODES_SHIFT
int
+ range 1 15 if X86_64
default "6" if X86_64
default "4" if X86_NUMAQ
default "3"
depends on NEED_MULTIPLE_NODES
config HAVE_ARCH_BOOTMEM_NODE
- bool
+ def_bool y
depends on X86_32 && NUMA
- default y
config ARCH_HAVE_MEMORY_PRESENT
- bool
+ def_bool y
depends on X86_32 && DISCONTIGMEM
- default y
config NEED_NODE_MEMMAP_SIZE
- bool
+ def_bool y
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
- default y
config HAVE_ARCH_ALLOC_REMAP
- bool
+ def_bool y
depends on X86_32 && NUMA
- default y
config ARCH_FLATMEM_ENABLE
def_bool y
- depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA)
+ depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
- depends on NUMA
+ depends on NUMA && X86_32
config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
- depends on NUMA
+ depends on NUMA && X86_32
+
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
+ depends on X86_64
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64))
+ depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SELECT_MEMORY_MODEL
def_bool y
- depends on X86_32 && ARCH_SPARSEMEM_ENABLE
+ depends on ARCH_SPARSEMEM_ENABLE
config ARCH_MEMORY_PROBE
def_bool X86_64
@@ -987,42 +987,32 @@ config MTRR
See <file:Documentation/mtrr.txt> for more information.
config EFI
- bool "Boot from EFI support"
- depends on X86_32 && ACPI
- default n
+ def_bool n
+ prompt "EFI runtime service support"
+ depends on ACPI
---help---
- This enables the kernel to boot on EFI platforms using
- system configuration information passed to it from the firmware.
- This also enables the kernel to use any EFI runtime services that are
+ This enables the kernel to use EFI runtime services that are
available (such as the EFI variable services).
- This option is only useful on systems that have EFI firmware
- and will result in a kernel image that is ~8k larger. In addition,
- you must use the latest ELILO loader available at
- <http://elilo.sourceforge.net> in order to take advantage of
- kernel initialization using EFI information (neither GRUB nor LILO know
- anything about EFI). However, even with this option, the resultant
- kernel should continue to boot on existing non-EFI platforms.
+ This option is only useful on systems that have EFI firmware.
+ In addition, you should use the latest ELILO loader available
+ at <http://elilo.sourceforge.net> in order to take advantage
+ of EFI runtime services. However, even with this option, the
+ resultant kernel should continue to boot on existing non-EFI
+ platforms.
config IRQBALANCE
- bool "Enable kernel irq balancing"
+ def_bool y
+ prompt "Enable kernel irq balancing"
depends on X86_32 && SMP && X86_IO_APIC
- default y
help
The default yes will allow the kernel to do irq load balancing.
Saying no will keep the kernel from doing irq load balancing.
-# turning this on wastes a bunch of space.
-# Summit needs it only when NUMA is on
-config BOOT_IOREMAP
- bool
- depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
- default y
-
config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
+ def_bool y
+ prompt "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
- default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
@@ -1189,11 +1179,11 @@ config HOTPLUG_CPU
suspend.
config COMPAT_VDSO
- bool "Compat VDSO support"
- default y
- depends on X86_32
+ def_bool y
+ prompt "Compat VDSO support"
+ depends on X86_32 || IA32_EMULATION
help
- Map the VDSO to the predictable old-style address too.
+ Map the 32-bit VDSO to the predictable old-style address too.
---help---
Say N here if you are running a sufficiently recent glibc
version (2.3.3 or later), to remove the high-mapped
@@ -1207,30 +1197,26 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
depends on X86_64 || (X86_32 && HIGHMEM)
-config MEMORY_HOTPLUG_RESERVE
- def_bool X86_64
- depends on (MEMORY_HOTPLUG && DISCONTIGMEM)
-
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
-config OUT_OF_LINE_PFN_TO_PAGE
- def_bool X86_64
- depends on DISCONTIGMEM
-
menu "Power management options"
depends on !X86_VOYAGER
config ARCH_HIBERNATION_HEADER
- bool
+ def_bool y
depends on X86_64 && HIBERNATION
- default y
source "kernel/power/Kconfig"
source "drivers/acpi/Kconfig"
+config X86_APM_BOOT
+ bool
+ default y
+ depends on APM || APM_MODULE
+
menuconfig APM
tristate "APM (Advanced Power Management) BIOS support"
depends on X86_32 && PM_SLEEP && !X86_VISWS
@@ -1371,7 +1357,7 @@ menu "Bus options (PCI etc.)"
config PCI
bool "PCI support" if !X86_VISWS
depends on !X86_VOYAGER
- default y if X86_VISWS
+ default y
select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
help
Find out whether you have a PCI motherboard. PCI is the name of a
@@ -1418,25 +1404,21 @@ config PCI_GOANY
endchoice
config PCI_BIOS
- bool
+ def_bool y
depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
- default y
# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
config PCI_DIRECT
- bool
+ def_bool y
depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
- default y
config PCI_MMCONFIG
- bool
+ def_bool y
depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
- default y
config PCI_DOMAINS
- bool
+ def_bool y
depends on PCI
- default y
config PCI_MMCONFIG
bool "Support mmconfig PCI config space access"
@@ -1453,9 +1435,9 @@ config DMAR
remapping devices.
config DMAR_GFX_WA
- bool "Support for Graphics workaround"
+ def_bool y
+ prompt "Support for Graphics workaround"
depends on DMAR
- default y
help
Current Graphics drivers tend to use physical address
for DMA and avoid using DMA APIs. Setting this config
@@ -1464,9 +1446,8 @@ config DMAR_GFX_WA
to use physical addresses for DMA.
config DMAR_FLOPPY_WA
- bool
+ def_bool y
depends on DMAR
- default y
help
Floppy disk drivers are know to bypass DMA API calls
thereby failing to work when IOMMU is enabled. This
@@ -1479,8 +1460,7 @@ source "drivers/pci/Kconfig"
# x86_64 have no ISA slots, but do have ISA-style DMA.
config ISA_DMA_API
- bool
- default y
+ def_bool y
if X86_32
@@ -1546,9 +1526,9 @@ config SCx200HR_TIMER
other workaround is idle=poll boot option.
config GEODE_MFGPT_TIMER
- bool "Geode Multi-Function General Purpose Timer (MFGPT) events"
+ def_bool y
+ prompt "Geode Multi-Function General Purpose Timer (MFGPT) events"
depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS
- default y
help
This driver provides a clock event source based on the MFGPT
timer(s) in the CS5535 and CS5536 companion chip for the geode.
@@ -1575,6 +1555,7 @@ source "fs/Kconfig.binfmt"
config IA32_EMULATION
bool "IA32 Emulation"
depends on X86_64
+ select COMPAT_BINFMT_ELF
help
Include code to run 32-bit programs under a 64-bit kernel. You should
likely turn this on, unless you're 100% sure that you don't have any
@@ -1587,18 +1568,16 @@ config IA32_AOUT
Support old a.out binaries in the 32bit emulation.
config COMPAT
- bool
+ def_bool y
depends on IA32_EMULATION
- default y
config COMPAT_FOR_U64_ALIGNMENT
def_bool COMPAT
depends on X86_64
config SYSVIPC_COMPAT
- bool
+ def_bool y
depends on X86_64 && COMPAT && SYSVIPC
- default y
endmenu
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index c30162202dc..e09a6b73a1a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -219,10 +219,10 @@ config MGEODEGX1
Select this for a Geode GX1 (Cyrix MediaGX) chip.
config MGEODE_LX
- bool "Geode GX/LX"
+ bool "Geode GX/LX"
depends on X86_32
- help
- Select this for AMD Geode GX and LX processors.
+ help
+ Select this for AMD Geode GX and LX processors.
config MCYRIXIII
bool "CyrixIII/VIA-C3"
@@ -258,7 +258,7 @@ config MPSC
Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
Xeon CPUs with Intel 64bit which is compatible with x86-64.
Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
- Netburst core and shouldn't use this option. You can distinguish them
+ Netburst core and shouldn't use this option. You can distinguish them
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
@@ -317,81 +317,75 @@ config X86_L1_CACHE_SHIFT
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
config X86_XADD
- bool
+ def_bool y
depends on X86_32 && !M386
- default y
config X86_PPRO_FENCE
- bool
+ bool "PentiumPro memory ordering errata workaround"
depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
- default y
+ help
+ Old PentiumPro multiprocessor systems had errata that could cause memory
+ operations to violate the x86 ordering standard in rare cases. Enabling this
+ option will attempt to work around some (but not all) occurances of
+ this problem, at the cost of much heavier spinlock and memory barrier
+ operations.
+
+ If unsure, say n here. Even distro kernels should think twice before enabling
+ this: there are few systems, and an unlikely bug.
config X86_F00F_BUG
- bool
+ def_bool y
depends on M586MMX || M586TSC || M586 || M486 || M386
- default y
config X86_WP_WORKS_OK
- bool
+ def_bool y
depends on X86_32 && !M386
- default y
config X86_INVLPG
- bool
+ def_bool y
depends on X86_32 && !M386
- default y
config X86_BSWAP
- bool
+ def_bool y
depends on X86_32 && !M386
- default y
config X86_POPAD_OK
- bool
+ def_bool y
depends on X86_32 && !M386
- default y
config X86_ALIGNMENT_16
- bool
+ def_bool y
depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
- default y
config X86_GOOD_APIC
- bool
+ def_bool y
depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64
- default y
config X86_INTEL_USERCOPY
- bool
+ def_bool y
depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
- default y
config X86_USE_PPRO_CHECKSUM
- bool
+ def_bool y
depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
- default y
config X86_USE_3DNOW
- bool
+ def_bool y
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
- default y
config X86_OOSTORE
- bool
+ def_bool y
depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
- default y
config X86_TSC
- bool
+ def_bool y
depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
- default y
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
- bool
+ def_bool y
depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
- default y
config X86_MINIMUM_CPU_FAMILY
int
@@ -399,3 +393,6 @@ config X86_MINIMUM_CPU_FAMILY
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
+config X86_DEBUGCTLMSR
+ def_bool y
+ depends on !(M586MMX || M586TSC || M586 || M486 || M386)
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 761ca7b5f12..2e1e3af28c3 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -6,7 +6,7 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
config EARLY_PRINTK
- bool "Early printk" if EMBEDDED && DEBUG_KERNEL && X86_32
+ bool "Early printk" if EMBEDDED
default y
help
Write kernel log output directly into the VGA buffer or to a serial
@@ -40,22 +40,49 @@ comment "Page alloc debug is incompatible with Software Suspend on i386"
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
- depends on DEBUG_KERNEL && !HIBERNATION && !HUGETLBFS
- depends on X86_32
+ depends on DEBUG_KERNEL && X86_32
help
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
of memory corruptions.
+config DEBUG_PER_CPU_MAPS
+ bool "Debug access to per_cpu maps"
+ depends on DEBUG_KERNEL
+ depends on X86_64_SMP
+ default n
+ help
+ Say Y to verify that the per_cpu map being accessed has
+ been setup. Adds a fair amount of code to kernel memory
+ and decreases performance.
+
+ Say N if unsure.
+
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
+ default y
depends on DEBUG_KERNEL
help
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
- data. This option may have a slight performance impact because a
- portion of the kernel code won't be covered by a 2MB TLB anymore.
- If in doubt, say "N".
+ data. This is recommended so that we can catch kernel bugs sooner.
+ If in doubt, say "Y".
+
+config DEBUG_RODATA_TEST
+ bool "Testcase for the DEBUG_RODATA feature"
+ depends on DEBUG_RODATA
+ help
+ This option enables a testcase for the DEBUG_RODATA
+ feature as well as for the change_page_attr() infrastructure.
+ If in doubt, say "N"
+
+config DEBUG_NX_TEST
+ tristate "Testcase for the NX non-executable stack feature"
+ depends on DEBUG_KERNEL && m
+ help
+ This option enables a testcase for the CPU NX capability
+ and the software setup of this feature.
+ If in doubt, say "N"
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
@@ -75,8 +102,7 @@ config X86_FIND_SMP_CONFIG
config X86_MPPARSE
def_bool y
- depends on X86_LOCAL_APIC && !X86_VISWS
- depends on X86_32
+ depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
config DOUBLEFAULT
default y
@@ -112,4 +138,91 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+ int
+ default "0"
+
+config IO_DELAY_TYPE_0XED
+ int
+ default "1"
+
+config IO_DELAY_TYPE_UDELAY
+ int
+ default "2"
+
+config IO_DELAY_TYPE_NONE
+ int
+ default "3"
+
+choice
+ prompt "IO delay type"
+ default IO_DELAY_0XED
+
+config IO_DELAY_0X80
+ bool "port 0x80 based port-IO delay [recommended]"
+ help
+ This is the traditional Linux IO delay used for in/out_p.
+ It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+ bool "port 0xed based port-IO delay"
+ help
+ Use port 0xed as the IO delay. This frees up port 0x80 which is
+ often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+ bool "udelay based port-IO delay"
+ help
+ Use udelay(2) as the IO delay method. This provides the delay
+ while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+ bool "no port-IO delay"
+ help
+ No port-IO delay. Will break on old boxes that require port-IO
+ delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_NONE
+endif
+
+config DEBUG_BOOT_PARAMS
+ bool "Debug boot parameters"
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ help
+ This option will cause struct boot_params to be exported via debugfs.
+
+config CPA_DEBUG
+ bool "CPA self test code"
+ depends on DEBUG_KERNEL
+ help
+ Do change_page_attr self tests at boot.
+
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7aa1dc6d67c..b08f18261df 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,13 +7,252 @@ else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
-# No need to remake these files
-$(srctree)/arch/x86/Makefile%: ;
+# BITS is used as extension for files which are available in a 32 bit
+# and a 64 bit version to simplify shared Makefiles.
+# e.g.: obj-y += foo_$(BITS).o
+export BITS
ifeq ($(CONFIG_X86_32),y)
+ BITS := 32
UTS_MACHINE := i386
- include $(srctree)/arch/x86/Makefile_32
+ CHECKFLAGS += -D__i386__
+
+ biarch := $(call cc-option,-m32)
+ KBUILD_AFLAGS += $(biarch)
+ KBUILD_CFLAGS += $(biarch)
+
+ ifdef CONFIG_RELOCATABLE
+ LDFLAGS_vmlinux := --emit-relocs
+ endif
+
+ KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
+
+ # prevent gcc from keeping the stack 16 byte aligned
+ KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+
+ # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+ # a lot more stack due to the lack of sharing of stacklots:
+ KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+ echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+
+ # CPU-specific tuning. Anything which can be shared with UML should go here.
+ include $(srctree)/arch/x86/Makefile_32.cpu
+ KBUILD_CFLAGS += $(cflags-y)
+
+ # temporary until string.h is fixed
+ KBUILD_CFLAGS += -ffreestanding
else
+ BITS := 64
UTS_MACHINE := x86_64
- include $(srctree)/arch/x86/Makefile_64
+ CHECKFLAGS += -D__x86_64__ -m64
+
+ KBUILD_AFLAGS += -m64
+ KBUILD_CFLAGS += -m64
+
+ # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+ cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+
+ cflags-$(CONFIG_MCORE2) += \
+ $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+ cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+ KBUILD_CFLAGS += $(cflags-y)
+
+ KBUILD_CFLAGS += -mno-red-zone
+ KBUILD_CFLAGS += -mcmodel=kernel
+
+ # -funit-at-a-time shrinks the kernel .text considerably
+ # unfortunately it makes reading oopses harder.
+ KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+
+ # this works around some issues with generating unwind tables in older gccs
+ # newer gccs do it by default
+ KBUILD_CFLAGS += -maccumulate-outgoing-args
+
+ stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
+ stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
+ "$(CC)" -fstack-protector )
+ stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
+ "$(CC)" -fstack-protector-all )
+
+ KBUILD_CFLAGS += $(stackp-y)
+endif
+
+# Stackpointer is addressed different for 32 bit and 64 bit x86
+sp-$(CONFIG_X86_32) := esp
+sp-$(CONFIG_X86_64) := rsp
+
+# do binutils support CFI?
+cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
+# is .cfi_signal_frame supported too?
+cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
+
+LDFLAGS := -m elf_$(UTS_MACHINE)
+OBJCOPYFLAGS := -O binary -R .note -R .comment -S
+
+# Speed up the build
+KBUILD_CFLAGS += -pipe
+# Workaround for a gcc prelease that unfortunately was shipped in a suse release
+KBUILD_CFLAGS += -Wno-sign-compare
+#
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# prevent gcc from generating any FP code by mistake
+KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+
+###
+# Sub architecture support
+# fcore-y is linked before mcore-y files.
+
+# Default subarch .c files
+mcore-y := arch/x86/mach-default/
+
+# Voyager subarch support
+mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager
+mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
+
+# VISWS subarch support
+mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
+mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
+
+# NUMAQ subarch support
+mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
+mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
+
+# BIGSMP subarch support
+mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
+mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
+
+#Summit subarch support
+mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
+mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
+
+# generic subarchitecture
+mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
+fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
+mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
+
+
+# ES7000 subarch support
+mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
+fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
+mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
+
+# RDC R-321x subarch support
+mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
+mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default
+core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/
+
+# default subarch .h files
+mflags-y += -Iinclude/asm-x86/mach-default
+
+# 64 bit does not support subarch support - clear sub arch variables
+fcore-$(CONFIG_X86_64) :=
+mcore-$(CONFIG_X86_64) :=
+mflags-$(CONFIG_X86_64) :=
+
+KBUILD_CFLAGS += $(mflags-y)
+KBUILD_AFLAGS += $(mflags-y)
+
+###
+# Kernel objects
+
+head-y := arch/x86/kernel/head_$(BITS).o
+head-$(CONFIG_X86_64) += arch/x86/kernel/head64.o
+head-y += arch/x86/kernel/init_task.o
+
+libs-y += arch/x86/lib/
+
+# Sub architecture files that needs linking first
+core-y += $(fcore-y)
+
+# Xen paravirtualization support
+core-$(CONFIG_XEN) += arch/x86/xen/
+
+# lguest paravirtualization support
+core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
+
+core-y += arch/x86/kernel/
+core-y += arch/x86/mm/
+
+# Remaining sub architecture files
+core-y += $(mcore-y)
+
+core-y += arch/x86/crypto/
+core-y += arch/x86/vdso/
+core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+
+# drivers-y are linked after core-y
+drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
+drivers-$(CONFIG_PCI) += arch/x86/pci/
+
+# must be linked after kernel/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
+
+ifeq ($(CONFIG_X86_32),y)
+drivers-$(CONFIG_PM) += arch/x86/power/
+drivers-$(CONFIG_FB) += arch/x86/video/
endif
+
+####
+# boot loader support. Several targets are kept for legacy purposes
+
+boot := arch/x86/boot
+
+PHONY += zImage bzImage compressed zlilo bzlilo \
+ zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
+
+# Default kernel to build
+all: bzImage
+
+# KBUILD_IMAGE specify target image being built
+ KBUILD_IMAGE := $(boot)/bzImage
+zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
+
+zImage bzImage: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+ $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
+ $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage
+
+compressed: zImage
+
+zlilo bzlilo: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
+
+zdisk bzdisk: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
+
+fdimage fdimage144 fdimage288 isoimage: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+install: vdso_install
+ $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+
+PHONY += vdso_install
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/x86/vdso $@
+
+archclean:
+ $(Q)rm -rf $(objtree)/arch/i386
+ $(Q)rm -rf $(objtree)/arch/x86_64
+ $(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+ echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
+ echo ' install - Install kernel using'
+ echo ' (your) ~/bin/installkernel or'
+ echo ' (distribution) /sbin/installkernel or'
+ echo ' install to $$(INSTALL_PATH) and run lilo'
+ echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+ echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+ echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
+ echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
+ echo ' bzdisk/fdimage*/isoimage also accept:'
+ echo ' FDARGS="..." arguments for the booted kernel'
+ echo ' FDINITRD=file initrd for the booted kernel'
+endef
+
+CLEAN_FILES += arch/x86/boot/fdimage \
+ arch/x86/boot/image.iso \
+ arch/x86/boot/mtools.conf
diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32
deleted file mode 100644
index 50394da2f6c..00000000000
--- a/arch/x86/Makefile_32
+++ /dev/null
@@ -1,175 +0,0 @@
-#
-# i386 Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" cleaning up for this architecture.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Linus Torvalds
-#
-# 19990713 Artur Skawina <skawina@geocities.com>
-# Added '-march' and '-mpreferred-stack-boundary' support
-#
-# 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net>
-# Added support for GEODE CPU
-
-# BITS is used as extension for files which are available in a 32 bit
-# and a 64 bit version to simplify shared Makefiles.
-# e.g.: obj-y += foo_$(BITS).o
-BITS := 32
-export BITS
-
-HAS_BIARCH := $(call cc-option-yn, -m32)
-ifeq ($(HAS_BIARCH),y)
-AS := $(AS) --32
-LD := $(LD) -m elf_i386
-CC := $(CC) -m32
-endif
-
-LDFLAGS := -m elf_i386
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-ifdef CONFIG_RELOCATABLE
-LDFLAGS_vmlinux := --emit-relocs
-endif
-CHECKFLAGS += -D__i386__
-
-KBUILD_CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
-
-# prevent gcc from keeping the stack 16 byte aligned
-KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
-
-# CPU-specific tuning. Anything which can be shared with UML should go here.
-include $(srctree)/arch/x86/Makefile_32.cpu
-
-# temporary until string.h is fixed
-cflags-y += -ffreestanding
-
-# this works around some issues with generating unwind tables in older gccs
-# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
-
-# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
-# a lot more stack due to the lack of sharing of stacklots:
-KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
-
-# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-
-# is .cfi_signal_frame supported too?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-
-KBUILD_CFLAGS += $(cflags-y)
-
-# Default subarch .c files
-mcore-y := arch/x86/mach-default
-
-# Voyager subarch support
-mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager
-mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager
-
-# VISWS subarch support
-mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
-mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws
-
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
-mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default
-
-# generic subarchitecture
-mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-x86/mach-generic
-mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default
-core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
-mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default
-core-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
-
-# default subarch .h files
-mflags-y += -Iinclude/asm-x86/mach-default
-
-head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o
-
-libs-y += arch/x86/lib/
-core-y += arch/x86/kernel/ \
- arch/x86/mm/ \
- $(mcore-y)/ \
- arch/x86/crypto/
-drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
-drivers-$(CONFIG_PCI) += arch/x86/pci/
-# must be linked after kernel/
-drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
-drivers-$(CONFIG_PM) += arch/x86/power/
-drivers-$(CONFIG_FB) += arch/x86/video/
-
-KBUILD_CFLAGS += $(mflags-y)
-KBUILD_AFLAGS += $(mflags-y)
-
-boot := arch/x86/boot
-
-PHONY += zImage bzImage compressed zlilo bzlilo \
- zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
-
-all: bzImage
-
-# KBUILD_IMAGE specify target image being built
- KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
-
-zImage bzImage: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
- $(Q)mkdir -p $(objtree)/arch/i386/boot
- $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
-
-compressed: zImage
-
-zlilo bzlilo: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
-
-zdisk bzdisk: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-
-install:
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
-
-archclean:
- $(Q)rm -rf $(objtree)/arch/i386/boot
- $(Q)$(MAKE) $(clean)=arch/x86/boot
-
-define archhelp
- echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
- echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
- echo ' install to $$(INSTALL_PATH) and run lilo'
- echo ' bzdisk - Create a boot floppy in /dev/fd0'
- echo ' fdimage - Create a boot floppy image'
- echo ' isoimage - Create a boot CD-ROM image'
-endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
- arch/x86/boot/image.iso \
- arch/x86/boot/mtools.conf
diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64
deleted file mode 100644
index a804860022e..00000000000
--- a/arch/x86/Makefile_64
+++ /dev/null
@@ -1,144 +0,0 @@
-#
-# x86_64 Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Linus Torvalds
-#
-# 19990713 Artur Skawina <skawina@geocities.com>
-# Added '-march' and '-mpreferred-stack-boundary' support
-# 20000913 Pavel Machek <pavel@suse.cz>
-# Converted for x86_64 architecture
-# 20010105 Andi Kleen, add IA32 compiler.
-# ....and later removed it again....
-#
-# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
-
-# BITS is used as extension for files which are available in a 32 bit
-# and a 64 bit version to simplify shared Makefiles.
-# e.g.: obj-y += foo_$(BITS).o
-BITS := 64
-export BITS
-
-LDFLAGS := -m elf_x86_64
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-LDFLAGS_vmlinux :=
-CHECKFLAGS += -D__x86_64__ -m64
-
-cflags-y :=
-cflags-kernel-y :=
-cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
-cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
-# will eventually. Use -mtune=generic as fallback
-cflags-$(CONFIG_MCORE2) += \
- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
-
-cflags-y += -m64
-cflags-y += -mno-red-zone
-cflags-y += -mcmodel=kernel
-cflags-y += -pipe
-cflags-y += -Wno-sign-compare
-cflags-y += -fno-asynchronous-unwind-tables
-ifneq ($(CONFIG_DEBUG_INFO),y)
-# -fweb shrinks the kernel a bit, but the difference is very small
-# it also messes up debugging, so don't use it for now.
-#cflags-y += $(call cc-option,-fweb)
-endif
-# -funit-at-a-time shrinks the kernel .text considerably
-# unfortunately it makes reading oopses harder.
-cflags-y += $(call cc-option,-funit-at-a-time)
-# prevent gcc from generating any FP code by mistake
-cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
-# this works around some issues with generating unwind tables in older gccs
-# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
-
-# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-
-# is .cfi_signal_frame supported too?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-
-cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
-cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
-
-KBUILD_CFLAGS += $(cflags-y)
-CFLAGS_KERNEL += $(cflags-kernel-y)
-KBUILD_AFLAGS += -m64
-
-head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o
-
-libs-y += arch/x86/lib/
-core-y += arch/x86/kernel/ \
- arch/x86/mm/ \
- arch/x86/crypto/ \
- arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
-drivers-$(CONFIG_PCI) += arch/x86/pci/
-drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
-
-boot := arch/x86/boot
-
-PHONY += bzImage bzlilo install archmrproper \
- fdimage fdimage144 fdimage288 isoimage archclean
-
-#Default target when executing "make"
-all: bzImage
-
-BOOTIMAGE := arch/x86/boot/bzImage
-KBUILD_IMAGE := $(BOOTIMAGE)
-
-bzImage: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE)
- $(Q)mkdir -p $(objtree)/arch/x86_64/boot
- $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
-
-bzlilo: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo
-
-bzdisk: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
-
-install: vdso_install
- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
-
-vdso_install:
-ifeq ($(CONFIG_IA32_EMULATION),y)
- $(Q)$(MAKE) $(build)=arch/x86/ia32 $@
-endif
- $(Q)$(MAKE) $(build)=arch/x86/vdso $@
-
-archclean:
- $(Q)rm -rf $(objtree)/arch/x86_64/boot
- $(Q)$(MAKE) $(clean)=$(boot)
-
-define archhelp
- echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
- echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
- echo ' install to $$(INSTALL_PATH) and run lilo'
- echo ' bzdisk - Create a boot floppy in /dev/fd0'
- echo ' fdimage - Create a boot floppy image'
- echo ' isoimage - Create a boot CD-ROM image'
-endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
- arch/x86/boot/image.iso \
- arch/x86/boot/mtools.conf
-
-
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 7a3116ccf38..349b81a39c4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -28,9 +28,11 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
targets := vmlinux.bin setup.bin setup.elf zImage bzImage
subdir- := compressed
-setup-y += a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y += printf.o string.o tty.o video.o version.o voyager.o
+setup-y += printf.o string.o tty.o video.o version.o
+setup-$(CONFIG_X86_APM_BOOT) += apm.o
+setup-$(CONFIG_X86_VOYAGER) += voyager.o
# The link order of the video-*.o modules can matter. In particular,
# video-vga.o *must* be listed first, followed by video-vesa.o.
@@ -49,10 +51,7 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE)
# How to compile the 16-bit code. Note we always compile for -march=i386,
# that way we can complain to the user if the CPU is insufficient.
-cflags-$(CONFIG_X86_32) :=
-cflags-$(CONFIG_X86_64) := -m32
KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
- $(cflags-y) \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/code16gcc.h \
@@ -62,6 +61,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
$(call cc-option, -fno-unit-at-a-time)) \
$(call cc-option, -fno-stack-protector) \
$(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
$(obj)/zImage: IMAGE_OFFSET := 0x1000
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index eab50c55a3a..c117c7fb859 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -19,8 +19,6 @@
#include "boot.h"
-#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
-
int query_apm_bios(void)
{
u16 ax, bx, cx, dx, di;
@@ -95,4 +93,3 @@ int query_apm_bios(void)
return 0;
}
-#endif
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index d2b5adf4651..7822a4983da 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -109,7 +109,7 @@ typedef unsigned int addr_t;
static inline u8 rdfs8(addr_t addr)
{
u8 v;
- asm volatile("movb %%fs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
static inline u16 rdfs16(addr_t addr)
@@ -127,21 +127,21 @@ static inline u32 rdfs32(addr_t addr)
static inline void wrfs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "r" (v));
+ asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
static inline void wrfs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "r" (v));
+ asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
static inline void wrfs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "r" (v));
+ asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
static inline u8 rdgs8(addr_t addr)
{
u8 v;
- asm volatile("movb %%gs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
static inline u16 rdgs16(addr_t addr)
@@ -159,15 +159,15 @@ static inline u32 rdgs32(addr_t addr)
static inline void wrgs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "r" (v));
+ asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
static inline void wrgs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "r" (v));
+ asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
static inline void wrgs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "r" (v));
+ asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
/* Note: these only return true/false, not a signed return value! */
@@ -241,6 +241,7 @@ int query_apm_bios(void);
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
+int cmdline_find_option_bool(const char *option);
/* cpu.c, cpucheck.c */
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 34bb778c435..680408a0f46 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -95,3 +95,68 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize)
return len;
}
+
+/*
+ * Find a boolean option (like quiet,noapic,nosmp....)
+ *
+ * Returns the position of that option (starts counting with 1)
+ * or 0 on not found
+ */
+int cmdline_find_option_bool(const char *option)
+{
+ u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr;
+ addr_t cptr;
+ char c;
+ int pos = 0, wstart = 0;
+ const char *opptr = NULL;
+ enum {
+ st_wordstart, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ } state = st_wordstart;
+
+ if (!cmdline_ptr || cmdline_ptr >= 0x100000)
+ return -1; /* No command line, or inaccessible */
+
+ cptr = cmdline_ptr & 0xf;
+ set_fs(cmdline_ptr >> 4);
+
+ while (cptr < 0x10000) {
+ c = rdfs8(cptr++);
+ pos++;
+
+ switch (state) {
+ case st_wordstart:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ wstart = pos;
+ /* fall through */
+
+ case st_wordcmp:
+ if (!*opptr)
+ if (!c || myisspace(c))
+ return wstart;
+ else
+ state = st_wordskip;
+ else if (!c)
+ return 0;
+ else if (c != *opptr++)
+ state = st_wordskip;
+ break;
+
+ case st_wordskip:
+ if (!c)
+ return 0;
+ else if (myisspace(c))
+ state = st_wordstart;
+ break;
+ }
+ }
+
+ return 0; /* Buffer overrun */
+}
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 52c1db85452..fe24ceabd90 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -1,5 +1,63 @@
+#
+# linux/arch/x86/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o
+
+KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
+cflags-$(CONFIG_X86_64) := -mcmodel=small
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
+
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+
+LDFLAGS := -m elf_$(UTS_MACHINE)
+LDFLAGS_vmlinux := -T
+
+$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+ $(call if_changed,ld)
+ @:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+
ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/boot/compressed/Makefile_32
+targets += vmlinux.bin.all vmlinux.relocs
+hostprogs-y := relocs
+
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+ $(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
+quiet_cmd_relocbin = BUILD $@
+ cmd_relocbin = cat $(filter-out FORCE,$^) > $@
+$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,relocbin)
+
+ifdef CONFIG_RELOCATABLE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
+ $(call if_changed,gzip)
else
-include ${srctree}/arch/x86/boot/compressed/Makefile_64
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
endif
+LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+
+else
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
+endif
+
+
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+ $(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile_32 b/arch/x86/boot/compressed/Makefile_32
deleted file mode 100644
index e43ff7c56e6..00000000000
--- a/arch/x86/boot/compressed/Makefile_32
+++ /dev/null
@@ -1,50 +0,0 @@
-#
-# linux/arch/x86/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_32.o misc_32.o piggy.o \
- vmlinux.bin.all vmlinux.relocs
-EXTRA_AFLAGS := -traditional
-
-LDFLAGS_vmlinux := -T
-hostprogs-y := relocs
-
-KBUILD_CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
- -fno-strict-aliasing -fPIC \
- $(call cc-option,-ffreestanding) \
- $(call cc-option,-fno-stack-protector)
-LDFLAGS := -m elf_i386
-
-$(obj)/vmlinux: $(src)/vmlinux_32.lds $(obj)/head_32.o $(obj)/misc_32.o $(obj)/piggy.o FORCE
- $(call if_changed,ld)
- @:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-quiet_cmd_relocs = RELOCS $@
- cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
- $(call if_changed,relocs)
-
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD $@
- cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,relocbin)
-
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
- $(call if_changed,gzip)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,gzip)
-endif
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-
-$(obj)/piggy.o: $(src)/vmlinux_32.scr $(obj)/vmlinux.bin.gz FORCE
- $(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile_64 b/arch/x86/boot/compressed/Makefile_64
deleted file mode 100644
index 7801e8dd90b..00000000000
--- a/arch/x86/boot/compressed/Makefile_64
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# linux/arch/x86/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_64.o misc_64.o piggy.o
-
-KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUXINCLUDE) -O2 \
- -fno-strict-aliasing -fPIC -mcmodel=small \
- $(call cc-option, -ffreestanding) \
- $(call cc-option, -fno-stack-protector)
-KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-LDFLAGS := -m elf_x86_64
-
-LDFLAGS_vmlinux := -T
-$(obj)/vmlinux: $(src)/vmlinux_64.lds $(obj)/head_64.o $(obj)/misc_64.o $(obj)/piggy.o FORCE
- $(call if_changed,ld)
- @:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,gzip)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-
-$(obj)/piggy.o: $(obj)/vmlinux_64.scr $(obj)/vmlinux.bin.gz FORCE
- $(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc.c
index b74d60d1b2f..8182e32c1b4 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -1,7 +1,7 @@
/*
* misc.c
- *
- * This is a collection of several routines from gzip-1.0.3
+ *
+ * This is a collection of several routines from gzip-1.0.3
* adapted for Linux.
*
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
@@ -9,9 +9,18 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+/*
+ * we have to be careful, because no indirections are allowed here, and
+ * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening
+ */
#undef CONFIG_PARAVIRT
+#ifdef CONFIG_X86_64
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+#endif
+
#include <linux/linkage.h>
-#include <linux/vmalloc.h>
#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -186,10 +195,20 @@ static void *memcpy(void *dest, const void *src, unsigned n);
static void putstr(const char *);
-static unsigned long free_mem_ptr;
-static unsigned long free_mem_end_ptr;
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
+static memptr free_mem_ptr;
+static memptr free_mem_end_ptr;
+#ifdef CONFIG_X86_64
+#define HEAP_SIZE 0x7000
+#else
#define HEAP_SIZE 0x4000
+#endif
static char *vidmem = (char *)0xb8000;
static int vidport;
@@ -230,7 +249,7 @@ static void gzip_mark(void **ptr)
static void gzip_release(void **ptr)
{
- free_mem_ptr = (unsigned long) *ptr;
+ free_mem_ptr = (memptr) *ptr;
}
static void scroll(void)
@@ -247,8 +266,10 @@ static void putstr(const char *s)
int x,y,pos;
char c;
+#ifdef CONFIG_X86_32
if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0)
return;
+#endif
x = RM_SCREEN_INFO.orig_x;
y = RM_SCREEN_INFO.orig_y;
@@ -261,7 +282,7 @@ static void putstr(const char *s)
y--;
}
} else {
- vidmem [ ( x + cols * y ) * 2 ] = c;
+ vidmem [(x + cols * y) * 2] = c;
if ( ++x >= cols ) {
x = 0;
if ( ++y >= lines ) {
@@ -276,16 +297,16 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
{
int i;
- char *ss = (char*)s;
+ char *ss = s;
for (i=0;i<n;i++) ss[i] = c;
return s;
@@ -294,7 +315,8 @@ static void* memset(void* s, int c, unsigned n)
static void* memcpy(void* dest, const void* src, unsigned n)
{
int i;
- char *d = (char *)dest, *s = (char *)src;
+ const char *s = src;
+ char *d = dest;
for (i=0;i<n;i++) d[i] = s[i];
return dest;
@@ -339,11 +361,13 @@ static void error(char *x)
putstr(x);
putstr("\n\n -- System halted");
- while(1); /* Halt */
+ while (1)
+ asm("hlt");
}
-asmlinkage void decompress_kernel(void *rmode, unsigned long end,
- uch *input_data, unsigned long input_len, uch *output)
+asmlinkage void decompress_kernel(void *rmode, memptr heap,
+ uch *input_data, unsigned long input_len,
+ uch *output)
{
real_mode = rmode;
@@ -358,25 +382,32 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long end,
lines = RM_SCREEN_INFO.orig_video_lines;
cols = RM_SCREEN_INFO.orig_video_cols;
- window = output; /* Output buffer (Normally at 1M) */
- free_mem_ptr = end; /* Heap */
- free_mem_end_ptr = end + HEAP_SIZE;
- inbuf = input_data; /* Input buffer */
+ window = output; /* Output buffer (Normally at 1M) */
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + HEAP_SIZE;
+ inbuf = input_data; /* Input buffer */
insize = input_len;
inptr = 0;
+#ifdef CONFIG_X86_64
+ if ((ulg)output & (__KERNEL_ALIGN - 1))
+ error("Destination address not 2M aligned");
+ if ((ulg)output >= 0xffffffffffUL)
+ error("Destination address too large");
+#else
if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
- if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+ if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
error("Destination address too large");
#ifndef CONFIG_RELOCATABLE
if ((u32)output != LOAD_PHYSICAL_ADDR)
error("Wrong destination address");
#endif
+#endif
makecrc();
- putstr("Uncompressing Linux... ");
+ putstr("\nDecompressing Linux... ");
gunzip();
- putstr("Ok, booting the kernel.\n");
+ putstr("done.\nBooting the kernel.\n");
return;
}
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
deleted file mode 100644
index 6ea015aa65e..00000000000
--- a/arch/x86/boot/compressed/misc_64.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * misc.c
- *
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
- *
- * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
- * puts by Nick Holloway 1993, better puts by Martin Mares 1995
- * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
- */
-
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-
-#include <linux/linkage.h>
-#include <linux/screen_info.h>
-#include <asm/io.h>
-#include <asm/page.h>
-
-/* WARNING!!
- * This code is compiled with -fPIC and it is relocated dynamically
- * at run time, but no relocation processing is performed.
- * This means that it is not safe to place pointers in static structures.
- */
-
-/*
- * Getting to provable safe in place decompression is hard.
- * Worst case behaviours need to be analyzed.
- * Background information:
- *
- * The file layout is:
- * magic[2]
- * method[1]
- * flags[1]
- * timestamp[4]
- * extraflags[1]
- * os[1]
- * compressed data blocks[N]
- * crc[4] orig_len[4]
- *
- * resulting in 18 bytes of non compressed data overhead.
- *
- * Files divided into blocks
- * 1 bit (last block flag)
- * 2 bits (block type)
- *
- * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
- * The smallest block type encoding is always used.
- *
- * stored:
- * 32 bits length in bytes.
- *
- * fixed:
- * magic fixed tree.
- * symbols.
- *
- * dynamic:
- * dynamic tree encoding.
- * symbols.
- *
- *
- * The buffer for decompression in place is the length of the
- * uncompressed data, plus a small amount extra to keep the algorithm safe.
- * The compressed data is placed at the end of the buffer. The output
- * pointer is placed at the start of the buffer and the input pointer
- * is placed where the compressed data starts. Problems will occur
- * when the output pointer overruns the input pointer.
- *
- * The output pointer can only overrun the input pointer if the input
- * pointer is moving faster than the output pointer. A condition only
- * triggered by data whose compressed form is larger than the uncompressed
- * form.
- *
- * The worst case at the block level is a growth of the compressed data
- * of 5 bytes per 32767 bytes.
- *
- * The worst case internal to a compressed block is very hard to figure.
- * The worst case can at least be boundined by having one bit that represents
- * 32764 bytes and then all of the rest of the bytes representing the very
- * very last byte.
- *
- * All of which is enough to compute an amount of extra data that is required
- * to be safe. To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient. To avoind problems internal to a block
- * adding an extra 32767 bytes (the worst case uncompressed block size) is
- * sufficient, to ensure that in the worst case the decompressed data for
- * block will stop the byte before the compressed data for a block begins.
- * To avoid problems with the compressed data's meta information an extra 18
- * bytes are needed. Leading to the formula:
- *
- * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
- *
- * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
- * Adding 32768 instead of 32767 just makes for round numbers.
- * Adding the decompressor_size is necessary as it musht live after all
- * of the data as well. Last I measured the decompressor is about 14K.
- * 10K of actual data and 4K of bss.
- *
- */
-
-/*
- * gzip declarations
- */
-
-#define OF(args) args
-#define STATIC static
-
-#undef memset
-#undef memcpy
-#define memzero(s, n) memset ((s), 0, (n))
-
-typedef unsigned char uch;
-typedef unsigned short ush;
-typedef unsigned long ulg;
-
-#define WSIZE 0x80000000 /* Window size must be at least 32k,
- * and a power of two
- * We don't actually have a window just
- * a huge output buffer so I report
- * a 2G windows size, as that should
- * always be larger than our output buffer.
- */
-
-static uch *inbuf; /* input buffer */
-static uch *window; /* Sliding window buffer, (and final output buffer) */
-
-static unsigned insize; /* valid bytes in inbuf */
-static unsigned inptr; /* index of next byte to be processed in inbuf */
-static unsigned outcnt; /* bytes in output buffer */
-
-/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
-#define RESERVED 0xC0 /* bit 6,7: reserved */
-
-#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-
-/* Diagnostic functions */
-#ifdef DEBUG
-# define Assert(cond,msg) {if(!(cond)) error(msg);}
-# define Trace(x) fprintf x
-# define Tracev(x) {if (verbose) fprintf x ;}
-# define Tracevv(x) {if (verbose>1) fprintf x ;}
-# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
-#else
-# define Assert(cond,msg)
-# define Trace(x)
-# define Tracev(x)
-# define Tracevv(x)
-# define Tracec(c,x)
-# define Tracecv(c,x)
-#endif
-
-static int fill_inbuf(void);
-static void flush_window(void);
-static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
-/*
- * This is set up by the setup-routine at boot-time
- */
-static unsigned char *real_mode; /* Pointer to real-mode data */
-
-#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
-#ifndef STANDARD_MEMORY_BIOS_CALL
-#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
-#endif
-#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-
-extern unsigned char input_data[];
-extern int input_len;
-
-static long bytes_out = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-
-static void *memset(void *s, int c, unsigned n);
-static void *memcpy(void *dest, const void *src, unsigned n);
-
-static void putstr(const char *);
-
-static long free_mem_ptr;
-static long free_mem_end_ptr;
-
-#define HEAP_SIZE 0x7000
-
-static char *vidmem = (char *)0xb8000;
-static int vidport;
-static int lines, cols;
-
-#include "../../../../lib/inflate.c"
-
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
-static void scroll(void)
-{
- int i;
-
- memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
- for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
- vidmem[i] = ' ';
-}
-
-static void putstr(const char *s)
-{
- int x,y,pos;
- char c;
-
- x = RM_SCREEN_INFO.orig_x;
- y = RM_SCREEN_INFO.orig_y;
-
- while ( ( c = *s++ ) != '\0' ) {
- if ( c == '\n' ) {
- x = 0;
- if ( ++y >= lines ) {
- scroll();
- y--;
- }
- } else {
- vidmem [ ( x + cols * y ) * 2 ] = c;
- if ( ++x >= cols ) {
- x = 0;
- if ( ++y >= lines ) {
- scroll();
- y--;
- }
- }
- }
- }
-
- RM_SCREEN_INFO.orig_x = x;
- RM_SCREEN_INFO.orig_y = y;
-
- pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
-}
-
-static void* memset(void* s, int c, unsigned n)
-{
- int i;
- char *ss = (char*)s;
-
- for (i=0;i<n;i++) ss[i] = c;
- return s;
-}
-
-static void* memcpy(void* dest, const void* src, unsigned n)
-{
- int i;
- char *d = (char *)dest, *s = (char *)src;
-
- for (i=0;i<n;i++) d[i] = s[i];
- return dest;
-}
-
-/* ===========================================================================
- * Fill the input buffer. This is called only when the buffer is empty
- * and at least one byte is really needed.
- */
-static int fill_inbuf(void)
-{
- error("ran out of input data");
- return 0;
-}
-
-/* ===========================================================================
- * Write the output window window[0..outcnt-1] and update crc and bytes_out.
- * (Used for the decompressed data only.)
- */
-static void flush_window(void)
-{
- /* With my window equal to my output buffer
- * I only need to compute the crc here.
- */
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
-
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void error(char *x)
-{
- putstr("\n\n");
- putstr(x);
- putstr("\n\n -- System halted");
-
- while(1); /* Halt */
-}
-
-asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
- uch *input_data, unsigned long input_len, uch *output)
-{
- real_mode = rmode;
-
- if (RM_SCREEN_INFO.orig_video_mode == 7) {
- vidmem = (char *) 0xb0000;
- vidport = 0x3b4;
- } else {
- vidmem = (char *) 0xb8000;
- vidport = 0x3d4;
- }
-
- lines = RM_SCREEN_INFO.orig_video_lines;
- cols = RM_SCREEN_INFO.orig_video_cols;
-
- window = output; /* Output buffer (Normally at 1M) */
- free_mem_ptr = heap; /* Heap */
- free_mem_end_ptr = heap + HEAP_SIZE;
- inbuf = input_data; /* Input buffer */
- insize = input_len;
- inptr = 0;
-
- if ((ulg)output & (__KERNEL_ALIGN - 1))
- error("Destination address not 2M aligned");
- if ((ulg)output >= 0xffffffffffUL)
- error("Destination address too large");
-
- makecrc();
- putstr(".\nDecompressing Linux...");
- gunzip();
- putstr("done.\nBooting the kernel.\n");
- return;
-}
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 7a0d00b2cf2..d01ea42187e 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -27,11 +27,6 @@ static unsigned long *relocs;
* absolute relocations present w.r.t these symbols.
*/
static const char* safe_abs_relocs[] = {
- "__kernel_vsyscall",
- "__kernel_rt_sigreturn",
- "__kernel_sigreturn",
- "SYSENTER_RETURN",
- "VDSO_NOTE_MASK",
"xen_irq_disable_direct_reloc",
"xen_save_fl_direct_reloc",
};
@@ -45,6 +40,8 @@ static int is_safe_abs_reloc(const char* sym_name)
/* Match found */
return 1;
}
+ if (strncmp(sym_name, "VDSO", 4) == 0)
+ return 1;
if (strncmp(sym_name, "__crc_", 6) == 0)
return 1;
return 0;
diff --git a/arch/x86/boot/compressed/vmlinux_64.scr b/arch/x86/boot/compressed/vmlinux.scr
index bd1429ce193..f02382ae5c4 100644
--- a/arch/x86/boot/compressed/vmlinux_64.scr
+++ b/arch/x86/boot/compressed/vmlinux.scr
@@ -1,6 +1,6 @@
SECTIONS
{
- .text.compressed : {
+ .rodata.compressed : {
input_len = .;
LONG(input_data_end - input_data) input_data = .;
*(.data)
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
index cc4854f6c6c..bb3c48379c4 100644
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ b/arch/x86/boot/compressed/vmlinux_32.lds
@@ -3,17 +3,17 @@ OUTPUT_ARCH(i386)
ENTRY(startup_32)
SECTIONS
{
- /* Be careful parts of head.S assume startup_32 is at
- * address 0.
+ /* Be careful parts of head_32.S assume startup_32 is at
+ * address 0.
*/
- . = 0 ;
+ . = 0;
.text.head : {
_head = . ;
*(.text.head)
_ehead = . ;
}
- .data.compressed : {
- *(.data.compressed)
+ .rodata.compressed : {
+ *(.rodata.compressed)
}
.text : {
_text = .; /* Text */
diff --git a/arch/x86/boot/compressed/vmlinux_32.scr b/arch/x86/boot/compressed/vmlinux_32.scr
deleted file mode 100644
index 707a88f7f29..00000000000
--- a/arch/x86/boot/compressed/vmlinux_32.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
- .data.compressed : {
- input_len = .;
- LONG(input_data_end - input_data) input_data = .;
- *(.data)
- output_len = . - 4;
- input_data_end = .;
- }
-}
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
index 94c13e557fb..f6e5b445f45 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux_64.lds
@@ -3,15 +3,19 @@ OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
SECTIONS
{
- /* Be careful parts of head.S assume startup_32 is at
- * address 0.
+ /* Be careful parts of head_64.S assume startup_64 is at
+ * address 0.
*/
. = 0;
- .text : {
+ .text.head : {
_head = . ;
*(.text.head)
_ehead = . ;
- *(.text.compressed)
+ }
+ .rodata.compressed : {
+ *(.rodata.compressed)
+ }
+ .text : {
_text = .; /* Text */
*(.text)
*(.text.*)
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index bd138e442ec..8721dc46a0b 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -129,6 +129,7 @@ void query_edd(void)
char eddarg[8];
int do_mbr = 1;
int do_edd = 1;
+ int be_quiet;
int devno;
struct edd_info ei, *edp;
u32 *mbrptr;
@@ -140,12 +141,21 @@ void query_edd(void)
do_edd = 0;
}
+ be_quiet = cmdline_find_option_bool("quiet");
+
edp = boot_params.eddbuf;
mbrptr = boot_params.edd_mbr_sig_buffer;
if (!do_edd)
return;
+ /* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
+ * so give a hint if this happens.
+ */
+
+ if (!be_quiet)
+ printf("Probing EDD (edd=off to disable)... ");
+
for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
/*
* Scan the BIOS-supported hard disks and query EDD
@@ -162,6 +172,9 @@ void query_edd(void)
if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
}
+
+ if (!be_quiet)
+ printf("ok\n");
}
#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4cc5b0411db..64ad9016585 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -195,10 +195,13 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# can be located anywhere in
# low memory 0x10000 or higher.
-ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
+ramdisk_max: .long 0x7fffffff
# (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
+ # The current kernel allows up to 4 GB,
+ # but leave it at 2 GB to avoid
+ # possible bootloader bugs.
kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
#required for protected mode
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 1f95750ede2..7828da5cfd0 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -100,20 +100,32 @@ static void set_bios_mode(void)
#endif
}
-void main(void)
+static void init_heap(void)
{
- /* First, copy the boot header into the "zeropage" */
- copy_boot_params();
+ char *stack_end;
- /* End of heap check */
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
- heap_end = (char *)(boot_params.hdr.heap_end_ptr
- +0x200-STACK_SIZE);
+ asm("leal %P1(%%esp),%0"
+ : "=r" (stack_end) : "i" (-STACK_SIZE));
+
+ heap_end = (char *)
+ ((size_t)boot_params.hdr.heap_end_ptr + 0x200);
+ if (heap_end > stack_end)
+ heap_end = stack_end;
} else {
/* Boot protocol 2.00 only, no heap available */
puts("WARNING: Ancient bootloader, some functionality "
"may be limited!\n");
}
+}
+
+void main(void)
+{
+ /* First, copy the boot header into the "zeropage" */
+ copy_boot_params();
+
+ /* End of heap check */
+ init_heap();
/* Make sure we have all the proper CPU support */
if (validate_cpu()) {
@@ -131,9 +143,6 @@ void main(void)
/* Set keyboard repeat rate (why?) */
keyboard_set_repeat();
- /* Set the video mode */
- set_video();
-
/* Query MCA information */
query_mca();
@@ -154,6 +163,10 @@ void main(void)
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
query_edd();
#endif
+
+ /* Set the video mode */
+ set_video();
+
/* Do the last things and invoke protected mode */
go_to_protected_mode();
}
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 09fb342cc62..1a0f936c160 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -104,7 +104,7 @@ static void reset_coprocessor(void)
(((u64)(base & 0xff000000) << 32) | \
((u64)flags << 40) | \
((u64)(limit & 0x00ff0000) << 32) | \
- ((u64)(base & 0x00ffff00) << 16) | \
+ ((u64)(base & 0x00ffffff) << 16) | \
((u64)(limit & 0x0000ffff)))
struct gdt_ptr {
@@ -121,6 +121,10 @@ static void setup_gdt(void)
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
/* DS: data, read/write, 4 GB, base 0 */
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+ /* TSS: 32-bit tss, 104 bytes, base 4096 */
+ /* We only have a TSS here to keep Intel VT happy;
+ we don't actually use it for anything. */
+ [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
of the gdt_ptr contents. Thus, make it static so it will
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index fa6bed1fac1..f5402d51f7c 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -15,6 +15,7 @@
*/
#include <asm/boot.h>
+#include <asm/processor-flags.h>
#include <asm/segment.h>
.text
@@ -29,28 +30,55 @@
*/
protected_mode_jump:
movl %edx, %esi # Pointer to boot_params table
- movl %eax, 2f # Patch ljmpl instruction
+
+ xorl %ebx, %ebx
+ movw %cs, %bx
+ shll $4, %ebx
+ addl %ebx, 2f
movw $__BOOT_DS, %cx
- xorl %ebx, %ebx # Per the 32-bit boot protocol
- xorl %ebp, %ebp # Per the 32-bit boot protocol
- xorl %edi, %edi # Per the 32-bit boot protocol
+ movw $__BOOT_TSS, %di
movl %cr0, %edx
- orb $1, %dl # Protected mode (PE) bit
+ orb $X86_CR0_PE, %dl # Protected mode
movl %edx, %cr0
jmp 1f # Short jump to serialize on 386/486
1:
- movw %cx, %ds
- movw %cx, %es
- movw %cx, %fs
- movw %cx, %gs
- movw %cx, %ss
-
- # Jump to the 32-bit entrypoint
+ # Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode
-2: .long 0 # offset
+2: .long in_pm32 # offset
.word __BOOT_CS # segment
.size protected_mode_jump, .-protected_mode_jump
+
+ .code32
+ .type in_pm32, @function
+in_pm32:
+ # Set up data segments for flat 32-bit mode
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %ecx, %fs
+ movl %ecx, %gs
+ movl %ecx, %ss
+ # The 32-bit code sets up its own stack, but this way we do have
+ # a valid stack if some debugging hack wants to use it.
+ addl %ebx, %esp
+
+ # Set up TR to make Intel VT happy
+ ltr %di
+
+ # Clear registers to allow for future extensions to the
+ # 32-bit boot protocol
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %ebx, %ebx
+ xorl %ebp, %ebp
+ xorl %edi, %edi
+
+ # Set up LDTR to make Intel VT happy
+ lldt %cx
+
+ jmpl *%eax # Jump to the 32-bit entrypoint
+
+ .size in_pm32, .-in_pm32
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index ed0672a8187..ff664a11709 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -104,6 +104,7 @@ static int bios_probe(void)
mi = GET_HEAP(struct mode_info, 1);
mi->mode = VIDEO_FIRST_BIOS+mode;
+ mi->depth = 0; /* text */
mi->x = rdfs16(0x44a);
mi->y = rdfs8(0x484)+1;
nmodes++;
@@ -116,7 +117,7 @@ static int bios_probe(void)
__videocard video_bios =
{
- .card_name = "BIOS (scanned)",
+ .card_name = "BIOS",
.probe = bios_probe,
.set_mode = bios_set_mode,
.unsafe = 1,
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4716b9a9635..662dd2f1306 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -79,20 +79,28 @@ static int vesa_probe(void)
/* Text Mode, TTY BIOS supported,
supported by hardware */
mi = GET_HEAP(struct mode_info, 1);
- mi->mode = mode + VIDEO_FIRST_VESA;
- mi->x = vminfo.h_res;
- mi->y = vminfo.v_res;
+ mi->mode = mode + VIDEO_FIRST_VESA;
+ mi->depth = 0; /* text */
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
nmodes++;
- } else if ((vminfo.mode_attr & 0x99) == 0x99) {
+ } else if ((vminfo.mode_attr & 0x99) == 0x99 &&
+ (vminfo.memory_layout == 4 ||
+ vminfo.memory_layout == 6) &&
+ vminfo.memory_planes == 1) {
#ifdef CONFIG_FB
/* Graphics mode, color, linear frame buffer
- supported -- register the mode but hide from
- the menu. Only do this if framebuffer is
- configured, however, otherwise the user will
- be left without a screen. */
+ supported. Only register the mode if
+ if framebuffer is configured, however,
+ otherwise the user will be left without a screen.
+ We don't require CONFIG_FB_VESA, however, since
+ some of the other framebuffer drivers can use
+ this mode-setting, too. */
mi = GET_HEAP(struct mode_info, 1);
mi->mode = mode + VIDEO_FIRST_VESA;
- mi->x = mi->y = 0;
+ mi->depth = vminfo.bpp;
+ mi->x = vminfo.h_res;
+ mi->y = vminfo.v_res;
nmodes++;
#endif
}
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index aef02f9ec0c..7259387b7d1 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -18,22 +18,22 @@
#include "video.h"
static struct mode_info vga_modes[] = {
- { VIDEO_80x25, 80, 25 },
- { VIDEO_8POINT, 80, 50 },
- { VIDEO_80x43, 80, 43 },
- { VIDEO_80x28, 80, 28 },
- { VIDEO_80x30, 80, 30 },
- { VIDEO_80x34, 80, 34 },
- { VIDEO_80x60, 80, 60 },
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 50, 0 },
+ { VIDEO_80x43, 80, 43, 0 },
+ { VIDEO_80x28, 80, 28, 0 },
+ { VIDEO_80x30, 80, 30, 0 },
+ { VIDEO_80x34, 80, 34, 0 },
+ { VIDEO_80x60, 80, 60, 0 },
};
static struct mode_info ega_modes[] = {
- { VIDEO_80x25, 80, 25 },
- { VIDEO_8POINT, 80, 43 },
+ { VIDEO_80x25, 80, 25, 0 },
+ { VIDEO_8POINT, 80, 43, 0 },
};
static struct mode_info cga_modes[] = {
- { VIDEO_80x25, 80, 25 },
+ { VIDEO_80x25, 80, 25, 0 },
};
__videocard video_vga;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index ad9712f0173..696d08f3843 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -293,13 +293,28 @@ static void display_menu(void)
struct mode_info *mi;
char ch;
int i;
+ int nmodes;
+ int modes_per_line;
+ int col;
- puts("Mode: COLSxROWS:\n");
+ nmodes = 0;
+ for (card = video_cards; card < video_cards_end; card++)
+ nmodes += card->nmodes;
+ modes_per_line = 1;
+ if (nmodes >= 20)
+ modes_per_line = 3;
+
+ for (col = 0; col < modes_per_line; col++)
+ puts("Mode: Resolution: Type: ");
+ putchar('\n');
+
+ col = 0;
ch = '0';
for (card = video_cards; card < video_cards_end; card++) {
mi = card->modes;
for (i = 0; i < card->nmodes; i++, mi++) {
+ char resbuf[32];
int visible = mi->x && mi->y;
u16 mode_id = mi->mode ? mi->mode :
(mi->y << 8)+mi->x;
@@ -307,8 +322,18 @@ static void display_menu(void)
if (!visible)
continue; /* Hidden mode */
- printf("%c %04X %3dx%-3d %s\n",
- ch, mode_id, mi->x, mi->y, card->card_name);
+ if (mi->depth)
+ sprintf(resbuf, "%dx%d", mi->y, mi->depth);
+ else
+ sprintf(resbuf, "%d", mi->y);
+
+ printf("%c %03X %4dx%-7s %-6s",
+ ch, mode_id, mi->x, resbuf, card->card_name);
+ col++;
+ if (col >= modes_per_line) {
+ putchar('\n');
+ col = 0;
+ }
if (ch == '9')
ch = 'a';
@@ -318,6 +343,8 @@ static void display_menu(void)
ch++;
}
}
+ if (col)
+ putchar('\n');
}
#define H(x) ((x)-'a'+10)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index b92447d5121..d69347f79e8 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -83,7 +83,8 @@ void store_screen(void);
struct mode_info {
u16 mode; /* Mode number (vga= style) */
- u8 x, y; /* Width, height */
+ u16 x, y; /* Width, height */
+ u16 depth; /* Bits per pixel, 0 for text mode */
};
struct card_info {
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
index 61c8fe0453b..6499e3239b4 100644
--- a/arch/x86/boot/voyager.c
+++ b/arch/x86/boot/voyager.c
@@ -16,8 +16,6 @@
#include "boot.h"
-#ifdef CONFIG_X86_VOYAGER
-
int query_voyager(void)
{
u8 err;
@@ -42,5 +40,3 @@ int query_voyager(void)
copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */
return 0;
}
-
-#endif /* CONFIG_X86_VOYAGER */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 54ee1764fda..77562e7cdab 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -99,9 +99,9 @@ CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 38a83f9c966..9e2b0ef851d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y
CONFIG_NODES_SHIFT=6
CONFIG_X86_64_ACPI_NUMA=y
CONFIG_NUMA_EMU=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM_MANUAL=y
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_NEED_MULTIPLE_NODES=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e2edda255a8..52d0ccfcf6e 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -2,9 +2,7 @@
# Makefile for the ia32 kernel emulation subsystem.
#
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
- ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \
- mmap32.o
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
sysv-$(CONFIG_SYSVIPC) := ipc32.o
obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
@@ -13,40 +11,3 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
audit-class-$(CONFIG_AUDIT) := audit.o
obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
-
-$(obj)/syscall32_syscall.o: \
- $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
-
-# Teach kbuild about targets
-targets := $(foreach F,$(addprefix vsyscall-,sysenter syscall),\
- $F.o $F.so $F.so.dbg)
-
-# The DSO images are built using a special linker script
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m32 -nostdlib -shared \
- $(call ld-option, -Wl$(comma)--hash-style=sysv) \
- -Wl,-soname=linux-gate.so.1 -o $@ \
- -Wl,-T,$(filter-out FORCE,$^)
-
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-$(obj)/vsyscall-sysenter.so.dbg $(obj)/vsyscall-syscall.so.dbg: \
-$(obj)/vsyscall-%.so.dbg: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
- $(call if_changed,syscall)
-
-AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
-AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
-
-vdsos := vdso32-sysenter.so vdso32-syscall.so
-
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(@:vdso32-%.so=$(obj)/vsyscall-%.so.dbg) \
- $(MODLIB)/vdso/$@
-
-$(vdsos):
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: $(vdsos)
diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index 91b7b5922df..5d7b381da69 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -27,7 +27,7 @@ unsigned ia32_signal_class[] = {
int ia32_classify_syscall(unsigned syscall)
{
- switch(syscall) {
+ switch (syscall) {
case __NR_open:
return 2;
case __NR_openat:
diff --git a/arch/x86/ia32/fpu32.c b/arch/x86/ia32/fpu32.c
deleted file mode 100644
index 2c8209a3605..00000000000
--- a/arch/x86/ia32/fpu32.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
- * This is used for ptrace, signals and coredumps in 32bit emulation.
- */
-
-#include <linux/sched.h>
-#include <asm/sigcontext32.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
- return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
-{
- struct _fpxreg *st = NULL;
- unsigned long tos = (fxsave->swd >> 11) & 7;
- unsigned long twd = (unsigned long) fxsave->twd;
- unsigned long tag;
- unsigned long ret = 0xffff0000;
- int i;
-
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
-
- for (i = 0 ; i < 8 ; i++) {
- if (twd & 0x1) {
- st = FPREG_ADDR( fxsave, (i - tos) & 7 );
-
- switch (st->exponent & 0x7fff) {
- case 0x7fff:
- tag = 2; /* Special */
- break;
- case 0x0000:
- if ( !st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3] ) {
- tag = 1; /* Zero */
- } else {
- tag = 2; /* Special */
- }
- break;
- default:
- if (st->significand[3] & 0x8000) {
- tag = 0; /* Valid */
- } else {
- tag = 2; /* Special */
- }
- break;
- }
- } else {
- tag = 3; /* Empty */
- }
- ret |= (tag << (2 * i));
- twd = twd >> 1;
- }
- return ret;
-}
-
-
-static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
- struct _fpstate_ia32 __user *buf)
-{
- struct _fpxreg *to;
- struct _fpreg __user *from;
- int i;
- u32 v;
- int err = 0;
-
-#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf)
- G(0, fxsave->cwd);
- G(1, fxsave->swd);
- G(2, fxsave->twd);
- fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
- G(3, fxsave->rip);
- G(4, v);
- fxsave->fop = v>>16; /* cs ignored */
- G(5, fxsave->rdp);
- /* 6: ds ignored */
-#undef G
- if (err)
- return -1;
-
- to = (struct _fpxreg *)&fxsave->st_space[0];
- from = &buf->_st[0];
- for (i = 0 ; i < 8 ; i++, to++, from++) {
- if (__copy_from_user(to, from, sizeof(*from)))
- return -1;
- }
- return 0;
-}
-
-
-static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
- struct i387_fxsave_struct *fxsave,
- struct pt_regs *regs,
- struct task_struct *tsk)
-{
- struct _fpreg __user *to;
- struct _fpxreg *from;
- int i;
- u16 cs,ds;
- int err = 0;
-
- if (tsk == current) {
- /* should be actually ds/cs at fpu exception time,
- but that information is not available in 64bit mode. */
- asm("movw %%ds,%0 " : "=r" (ds));
- asm("movw %%cs,%0 " : "=r" (cs));
- } else { /* ptrace. task has stopped. */
- ds = tsk->thread.ds;
- cs = regs->cs;
- }
-
-#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf)
- P(0, (u32)fxsave->cwd | 0xffff0000);
- P(1, (u32)fxsave->swd | 0xffff0000);
- P(2, twd_fxsr_to_i387(fxsave));
- P(3, (u32)fxsave->rip);
- P(4, cs | ((u32)fxsave->fop) << 16);
- P(5, fxsave->rdp);
- P(6, 0xffff0000 | ds);
-#undef P
-
- if (err)
- return -1;
-
- to = &buf->_st[0];
- from = (struct _fpxreg *) &fxsave->st_space[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- if (__copy_to_user(to, from, sizeof(*to)))
- return -1;
- }
- return 0;
-}
-
-int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave)
-{
- clear_fpu(tsk);
- if (!fsave) {
- if (__copy_from_user(&tsk->thread.i387.fxsave,
- &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct)))
- return -1;
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- set_stopped_child_used_math(tsk);
- }
- return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
-}
-
-int save_i387_ia32(struct task_struct *tsk,
- struct _fpstate_ia32 __user *buf,
- struct pt_regs *regs,
- int fsave)
-{
- int err = 0;
-
- init_fpu(tsk);
- if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
- return -1;
- if (fsave)
- return 0;
- err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
- if (fsave)
- return err ? -1 : 1;
- err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
- err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct));
- return err ? -1 : 1;
-}
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f82e1a94fcb..e4c12079171 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -25,6 +25,7 @@
#include <linux/binfmts.h>
#include <linux/personality.h>
#include <linux/init.h>
+#include <linux/jiffies.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -36,61 +37,67 @@
#undef WARN_OLD
#undef CORE_DUMP /* probably broken */
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
-static int load_aout_library(struct file*);
+static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
+static int load_aout_library(struct file *);
#ifdef CORE_DUMP
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
+ unsigned long limit);
/*
* fill in the user structure for a core dump..
*/
-static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
+static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
{
- u32 fs,gs;
+ u32 fs, gs;
/* changed the size calculations - should hopefully work better. lbt */
dump->magic = CMAGIC;
dump->start_code = 0;
- dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
+ dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+ dump->u_dsize = ((unsigned long)
+ (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
- dump->u_debugreg[0] = current->thread.debugreg0;
- dump->u_debugreg[1] = current->thread.debugreg1;
- dump->u_debugreg[2] = current->thread.debugreg2;
- dump->u_debugreg[3] = current->thread.debugreg3;
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.debugreg7;
-
- if (dump->start_stack < 0xc0000000)
- dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs.ebx = regs->rbx;
- dump->regs.ecx = regs->rcx;
- dump->regs.edx = regs->rdx;
- dump->regs.esi = regs->rsi;
- dump->regs.edi = regs->rdi;
- dump->regs.ebp = regs->rbp;
- dump->regs.eax = regs->rax;
+ dump->u_debugreg[0] = current->thread.debugreg0;
+ dump->u_debugreg[1] = current->thread.debugreg1;
+ dump->u_debugreg[2] = current->thread.debugreg2;
+ dump->u_debugreg[3] = current->thread.debugreg3;
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+ dump->u_debugreg[7] = current->thread.debugreg7;
+
+ if (dump->start_stack < 0xc0000000) {
+ unsigned long tmp;
+
+ tmp = (unsigned long) (0xc0000000 - dump->start_stack);
+ dump->u_ssize = tmp >> PAGE_SHIFT;
+ }
+
+ dump->regs.bx = regs->bx;
+ dump->regs.cx = regs->cx;
+ dump->regs.dx = regs->dx;
+ dump->regs.si = regs->si;
+ dump->regs.di = regs->di;
+ dump->regs.bp = regs->bp;
+ dump->regs.ax = regs->ax;
dump->regs.ds = current->thread.ds;
dump->regs.es = current->thread.es;
asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
- asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
- dump->regs.orig_eax = regs->orig_rax;
- dump->regs.eip = regs->rip;
+ asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+ dump->regs.orig_ax = regs->orig_ax;
+ dump->regs.ip = regs->ip;
dump->regs.cs = regs->cs;
- dump->regs.eflags = regs->eflags;
- dump->regs.esp = regs->rsp;
+ dump->regs.flags = regs->flags;
+ dump->regs.sp = regs->sp;
dump->regs.ss = regs->ss;
#if 1 /* FIXME */
dump->u_fpvalid = 0;
#else
- dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+ dump->u_fpvalid = dump_fpu(regs, &dump->i387);
#endif
}
@@ -128,15 +135,19 @@ static int dump_write(struct file *file, const void *addr, int nr)
return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}
-#define DUMP_WRITE(addr, nr) \
+#define DUMP_WRITE(addr, nr) \
if (!dump_write(file, (void *)(addr), (nr))) \
goto end_coredump;
-#define DUMP_SEEK(offset) \
-if (file->f_op->llseek) { \
- if (file->f_op->llseek(file,(offset),0) != (offset)) \
- goto end_coredump; \
-} else file->f_pos = (offset)
+#define DUMP_SEEK(offset) \
+ if (file->f_op->llseek) { \
+ if (file->f_op->llseek(file, (offset), 0) != (offset)) \
+ goto end_coredump; \
+ } else \
+ file->f_pos = (offset)
+
+#define START_DATA() (u.u_tsize << PAGE_SHIFT)
+#define START_STACK(u) (u.start_stack)
/*
* Routine writes a core dump image in the current directory.
@@ -148,62 +159,70 @@ if (file->f_op->llseek) { \
* dumping of the process results in another error..
*/
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
+ unsigned long limit)
{
mm_segment_t fs;
int has_dumped = 0;
unsigned long dump_start, dump_size;
struct user32 dump;
-# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
-# define START_STACK(u) (u.start_stack)
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
current->flags |= PF_DUMPCORE;
- strncpy(dump.u_comm, current->comm, sizeof(current->comm));
- dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
+ strncpy(dump.u_comm, current->comm, sizeof(current->comm));
+ dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) -
+ ((unsigned long)(&dump)));
dump.signal = signr;
dump_thread32(regs, &dump);
-/* If the size of the dump file exceeds the rlimit, then see what would happen
- if we wrote the stack, but not the data area. */
+ /*
+ * If the size of the dump file exceeds the rlimit, then see
+ * what would happen if we wrote the stack, but not the data
+ * area.
+ */
if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit)
dump.u_dsize = 0;
-/* Make sure we have enough room to write the stack and data areas. */
+ /* Make sure we have enough room to write the stack and data areas. */
if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
dump.u_ssize = 0;
-/* make sure we actually have a data and stack area to dump */
+ /* make sure we actually have a data and stack area to dump */
set_fs(USER_DS);
- if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+ if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump),
+ dump.u_dsize << PAGE_SHIFT))
dump.u_dsize = 0;
- if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+ if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump),
+ dump.u_ssize << PAGE_SHIFT))
dump.u_ssize = 0;
set_fs(KERNEL_DS);
-/* struct user */
- DUMP_WRITE(&dump,sizeof(dump));
-/* Now dump all of the user data. Include malloced stuff as well */
+ /* struct user */
+ DUMP_WRITE(&dump, sizeof(dump));
+ /* Now dump all of the user data. Include malloced stuff as well */
DUMP_SEEK(PAGE_SIZE);
-/* now we start writing out the user space info */
+ /* now we start writing out the user space info */
set_fs(USER_DS);
-/* Dump the data area */
+ /* Dump the data area */
if (dump.u_dsize != 0) {
dump_start = START_DATA(dump);
dump_size = dump.u_dsize << PAGE_SHIFT;
- DUMP_WRITE(dump_start,dump_size);
+ DUMP_WRITE(dump_start, dump_size);
}
-/* Now prepare to dump the stack area */
+ /* Now prepare to dump the stack area */
if (dump.u_ssize != 0) {
dump_start = START_STACK(dump);
dump_size = dump.u_ssize << PAGE_SHIFT;
- DUMP_WRITE(dump_start,dump_size);
+ DUMP_WRITE(dump_start, dump_size);
}
-/* Finally dump the task struct. Not be used by gdb, but could be useful */
+ /*
+ * Finally dump the task struct. Not be used by gdb, but
+ * could be useful
+ */
set_fs(KERNEL_DS);
- DUMP_WRITE(current,sizeof(*current));
+ DUMP_WRITE(current, sizeof(*current));
end_coredump:
set_fs(fs);
return has_dumped;
@@ -217,35 +236,34 @@ end_coredump:
*/
static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
{
- u32 __user *argv;
- u32 __user *envp;
- u32 __user *sp;
- int argc = bprm->argc;
- int envc = bprm->envc;
+ u32 __user *argv, *envp, *sp;
+ int argc = bprm->argc, envc = bprm->envc;
sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
sp -= envc+1;
envp = sp;
sp -= argc+1;
argv = sp;
- put_user((unsigned long) envp,--sp);
- put_user((unsigned long) argv,--sp);
- put_user(argc,--sp);
+ put_user((unsigned long) envp, --sp);
+ put_user((unsigned long) argv, --sp);
+ put_user(argc, --sp);
current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
+ while (argc-- > 0) {
char c;
- put_user((u32)(unsigned long)p,argv++);
+
+ put_user((u32)(unsigned long)p, argv++);
do {
- get_user(c,p++);
+ get_user(c, p++);
} while (c);
}
put_user(0, argv);
current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
+ while (envc-- > 0) {
char c;
- put_user((u32)(unsigned long)p,envp++);
+
+ put_user((u32)(unsigned long)p, envp++);
do {
- get_user(c,p++);
+ get_user(c, p++);
} while (c);
}
put_user(0, envp);
@@ -257,20 +275,18 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
* These are the functions used to load a.out style executables and shared
* libraries. There is no binary dependent code anywhere else.
*/
-
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
+ unsigned long error, fd_offset, rlim;
struct exec ex;
- unsigned long error;
- unsigned long fd_offset;
- unsigned long rlim;
int retval;
ex = *((struct exec *) bprm->buf); /* exec-header */
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ i_size_read(bprm->file->f_path.dentry->d_inode) <
+ ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
return -ENOEXEC;
}
@@ -291,13 +307,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (retval)
return retval;
- regs->cs = __USER32_CS;
+ regs->cs = __USER32_CS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
regs->r13 = regs->r14 = regs->r15 = 0;
/* OK, This is the point of no return */
set_personality(PER_LINUX);
- set_thread_flag(TIF_IA32);
+ set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_ABI_PENDING);
current->mm->end_code = ex.a_text +
@@ -311,7 +327,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->mm->mmap = NULL;
compute_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
+ current->flags &= ~PF_FORKNOEXEC;
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
@@ -338,30 +354,31 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
send_sig(SIGKILL, current, 0);
return error;
}
-
+
flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
} else {
#ifdef WARN_OLD
static unsigned long error_time, error_time2;
if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
- {
+ (N_MAGIC(ex) != NMAGIC) &&
+ time_after(jiffies, error_time2 + 5*HZ)) {
printk(KERN_NOTICE "executable not page aligned\n");
error_time2 = jiffies;
}
if ((fd_offset & ~PAGE_MASK) != 0 &&
- (jiffies-error_time) > 5*HZ)
- {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %s\n",
+ time_after(jiffies, error_time + 5*HZ)) {
+ printk(KERN_WARNING
+ "fd_offset is not page aligned. Please convert "
+ "program: %s\n",
bprm->file->f_path.dentry->d_name.name);
error_time = jiffies;
}
#endif
- if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
loff_t pos = fd_offset;
+
down_write(&current->mm->mmap_sem);
do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
up_write(&current->mm->mmap_sem);
@@ -376,9 +393,10 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
down_write(&current->mm->mmap_sem);
error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
- fd_offset);
+ PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
+ MAP_EXECUTABLE | MAP_32BIT,
+ fd_offset);
up_write(&current->mm->mmap_sem);
if (error != N_TXTADDR(ex)) {
@@ -387,9 +405,10 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
}
down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
+ MAP_EXECUTABLE | MAP_32BIT,
fd_offset + ex.a_text);
up_write(&current->mm->mmap_sem);
if (error != N_DATADDR(ex)) {
@@ -403,9 +422,9 @@ beyond_if:
set_brk(current->mm->start_brk, current->mm->brk);
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
return retval;
}
@@ -414,10 +433,10 @@ beyond_if:
/* start thread */
asm volatile("movl %0,%%fs" :: "r" (0)); \
asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
- load_gs_index(0);
- (regs)->rip = ex.a_entry;
- (regs)->rsp = current->mm->start_stack;
- (regs)->eflags = 0x200;
+ load_gs_index(0);
+ (regs)->ip = ex.a_entry;
+ (regs)->sp = current->mm->start_stack;
+ (regs)->flags = 0x200;
(regs)->cs = __USER32_CS;
(regs)->ss = __USER32_DS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 =
@@ -425,7 +444,7 @@ beyond_if:
set_fs(USER_DS);
if (unlikely(current->ptrace & PT_PTRACED)) {
if (current->ptrace & PT_TRACE_EXEC)
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+ ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
else
send_sig(SIGTRAP, current, 0);
}
@@ -434,9 +453,8 @@ beyond_if:
static int load_aout_library(struct file *file)
{
- struct inode * inode;
- unsigned long bss, start_addr, len;
- unsigned long error;
+ struct inode *inode;
+ unsigned long bss, start_addr, len, error;
int retval;
struct exec ex;
@@ -450,7 +468,8 @@ static int load_aout_library(struct file *file)
/* We come in here for the regular a.out style of shared libraries */
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ i_size_read(inode) <
+ ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
goto out;
}
@@ -467,10 +486,10 @@ static int load_aout_library(struct file *file)
#ifdef WARN_OLD
static unsigned long error_time;
- if ((jiffies-error_time) > 5*HZ)
- {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ if (time_after(jiffies, error_time + 5*HZ)) {
+ printk(KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert "
+ "library: %s\n",
file->f_path.dentry->d_name.name);
error_time = jiffies;
}
@@ -478,11 +497,12 @@ static int load_aout_library(struct file *file)
down_write(&current->mm->mmap_sem);
do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
up_write(&current->mm->mmap_sem);
-
+
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
flush_icache_range((unsigned long) start_addr,
- (unsigned long) start_addr + ex.a_text + ex.a_data);
+ (unsigned long) start_addr + ex.a_text +
+ ex.a_data);
retval = 0;
goto out;
diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
deleted file mode 100644
index 55822d2cf05..00000000000
--- a/arch/x86/ia32/ia32_binfmt.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Written 2000,2002 by Andi Kleen.
- *
- * Loosely based on the sparc64 and IA64 32bit emulation loaders.
- * This tricks binfmt_elf.c into loading 32bit binaries using lots
- * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
- */
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/compat.h>
-#include <linux/string.h>
-#include <linux/binfmts.h>
-#include <linux/mm.h>
-#include <linux/security.h>
-#include <linux/elfcore-compat.h>
-
-#include <asm/segment.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/user32.h>
-#include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
-#include <asm/i387.h>
-#include <asm/uaccess.h>
-#include <asm/ia32.h>
-#include <asm/vsyscall32.h>
-
-#undef ELF_ARCH
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
-#define ELF_ARCH EM_386
-
-#undef elfhdr
-#undef elf_phdr
-#undef elf_note
-#undef elf_addr_t
-#define elfhdr elf32_hdr
-#define elf_phdr elf32_phdr
-#define elf_note elf32_note
-#define elf_addr_t Elf32_Off
-
-#define ELF_NAME "elf/i386"
-
-#define AT_SYSINFO 32
-#define AT_SYSINFO_EHDR 33
-
-int sysctl_vsyscall32 = 1;
-
-#undef ARCH_DLINFO
-#define ARCH_DLINFO do { \
- if (sysctl_vsyscall32) { \
- current->mm->context.vdso = (void *)VSYSCALL32_BASE; \
- NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \
- } \
-} while(0)
-
-struct file;
-
-#define IA32_EMULATOR 1
-
-#undef ELF_ET_DYN_BASE
-
-#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
-
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
-
-#define _GET_SEG(x) \
- ({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; })
-
-/* Assumes current==process to be dumped */
-#undef ELF_CORE_COPY_REGS
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
- pr_reg[0] = regs->rbx; \
- pr_reg[1] = regs->rcx; \
- pr_reg[2] = regs->rdx; \
- pr_reg[3] = regs->rsi; \
- pr_reg[4] = regs->rdi; \
- pr_reg[5] = regs->rbp; \
- pr_reg[6] = regs->rax; \
- pr_reg[7] = _GET_SEG(ds); \
- pr_reg[8] = _GET_SEG(es); \
- pr_reg[9] = _GET_SEG(fs); \
- pr_reg[10] = _GET_SEG(gs); \
- pr_reg[11] = regs->orig_rax; \
- pr_reg[12] = regs->rip; \
- pr_reg[13] = regs->cs; \
- pr_reg[14] = regs->eflags; \
- pr_reg[15] = regs->rsp; \
- pr_reg[16] = regs->ss;
-
-
-#define elf_prstatus compat_elf_prstatus
-#define elf_prpsinfo compat_elf_prpsinfo
-#define elf_fpregset_t struct user_i387_ia32_struct
-#define elf_fpxregset_t struct user32_fxsr_struct
-#define user user32
-
-#undef elf_read_implies_exec
-#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
-
-#define elf_core_copy_regs elf32_core_copy_regs
-static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs,
- struct pt_regs *regs)
-{
- ELF_CORE_COPY_REGS((&elfregs->ebx), regs)
-}
-
-#define elf_core_copy_task_regs elf32_core_copy_task_regs
-static inline int elf32_core_copy_task_regs(struct task_struct *t,
- compat_elf_gregset_t* elfregs)
-{
- struct pt_regs *pp = task_pt_regs(t);
- ELF_CORE_COPY_REGS((&elfregs->ebx), pp);
- /* fix wrong segments */
- elfregs->ds = t->thread.ds;
- elfregs->fs = t->thread.fsindex;
- elfregs->gs = t->thread.gsindex;
- elfregs->es = t->thread.es;
- return 1;
-}
-
-#define elf_core_copy_task_fpregs elf32_core_copy_task_fpregs
-static inline int
-elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs,
- elf_fpregset_t *fpu)
-{
- struct _fpstate_ia32 *fpstate = (void*)fpu;
- mm_segment_t oldfs = get_fs();
-
- if (!tsk_used_math(tsk))
- return 0;
- if (!regs)
- regs = task_pt_regs(tsk);
- if (tsk == current)
- unlazy_fpu(tsk);
- set_fs(KERNEL_DS);
- save_i387_ia32(tsk, fpstate, regs, 1);
- /* Correct for i386 bug. It puts the fop into the upper 16bits of
- the tag word (like FXSAVE), not into the fcs*/
- fpstate->cssel |= fpstate->tag & 0xffff0000;
- set_fs(oldfs);
- return 1;
-}
-
-#define ELF_CORE_COPY_XFPREGS 1
-#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
-#define elf_core_copy_task_xfpregs elf32_core_copy_task_xfpregs
-static inline int
-elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
-{
- struct pt_regs *regs = task_pt_regs(t);
- if (!tsk_used_math(t))
- return 0;
- if (t == current)
- unlazy_fpu(t);
- memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t));
- xfpu->fcs = regs->cs;
- xfpu->fos = t->thread.ds; /* right? */
- return 1;
-}
-
-#undef elf_check_arch
-#define elf_check_arch(x) \
- ((x)->e_machine == EM_386)
-
-extern int force_personality32;
-
-#undef ELF_EXEC_PAGESIZE
-#undef ELF_HWCAP
-#undef ELF_PLATFORM
-#undef SET_PERSONALITY
-#define ELF_EXEC_PAGESIZE PAGE_SIZE
-#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
-#define ELF_PLATFORM ("i686")
-#define SET_PERSONALITY(ex, ibcs2) \
-do { \
- unsigned long new_flags = 0; \
- if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
- new_flags = _TIF_IA32; \
- if ((current_thread_info()->flags & _TIF_IA32) \
- != new_flags) \
- set_thread_flag(TIF_ABI_PENDING); \
- else \
- clear_thread_flag(TIF_ABI_PENDING); \
- /* XXX This overwrites the user set personality */ \
- current->personality |= force_personality32; \
-} while (0)
-
-/* Override some function names */
-#define elf_format elf32_format
-
-#define init_elf_binfmt init_elf32_binfmt
-#define exit_elf_binfmt exit_elf32_binfmt
-
-#define load_elf_binary load_elf32_binary
-
-#undef ELF_PLAT_INIT
-#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
-
-#undef start_thread
-#define start_thread(regs,new_rip,new_rsp) do { \
- asm volatile("movl %0,%%fs" :: "r" (0)); \
- asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
- load_gs_index(0); \
- (regs)->rip = (new_rip); \
- (regs)->rsp = (new_rsp); \
- (regs)->eflags = 0x200; \
- (regs)->cs = __USER32_CS; \
- (regs)->ss = __USER32_DS; \
- set_fs(USER_DS); \
-} while(0)
-
-
-#include <linux/module.h>
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
-MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-static void elf32_init(struct pt_regs *);
-
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-#define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
-
-#include "../../../fs/binfmt_elf.c"
-
-static void elf32_init(struct pt_regs *regs)
-{
- struct task_struct *me = current;
- regs->rdi = 0;
- regs->rsi = 0;
- regs->rdx = 0;
- regs->rcx = 0;
- regs->rax = 0;
- regs->rbx = 0;
- regs->rbp = 0;
- regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
- regs->r13 = regs->r14 = regs->r15 = 0;
- me->thread.fs = 0;
- me->thread.gs = 0;
- me->thread.fsindex = 0;
- me->thread.gsindex = 0;
- me->thread.ds = __USER_DS;
- me->thread.es = __USER_DS;
-}
-
-#ifdef CONFIG_SYSCTL
-/* Register vsyscall32 into the ABI table */
-#include <linux/sysctl.h>
-
-static ctl_table abi_table2[] = {
- {
- .procname = "vsyscall32",
- .data = &sysctl_vsyscall32,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {}
-};
-
-static ctl_table abi_root_table2[] = {
- {
- .ctl_name = CTL_ABI,
- .procname = "abi",
- .mode = 0555,
- .child = abi_table2
- },
- {}
-};
-
-static __init int ia32_binfmt_init(void)
-{
- register_sysctl_table(abi_root_table2);
- return 0;
-}
-__initcall(ia32_binfmt_init);
-#endif
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 6ea19c25f90..1c0503bdfb1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -29,9 +29,8 @@
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
#include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
#include <asm/proto.h>
-#include <asm/vsyscall32.h>
+#include <asm/vdso.h>
#define DEBUG_SIG 0
@@ -43,7 +42,8 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err;
- if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+
+ if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
/* If you change siginfo_t structure, please make sure that
@@ -53,16 +53,19 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
3 ints plus the relevant union member. */
err = __put_user(from->si_signo, &to->si_signo);
err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user((short)from->si_code, &to->si_code);
+ err |= __put_user((short)from->si_code, &to->si_code);
if (from->si_code < 0) {
err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
} else {
- /* First 32bits of unions are always present:
- * si_pid === si_band === si_tid === si_addr(LS half) */
- err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]);
+ /*
+ * First 32bits of unions are always present:
+ * si_pid === si_band === si_tid === si_addr(LS half)
+ */
+ err |= __put_user(from->_sifields._pad[0],
+ &to->_sifields._pad[0]);
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
break;
@@ -76,14 +79,15 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_uid, &to->si_uid);
break;
case __SI_POLL >> 16:
- err |= __put_user(from->si_fd, &to->si_fd);
+ err |= __put_user(from->si_fd, &to->si_fd);
break;
case __SI_TIMER >> 16:
- err |= __put_user(from->si_overrun, &to->si_overrun);
+ err |= __put_user(from->si_overrun, &to->si_overrun);
err |= __put_user(ptr_to_compat(from->si_ptr),
- &to->si_ptr);
+ &to->si_ptr);
break;
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+ /* This is not generated by the kernel as of now. */
+ case __SI_RT >> 16:
case __SI_MESGQ >> 16:
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_int, &to->si_int);
@@ -97,7 +101,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
int err;
u32 ptr32;
- if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+
+ if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
return -EFAULT;
err = __get_user(to->si_signo, &from->si_signo);
@@ -112,8 +117,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
return err;
}
-asmlinkage long
-sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
+asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
{
mask &= _BLOCKABLE;
spin_lock_irq(&current->sighand->siglock);
@@ -128,36 +132,37 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
return -ERESTARTNOHAND;
}
-asmlinkage long
-sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
- stack_ia32_t __user *uoss_ptr,
- struct pt_regs *regs)
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
+ stack_ia32_t __user *uoss_ptr,
+ struct pt_regs *regs)
{
- stack_t uss,uoss;
+ stack_t uss, uoss;
int ret;
- mm_segment_t seg;
- if (uss_ptr) {
+ mm_segment_t seg;
+
+ if (uss_ptr) {
u32 ptr;
- memset(&uss,0,sizeof(stack_t));
- if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
+
+ memset(&uss, 0, sizeof(stack_t));
+ if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) ||
__get_user(ptr, &uss_ptr->ss_sp) ||
__get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
__get_user(uss.ss_size, &uss_ptr->ss_size))
return -EFAULT;
uss.ss_sp = compat_ptr(ptr);
}
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp);
- set_fs(seg);
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+ ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
+ set_fs(seg);
if (ret >= 0 && uoss_ptr) {
- if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) ||
+ if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) ||
__put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
__put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
__put_user(uoss.ss_size, &uoss_ptr->ss_size))
ret = -EFAULT;
- }
- return ret;
+ }
+ return ret;
}
/*
@@ -186,87 +191,85 @@ struct rt_sigframe
char retcode[8];
};
-static int
-ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax)
+#define COPY(x) { \
+ unsigned int reg; \
+ err |= __get_user(reg, &sc->x); \
+ regs->x = reg; \
+}
+
+#define RELOAD_SEG(seg,mask) \
+ { unsigned int cur; \
+ unsigned short pre; \
+ err |= __get_user(pre, &sc->seg); \
+ asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
+ pre |= mask; \
+ if (pre != cur) loadsegment(seg, pre); }
+
+static int ia32_restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext_ia32 __user *sc,
+ unsigned int *peax)
{
- unsigned int err = 0;
-
+ unsigned int tmpflags, gs, oldgs, err = 0;
+ struct _fpstate_ia32 __user *buf;
+ u32 tmp;
+
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#if DEBUG_SIG
- printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
- sc, sc->err, sc->eip, sc->cs, sc->eflags);
+ printk(KERN_DEBUG "SIG restore_sigcontext: "
+ "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
+ sc, sc->err, sc->ip, sc->cs, sc->flags);
#endif
-#define COPY(x) { \
- unsigned int reg; \
- err |= __get_user(reg, &sc->e ##x); \
- regs->r ## x = reg; \
-}
-#define RELOAD_SEG(seg,mask) \
- { unsigned int cur; \
- unsigned short pre; \
- err |= __get_user(pre, &sc->seg); \
- asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
- pre |= mask; \
- if (pre != cur) loadsegment(seg,pre); }
-
- /* Reload fs and gs if they have changed in the signal handler.
- This does not handle long fs/gs base changes in the handler, but
- does not clobber them at least in the normal case. */
-
- {
- unsigned gs, oldgs;
- err |= __get_user(gs, &sc->gs);
- gs |= 3;
- asm("movl %%gs,%0" : "=r" (oldgs));
- if (gs != oldgs)
- load_gs_index(gs);
- }
- RELOAD_SEG(fs,3);
- RELOAD_SEG(ds,3);
- RELOAD_SEG(es,3);
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
+ err |= __get_user(gs, &sc->gs);
+ gs |= 3;
+ asm("movl %%gs,%0" : "=r" (oldgs));
+ if (gs != oldgs)
+ load_gs_index(gs);
+
+ RELOAD_SEG(fs, 3);
+ RELOAD_SEG(ds, 3);
+ RELOAD_SEG(es, 3);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
- /* Don't touch extended registers */
-
- err |= __get_user(regs->cs, &sc->cs);
- regs->cs |= 3;
- err |= __get_user(regs->ss, &sc->ss);
- regs->ss |= 3;
-
- {
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
- regs->orig_rax = -1; /* disable syscall checks */
- }
+ /* Don't touch extended registers */
+
+ err |= __get_user(regs->cs, &sc->cs);
+ regs->cs |= 3;
+ err |= __get_user(regs->ss, &sc->ss);
+ regs->ss |= 3;
+
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ /* disable syscall checks */
+ regs->orig_ax = -1;
+
+ err |= __get_user(tmp, &sc->fpstate);
+ buf = compat_ptr(tmp);
+ if (buf) {
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387_ia32(buf);
+ } else {
+ struct task_struct *me = current;
- {
- u32 tmp;
- struct _fpstate_ia32 __user * buf;
- err |= __get_user(tmp, &sc->fpstate);
- buf = compat_ptr(tmp);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387_ia32(current, buf, 0);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
+ if (used_math()) {
+ clear_fpu(me);
+ clear_used_math();
}
}
- {
- u32 tmp;
- err |= __get_user(tmp, &sc->eax);
- *peax = tmp;
- }
+ err |= __get_user(tmp, &sc->ax);
+ *peax = tmp;
+
return err;
badframe:
@@ -275,15 +278,16 @@ badframe:
asmlinkage long sys32_sigreturn(struct pt_regs *regs)
{
- struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8);
+ struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8);
sigset_t set;
- unsigned int eax;
+ unsigned int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
|| (_COMPAT_NSIG_WORDS > 1
- && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask,
+ && __copy_from_user((((char *) &set.sig) + 4),
+ &frame->extramask,
sizeof(frame->extramask))))
goto badframe;
@@ -292,24 +296,24 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
- if (ia32_restore_sigcontext(regs, &frame->sc, &eax))
+
+ if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
- return eax;
+ return ax;
badframe:
signal_fault(regs, frame, "32bit sigreturn");
return 0;
-}
+}
asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
sigset_t set;
- unsigned int eax;
+ unsigned int ax;
struct pt_regs tregs;
- frame = (struct rt_sigframe __user *)(regs->rsp - 4);
+ frame = (struct rt_sigframe __user *)(regs->sp - 4);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -321,28 +325,28 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
- if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+
+ if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
tregs = *regs;
if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
goto badframe;
- return eax;
+ return ax;
badframe:
- signal_fault(regs,frame,"32bit rt sigreturn");
+ signal_fault(regs, frame, "32bit rt sigreturn");
return 0;
-}
+}
/*
* Set up a signal frame.
*/
-static int
-ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
- struct pt_regs *regs, unsigned int mask)
+static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
+ struct _fpstate_ia32 __user *fpstate,
+ struct pt_regs *regs, unsigned int mask)
{
int tmp, err = 0;
@@ -356,26 +360,26 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __
__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
err |= __put_user(tmp, (unsigned int __user *)&sc->es);
- err |= __put_user((u32)regs->rdi, &sc->edi);
- err |= __put_user((u32)regs->rsi, &sc->esi);
- err |= __put_user((u32)regs->rbp, &sc->ebp);
- err |= __put_user((u32)regs->rsp, &sc->esp);
- err |= __put_user((u32)regs->rbx, &sc->ebx);
- err |= __put_user((u32)regs->rdx, &sc->edx);
- err |= __put_user((u32)regs->rcx, &sc->ecx);
- err |= __put_user((u32)regs->rax, &sc->eax);
+ err |= __put_user((u32)regs->di, &sc->di);
+ err |= __put_user((u32)regs->si, &sc->si);
+ err |= __put_user((u32)regs->bp, &sc->bp);
+ err |= __put_user((u32)regs->sp, &sc->sp);
+ err |= __put_user((u32)regs->bx, &sc->bx);
+ err |= __put_user((u32)regs->dx, &sc->dx);
+ err |= __put_user((u32)regs->cx, &sc->cx);
+ err |= __put_user((u32)regs->ax, &sc->ax);
err |= __put_user((u32)regs->cs, &sc->cs);
err |= __put_user((u32)regs->ss, &sc->ss);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user((u32)regs->rip, &sc->eip);
- err |= __put_user((u32)regs->eflags, &sc->eflags);
- err |= __put_user((u32)regs->rsp, &sc->esp_at_signal);
+ err |= __put_user((u32)regs->ip, &sc->ip);
+ err |= __put_user((u32)regs->flags, &sc->flags);
+ err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
- tmp = save_i387_ia32(current, fpstate, regs, 0);
+ tmp = save_i387_ia32(fpstate);
if (tmp < 0)
err = -EFAULT;
- else {
+ else {
clear_used_math();
stts();
err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
@@ -392,40 +396,53 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __
/*
* Determine which stack to use..
*/
-static void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+ size_t frame_size)
{
- unsigned long rsp;
+ unsigned long sp;
/* Default to using normal stack */
- rsp = regs->rsp;
+ sp = regs->sp;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(rsp) == 0)
- rsp = current->sas_ss_sp + current->sas_ss_size;
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
}
/* This is the legacy signal stack switching. */
else if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- rsp = (unsigned long) ka->sa.sa_restorer;
- }
+ ka->sa.sa_restorer)
+ sp = (unsigned long) ka->sa.sa_restorer;
- rsp -= frame_size;
+ sp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0. */
- rsp = ((rsp + 4) & -16ul) - 4;
- return (void __user *) rsp;
+ sp = ((sp + 4) & -16ul) - 4;
+ return (void __user *) sp;
}
int ia32_setup_frame(int sig, struct k_sigaction *ka,
- compat_sigset_t *set, struct pt_regs * regs)
+ compat_sigset_t *set, struct pt_regs *regs)
{
struct sigframe __user *frame;
+ void __user *restorer;
int err = 0;
+ /* copy_to_user optimizes that into a single 8 byte store */
+ static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+ u16 pad;
+ } __attribute__((packed)) code = {
+ 0xb858, /* popl %eax ; movl $...,%eax */
+ __NR_ia32_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0,
+ };
+
frame = get_sigframe(ka, regs, sizeof(*frame));
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
@@ -443,64 +460,53 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
if (_COMPAT_NSIG_WORDS > 1) {
err |= __copy_to_user(frame->extramask, &set->sig[1],
sizeof(frame->extramask));
+ if (err)
+ goto give_sigsegv;
}
- if (err)
- goto give_sigsegv;
- /* Return stub is in 32bit vsyscall page */
- {
- void __user *restorer;
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ restorer = ka->sa.sa_restorer;
+ } else {
+ /* Return stub is in 32bit vsyscall page */
if (current->binfmt->hasvdso)
- restorer = VSYSCALL32_SIGRETURN;
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso,
+ sigreturn);
else
- restorer = (void *)&frame->retcode;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
- }
- /* These are actually not used anymore, but left because some
- gdb versions depend on them as a marker. */
- {
- /* copy_to_user optimizes that into a single 8 byte store */
- static const struct {
- u16 poplmovl;
- u32 val;
- u16 int80;
- u16 pad;
- } __attribute__((packed)) code = {
- 0xb858, /* popl %eax ; movl $...,%eax */
- __NR_ia32_sigreturn,
- 0x80cd, /* int $0x80 */
- 0,
- };
- err |= __copy_to_user(frame->retcode, &code, 8);
+ restorer = &frame->retcode;
}
+ err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+
+ /*
+ * These are actually not used anymore, but left because some
+ * gdb versions depend on them as a marker.
+ */
+ err |= __copy_to_user(frame->retcode, &code, 8);
if (err)
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->rsp = (unsigned long) frame;
- regs->rip = (unsigned long) ka->sa.sa_handler;
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
/* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = 0;
- regs->rcx = 0;
+ regs->ax = sig;
+ regs->dx = 0;
+ regs->cx = 0;
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
- regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
+ regs->cs = __USER32_CS;
+ regs->ss = __USER32_DS;
set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
+ current->comm, current->pid, frame, regs->ip, frame->pretcode);
#endif
return 0;
@@ -511,25 +517,34 @@ give_sigsegv:
}
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- compat_sigset_t *set, struct pt_regs * regs)
+ compat_sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ void __user *restorer;
int err = 0;
+ /* __copy_to_user optimizes that into a single 8 byte store */
+ static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u16 pad;
+ u8 pad2;
+ } __attribute__((packed)) code = {
+ 0xb8,
+ __NR_ia32_rt_sigreturn,
+ 0x80cd,
+ 0,
+ };
+
frame = get_sigframe(ka, regs, sizeof(*frame));
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
- {
- struct exec_domain *ed = current_thread_info()->exec_domain;
- err |= __put_user((ed
- && ed->signal_invmap
- && sig < 32
- ? ed->signal_invmap[sig]
- : sig),
- &frame->sig);
- }
+ err |= __put_user((ed && ed->signal_invmap && sig < 32
+ ? ed->signal_invmap[sig] : sig), &frame->sig);
err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
err |= copy_siginfo_to_user32(&frame->info, info);
@@ -540,73 +555,58 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->rsp),
+ err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
- regs, set->sig[0]);
+ regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
-
- {
- void __user *restorer = VSYSCALL32_RTSIGRETURN;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
- }
-
- /* This is movl $,%eax ; int $0x80 */
- /* Not actually used anymore, but left because some gdb versions
- need it. */
- {
- /* __copy_to_user optimizes that into a single 8 byte store */
- static const struct {
- u8 movl;
- u32 val;
- u16 int80;
- u16 pad;
- u8 pad2;
- } __attribute__((packed)) code = {
- 0xb8,
- __NR_ia32_rt_sigreturn,
- 0x80cd,
- 0,
- };
- err |= __copy_to_user(frame->retcode, &code, 8);
- }
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ else
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso,
+ rt_sigreturn);
+ err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+
+ /*
+ * Not actually used anymore, but left because some gdb
+ * versions need it.
+ */
+ err |= __copy_to_user(frame->retcode, &code, 8);
if (err)
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->rsp = (unsigned long) frame;
- regs->rip = (unsigned long) ka->sa.sa_handler;
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
/* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = (unsigned long) &frame->info;
- regs->rcx = (unsigned long) &frame->uc;
+ regs->ax = sig;
+ regs->dx = (unsigned long) &frame->info;
+ regs->cx = (unsigned long) &frame->uc;
/* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = (unsigned long) &frame->info;
- regs->rcx = (unsigned long) &frame->uc;
+ regs->ax = sig;
+ regs->dx = (unsigned long) &frame->info;
+ regs->cx = (unsigned long) &frame->uc;
+
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
-
- regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
+ regs->cs = __USER32_CS;
+ regs->ss = __USER32_DS;
set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
+ current->comm, current->pid, frame, regs->ip, frame->pretcode);
#endif
return 0;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index df588f0f76e..0db0a6291bb 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -12,7 +12,6 @@
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
#include <asm/segment.h>
-#include <asm/vsyscall32.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
@@ -104,7 +103,7 @@ ENTRY(ia32_sysenter_target)
pushfq
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
- movl $VSYSCALL32_SYSEXIT, %r10d
+ movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
CFI_REGISTER rip,r10
pushq $__USER32_CS
CFI_ADJUST_CFA_OFFSET 8
@@ -142,6 +141,8 @@ sysenter_do_call:
andl $~TS_COMPAT,threadinfo_status(%r10)
/* clear IF, that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
+ movl RIP-R11(%rsp),%edx /* User %eip */
+ CFI_REGISTER rip,rdx
RESTORE_ARGS 1,24,1,1,1,1
popfq
CFI_ADJUST_CFA_OFFSET -8
@@ -149,8 +150,6 @@ sysenter_do_call:
popq %rcx /* User %esp */
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rsp,rcx
- movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
- CFI_REGISTER rip,rdx
TRACE_IRQS_ON
swapgs
sti /* sti only takes effect after the next instruction */
@@ -644,8 +643,8 @@ ia32_sys_call_table:
.quad compat_sys_futex /* 240 */
.quad compat_sys_sched_setaffinity
.quad compat_sys_sched_getaffinity
- .quad sys32_set_thread_area
- .quad sys32_get_thread_area
+ .quad sys_set_thread_area
+ .quad sys_get_thread_area
.quad compat_sys_io_setup /* 245 */
.quad sys_io_destroy
.quad compat_sys_io_getevents
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index 7b3342e5aab..d21991ce606 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -9,9 +9,8 @@
#include <linux/ipc.h>
#include <linux/compat.h>
-asmlinkage long
-sys32_ipc(u32 call, int first, int second, int third,
- compat_uptr_t ptr, u32 fifth)
+asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
+ compat_uptr_t ptr, u32 fifth)
{
int version;
@@ -19,36 +18,35 @@ sys32_ipc(u32 call, int first, int second, int third,
call &= 0xffff;
switch (call) {
- case SEMOP:
+ case SEMOP:
/* struct sembuf is the same on 32 and 64bit :)) */
return sys_semtimedop(first, compat_ptr(ptr), second, NULL);
- case SEMTIMEDOP:
+ case SEMTIMEDOP:
return compat_sys_semtimedop(first, compat_ptr(ptr), second,
compat_ptr(fifth));
- case SEMGET:
+ case SEMGET:
return sys_semget(first, second, third);
- case SEMCTL:
+ case SEMCTL:
return compat_sys_semctl(first, second, third, compat_ptr(ptr));
- case MSGSND:
+ case MSGSND:
return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
- case MSGRCV:
+ case MSGRCV:
return compat_sys_msgrcv(first, second, fifth, third,
version, compat_ptr(ptr));
- case MSGGET:
+ case MSGGET:
return sys_msgget((key_t) first, second);
- case MSGCTL:
+ case MSGCTL:
return compat_sys_msgctl(first, second, compat_ptr(ptr));
- case SHMAT:
+ case SHMAT:
return compat_sys_shmat(first, second, third, version,
compat_ptr(ptr));
- break;
- case SHMDT:
+ case SHMDT:
return sys_shmdt(compat_ptr(ptr));
- case SHMGET:
+ case SHMGET:
return sys_shmget(first, (unsigned)second, third);
- case SHMCTL:
+ case SHMCTL:
return compat_sys_shmctl(first, second, compat_ptr(ptr));
}
return -ENOSYS;
diff --git a/arch/x86/ia32/mmap32.c b/arch/x86/ia32/mmap32.c
deleted file mode 100644
index e4b84b4a417..00000000000
--- a/arch/x86/ia32/mmap32.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * linux/arch/x86_64/ia32/mm/mmap.c
- *
- * flexible mmap layout support
- *
- * Based on the i386 version which was
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline unsigned long mmap_base(struct mm_struct *mm)
-{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
- unsigned long random_factor = 0;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = get_random_int() % (1024*1024);
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void ia32_pick_mmap_layout(struct mm_struct *mm)
-{
- /*
- * Fall back to the standard layout if the personality
- * bit is set, or if the expected stack growth is unlimited:
- */
- if (sysctl_legacy_va_layout ||
- (current->personality & ADDR_COMPAT_LAYOUT) ||
- current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
- } else {
- mm->mmap_base = mmap_base(mm);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
- }
-}
diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
deleted file mode 100644
index 4a233ad6269..00000000000
--- a/arch/x86/ia32/ptrace32.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * 32bit ptrace for x86-64.
- *
- * Copyright 2001,2002 Andi Kleen, SuSE Labs.
- * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier
- * copyright.
- *
- * This allows to access 64bit processes too; but there is no way to see the extended
- * register contents.
- */
-
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#include <asm/compat.h>
-#include <asm/uaccess.h>
-#include <asm/user32.h>
-#include <asm/user.h>
-#include <asm/errno.h>
-#include <asm/debugreg.h>
-#include <asm/i387.h>
-#include <asm/fpu32.h>
-#include <asm/ia32.h>
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (31-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-#define R32(l,q) \
- case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
-
-static int putreg32(struct task_struct *child, unsigned regno, u32 val)
-{
- int i;
- __u64 *stack = (__u64 *)task_pt_regs(child);
-
- switch (regno) {
- case offsetof(struct user32, regs.fs):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.fsindex = val & 0xffff;
- break;
- case offsetof(struct user32, regs.gs):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.gsindex = val & 0xffff;
- break;
- case offsetof(struct user32, regs.ds):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.ds = val & 0xffff;
- break;
- case offsetof(struct user32, regs.es):
- child->thread.es = val & 0xffff;
- break;
- case offsetof(struct user32, regs.ss):
- if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
- break;
- case offsetof(struct user32, regs.cs):
- if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
- break;
-
- R32(ebx, rbx);
- R32(ecx, rcx);
- R32(edx, rdx);
- R32(edi, rdi);
- R32(esi, rsi);
- R32(ebp, rbp);
- R32(eax, rax);
- R32(orig_eax, orig_rax);
- R32(eip, rip);
- R32(esp, rsp);
-
- case offsetof(struct user32, regs.eflags): {
- __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
- val &= FLAG_MASK;
- *flags = val | (*flags & ~FLAG_MASK);
- break;
- }
-
- case offsetof(struct user32, u_debugreg[4]):
- case offsetof(struct user32, u_debugreg[5]):
- return -EIO;
-
- case offsetof(struct user32, u_debugreg[0]):
- child->thread.debugreg0 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[1]):
- child->thread.debugreg1 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[2]):
- child->thread.debugreg2 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[3]):
- child->thread.debugreg3 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[6]):
- child->thread.debugreg6 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[7]):
- val &= ~DR_CONTROL_RESERVED;
- /* See arch/i386/kernel/ptrace.c for an explanation of
- * this awkward check.*/
- for(i=0; i<4; i++)
- if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = val;
- if (val)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
-
- default:
- if (regno > sizeof(struct user32) || (regno & 3))
- return -EIO;
-
- /* Other dummy fields in the virtual user structure are ignored */
- break;
- }
- return 0;
-}
-
-#undef R32
-
-#define R32(l,q) \
- case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break
-
-static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
-{
- __u64 *stack = (__u64 *)task_pt_regs(child);
-
- switch (regno) {
- case offsetof(struct user32, regs.fs):
- *val = child->thread.fsindex;
- break;
- case offsetof(struct user32, regs.gs):
- *val = child->thread.gsindex;
- break;
- case offsetof(struct user32, regs.ds):
- *val = child->thread.ds;
- break;
- case offsetof(struct user32, regs.es):
- *val = child->thread.es;
- break;
-
- R32(cs, cs);
- R32(ss, ss);
- R32(ebx, rbx);
- R32(ecx, rcx);
- R32(edx, rdx);
- R32(edi, rdi);
- R32(esi, rsi);
- R32(ebp, rbp);
- R32(eax, rax);
- R32(orig_eax, orig_rax);
- R32(eip, rip);
- R32(eflags, eflags);
- R32(esp, rsp);
-
- case offsetof(struct user32, u_debugreg[0]):
- *val = child->thread.debugreg0;
- break;
- case offsetof(struct user32, u_debugreg[1]):
- *val = child->thread.debugreg1;
- break;
- case offsetof(struct user32, u_debugreg[2]):
- *val = child->thread.debugreg2;
- break;
- case offsetof(struct user32, u_debugreg[3]):
- *val = child->thread.debugreg3;
- break;
- case offsetof(struct user32, u_debugreg[6]):
- *val = child->thread.debugreg6;
- break;
- case offsetof(struct user32, u_debugreg[7]):
- *val = child->thread.debugreg7;
- break;
-
- default:
- if (regno > sizeof(struct user32) || (regno & 3))
- return -EIO;
-
- /* Other dummy fields in the virtual user structure are ignored */
- *val = 0;
- break;
- }
- return 0;
-}
-
-#undef R32
-
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
-{
- int ret;
- compat_siginfo_t __user *si32 = compat_ptr(data);
- siginfo_t ssi;
- siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
- if (request == PTRACE_SETSIGINFO) {
- memset(&ssi, 0, sizeof(siginfo_t));
- ret = copy_siginfo_from_user32(&ssi, si32);
- if (ret)
- return ret;
- if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
- return -EFAULT;
- }
- ret = sys_ptrace(request, pid, addr, (unsigned long)si);
- if (ret)
- return ret;
- if (request == PTRACE_GETSIGINFO) {
- if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
- return -EFAULT;
- ret = copy_siginfo_to_user32(si32, &ssi);
- }
- return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
- struct task_struct *child;
- struct pt_regs *childregs;
- void __user *datap = compat_ptr(data);
- int ret;
- __u32 val;
-
- switch (request) {
- case PTRACE_TRACEME:
- case PTRACE_ATTACH:
- case PTRACE_KILL:
- case PTRACE_CONT:
- case PTRACE_SINGLESTEP:
- case PTRACE_DETACH:
- case PTRACE_SYSCALL:
- case PTRACE_OLDSETOPTIONS:
- case PTRACE_SETOPTIONS:
- case PTRACE_SET_THREAD_AREA:
- case PTRACE_GET_THREAD_AREA:
- return sys_ptrace(request, pid, addr, data);
-
- default:
- return -EINVAL;
-
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- case PTRACE_POKEUSR:
- case PTRACE_PEEKUSR:
- case PTRACE_GETREGS:
- case PTRACE_SETREGS:
- case PTRACE_SETFPREGS:
- case PTRACE_GETFPREGS:
- case PTRACE_SETFPXREGS:
- case PTRACE_GETFPXREGS:
- case PTRACE_GETEVENTMSG:
- break;
-
- case PTRACE_SETSIGINFO:
- case PTRACE_GETSIGINFO:
- return ptrace32_siginfo(request, pid, addr, data);
- }
-
- child = ptrace_get_task_struct(pid);
- if (IS_ERR(child))
- return PTR_ERR(child);
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out;
-
- childregs = task_pt_regs(child);
-
- switch (request) {
- case PTRACE_PEEKDATA:
- case PTRACE_PEEKTEXT:
- ret = 0;
- if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32))
- ret = -EIO;
- else
- ret = put_user(val, (unsigned int __user *)datap);
- break;
-
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- ret = 0;
- if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32))
- ret = -EIO;
- break;
-
- case PTRACE_PEEKUSR:
- ret = getreg32(child, addr, &val);
- if (ret == 0)
- ret = put_user(val, (__u32 __user *)datap);
- break;
-
- case PTRACE_POKEUSR:
- ret = putreg32(child, addr, data);
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- int i;
- if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) {
- getreg32(child, i, &val);
- ret |= __put_user(val,(u32 __user *)datap);
- datap += sizeof(u32);
- }
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- int i;
- if (!access_ok(VERIFY_READ, datap, 16*4)) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
- ret |= __get_user(tmp, (u32 __user *)datap);
- putreg32(child, i, tmp);
- datap += sizeof(u32);
- }
- break;
- }
-
- case PTRACE_GETFPREGS:
- ret = -EIO;
- if (!access_ok(VERIFY_READ, compat_ptr(data),
- sizeof(struct user_i387_struct)))
- break;
- save_i387_ia32(child, datap, childregs, 1);
- ret = 0;
- break;
-
- case PTRACE_SETFPREGS:
- ret = -EIO;
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_i387_struct)))
- break;
- ret = 0;
- /* don't check EFAULT to be bug-to-bug compatible to i386 */
- restore_i387_ia32(child, datap, 1);
- break;
-
- case PTRACE_GETFPXREGS: {
- struct user32_fxsr_struct __user *u = datap;
- init_fpu(child);
- ret = -EIO;
- if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
- break;
- ret = -EFAULT;
- if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
- break;
- ret = __put_user(childregs->cs, &u->fcs);
- ret |= __put_user(child->thread.ds, &u->fos);
- break;
- }
- case PTRACE_SETFPXREGS: {
- struct user32_fxsr_struct __user *u = datap;
- unlazy_fpu(child);
- ret = -EIO;
- if (!access_ok(VERIFY_READ, u, sizeof(*u)))
- break;
- /* no checking to be bug-to-bug compatible with i386. */
- /* but silence warning */
- if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
- ;
- set_stopped_child_used_math(child);
- child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- ret = 0;
- break;
- }
-
- case PTRACE_GETEVENTMSG:
- ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data));
- break;
-
- default:
- BUG();
- }
-
- out:
- put_task_struct(child);
- return ret;
-}
-
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index bee96d61443..abf71d26fc2 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -1,29 +1,29 @@
/*
* sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
- * sys_sparc32
+ * sys_sparc32
*
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
- * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
+ * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
*
* These routines maintain argument size conversion between 32bit and 64bit
- * environment. In 2.5 most of this should be moved to a generic directory.
+ * environment. In 2.5 most of this should be moved to a generic directory.
*
* This file assumes that there is a hole at the end of user address space.
- *
- * Some of the functions are LE specific currently. These are hopefully all marked.
- * This should be fixed.
+ *
+ * Some of the functions are LE specific currently. These are
+ * hopefully all marked. This should be fixed.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/file.h>
#include <linux/signal.h>
#include <linux/syscalls.h>
#include <linux/resource.h>
@@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf)
if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
return -EOVERFLOW;
if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
- __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
- __put_user (ino, &ubuf->st_ino) ||
- __put_user (kbuf->mode, &ubuf->st_mode) ||
- __put_user (kbuf->nlink, &ubuf->st_nlink) ||
- __put_user (uid, &ubuf->st_uid) ||
- __put_user (gid, &ubuf->st_gid) ||
- __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
- __put_user (kbuf->size, &ubuf->st_size) ||
- __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) ||
- __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
- __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
- __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
- __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
- __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
- __put_user (kbuf->blksize, &ubuf->st_blksize) ||
- __put_user (kbuf->blocks, &ubuf->st_blocks))
+ __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
+ __put_user(ino, &ubuf->st_ino) ||
+ __put_user(kbuf->mode, &ubuf->st_mode) ||
+ __put_user(kbuf->nlink, &ubuf->st_nlink) ||
+ __put_user(uid, &ubuf->st_uid) ||
+ __put_user(gid, &ubuf->st_gid) ||
+ __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
+ __put_user(kbuf->size, &ubuf->st_size) ||
+ __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) ||
+ __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+ __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
+ __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+ __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
+ __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+ __put_user(kbuf->blksize, &ubuf->st_blksize) ||
+ __put_user(kbuf->blocks, &ubuf->st_blocks))
return -EFAULT;
return 0;
}
-asmlinkage long
-sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high)
+asmlinkage long sys32_truncate64(char __user *filename,
+ unsigned long offset_low,
+ unsigned long offset_high)
{
return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
}
-asmlinkage long
-sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high)
+asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
+ unsigned long offset_high)
{
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
-/* Another set for IA32/LFS -- x86_64 struct stat is different due to
- support for 64bit inode numbers. */
-
-static int
-cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
+/*
+ * Another set for IA32/LFS -- x86_64 struct stat is different due to
+ * support for 64bit inode numbers.
+ */
+static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
{
typeof(ubuf->st_uid) uid = 0;
typeof(ubuf->st_gid) gid = 0;
@@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
SET_GID(gid, stat->gid);
if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
- __put_user (stat->ino, &ubuf->__st_ino) ||
- __put_user (stat->ino, &ubuf->st_ino) ||
- __put_user (stat->mode, &ubuf->st_mode) ||
- __put_user (stat->nlink, &ubuf->st_nlink) ||
- __put_user (uid, &ubuf->st_uid) ||
- __put_user (gid, &ubuf->st_gid) ||
- __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
- __put_user (stat->size, &ubuf->st_size) ||
- __put_user (stat->atime.tv_sec, &ubuf->st_atime) ||
- __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
- __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) ||
- __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
- __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) ||
- __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
- __put_user (stat->blksize, &ubuf->st_blksize) ||
- __put_user (stat->blocks, &ubuf->st_blocks))
+ __put_user(stat->ino, &ubuf->__st_ino) ||
+ __put_user(stat->ino, &ubuf->st_ino) ||
+ __put_user(stat->mode, &ubuf->st_mode) ||
+ __put_user(stat->nlink, &ubuf->st_nlink) ||
+ __put_user(uid, &ubuf->st_uid) ||
+ __put_user(gid, &ubuf->st_gid) ||
+ __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
+ __put_user(stat->size, &ubuf->st_size) ||
+ __put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
+ __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+ __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
+ __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+ __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
+ __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+ __put_user(stat->blksize, &ubuf->st_blksize) ||
+ __put_user(stat->blocks, &ubuf->st_blocks))
return -EFAULT;
return 0;
}
-asmlinkage long
-sys32_stat64(char __user * filename, struct stat64 __user *statbuf)
+asmlinkage long sys32_stat64(char __user *filename,
+ struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
+
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
-asmlinkage long
-sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
+asmlinkage long sys32_lstat64(char __user *filename,
+ struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
return ret;
}
-asmlinkage long
-sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
return ret;
}
-asmlinkage long
-sys32_fstatat(unsigned int dfd, char __user *filename,
- struct stat64 __user* statbuf, int flag)
+asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
+ struct stat64 __user *statbuf, int flag)
{
struct kstat stat;
int error = -EINVAL;
@@ -221,8 +221,7 @@ struct mmap_arg_struct {
unsigned int offset;
};
-asmlinkage long
-sys32_mmap(struct mmap_arg_struct __user *arg)
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
{
struct mmap_arg_struct a;
struct file *file = NULL;
@@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user *arg)
return -EFAULT;
if (a.offset & ~PAGE_MASK)
- return -EINVAL;
+ return -EINVAL;
if (!(a.flags & MAP_ANONYMOUS)) {
file = fget(a.fd);
if (!file)
return -EBADF;
}
-
- mm = current->mm;
- down_write(&mm->mmap_sem);
- retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT);
+
+ mm = current->mm;
+ down_write(&mm->mmap_sem);
+ retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags,
+ a.offset>>PAGE_SHIFT);
if (file)
fput(file);
- up_write(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
return retval;
}
-asmlinkage long
-sys32_mprotect(unsigned long start, size_t len, unsigned long prot)
+asmlinkage long sys32_mprotect(unsigned long start, size_t len,
+ unsigned long prot)
{
- return sys_mprotect(start,len,prot);
+ return sys_mprotect(start, len, prot);
}
-asmlinkage long
-sys32_pipe(int __user *fd)
+asmlinkage long sys32_pipe(int __user *fd)
{
int retval;
int fds[2];
@@ -269,13 +268,13 @@ sys32_pipe(int __user *fd)
goto out;
if (copy_to_user(fd, fds, sizeof(fds)))
retval = -EFAULT;
- out:
+out:
return retval;
}
-asmlinkage long
-sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
- struct sigaction32 __user *oact, unsigned int sigsetsize)
+asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
+ struct sigaction32 __user *oact,
+ unsigned int sigsetsize)
{
struct k_sigaction new_ka, old_ka;
int ret;
@@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer)||
- __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)))
+ __get_user(restorer, &act->sa_restorer) ||
+ __copy_from_user(&set32, &act->sa_mask,
+ sizeof(compat_sigset_t)))
return -EFAULT;
new_ka.sa.sa_handler = compat_ptr(handler);
new_ka.sa.sa_restorer = compat_ptr(restorer);
- /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+
+ /*
+ * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
+ * than _NSIG_WORDS << 1
+ */
switch (_NSIG_WORDS) {
case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
| (((long)set32.sig[7]) << 32);
@@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+ /*
+ * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
+ * than _NSIG_WORDS << 1
+ */
switch (_NSIG_WORDS) {
case 4:
set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
@@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
set32.sig[0] = old_ka.sa.sa_mask.sig[0];
}
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+ &oact->sa_handler) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+ &oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)))
+ __copy_to_user(&oact->sa_mask, &set32,
+ sizeof(compat_sigset_t)))
return -EFAULT;
}
return ret;
}
-asmlinkage long
-sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
+asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
+ struct old_sigaction32 __user *oact)
{
- struct k_sigaction new_ka, old_ka;
- int ret;
+ struct k_sigaction new_ka, old_ka;
+ int ret;
- if (act) {
+ if (act) {
compat_old_sigset_t mask;
compat_uptr_t handler, restorer;
@@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigacti
new_ka.sa.sa_restorer = compat_ptr(restorer);
siginitset(&new_ka.sa.sa_mask, mask);
- }
+ }
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+ &oact->sa_handler) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+ &oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
return -EFAULT;
- }
+ }
return ret;
}
-asmlinkage long
-sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, unsigned int sigsetsize)
+asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+ compat_sigset_t __user *oset,
+ unsigned int sigsetsize)
{
sigset_t s;
compat_sigset_t s32;
int ret;
mm_segment_t old_fs = get_fs();
-
+
if (set) {
- if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
+ if (copy_from_user(&s32, set, sizeof(compat_sigset_t)))
return -EFAULT;
switch (_NSIG_WORDS) {
case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
@@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
}
}
- set_fs (KERNEL_DS);
+ set_fs(KERNEL_DS);
ret = sys_rt_sigprocmask(how,
set ? (sigset_t __user *)&s : NULL,
oset ? (sigset_t __user *)&s : NULL,
- sigsetsize);
- set_fs (old_fs);
- if (ret) return ret;
+ sigsetsize);
+ set_fs(old_fs);
+ if (ret)
+ return ret;
if (oset) {
switch (_NSIG_WORDS) {
case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
@@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
}
- if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
+ if (copy_to_user(oset, &s32, sizeof(compat_sigset_t)))
return -EFAULT;
}
return 0;
}
-static inline long
-get_tv32(struct timeval *o, struct compat_timeval __user *i)
+static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i)
{
- int err = -EFAULT;
- if (access_ok(VERIFY_READ, i, sizeof(*i))) {
+ int err = -EFAULT;
+
+ if (access_ok(VERIFY_READ, i, sizeof(*i))) {
err = __get_user(o->tv_sec, &i->tv_sec);
err |= __get_user(o->tv_usec, &i->tv_usec);
}
- return err;
+ return err;
}
-static inline long
-put_tv32(struct compat_timeval __user *o, struct timeval *i)
+static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
{
int err = -EFAULT;
- if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
+
+ if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
err = __put_user(i->tv_sec, &o->tv_sec);
err |= __put_user(i->tv_usec, &o->tv_usec);
- }
- return err;
+ }
+ return err;
}
-extern unsigned int alarm_setitimer(unsigned int seconds);
-
-asmlinkage long
-sys32_alarm(unsigned int seconds)
+asmlinkage long sys32_alarm(unsigned int seconds)
{
return alarm_setitimer(seconds);
}
-/* Translations due to time_t size differences. Which affects all
- sorts of things, like timeval and itimerval. */
-
-extern struct timezone sys_tz;
-
-asmlinkage long
-sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+/*
+ * Translations due to time_t size differences. Which affects all
+ * sorts of things, like timeval and itimerval.
+ */
+asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv,
+ struct timezone __user *tz)
{
if (tv) {
struct timeval ktv;
+
do_gettimeofday(&ktv);
if (put_tv32(tv, &ktv))
return -EFAULT;
@@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
return 0;
}
-asmlinkage long
-sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
+ struct timezone __user *tz)
{
struct timeval ktv;
struct timespec kts;
struct timezone ktz;
- if (tv) {
+ if (tv) {
if (get_tv32(&ktv, tv))
return -EFAULT;
kts.tv_sec = ktv.tv_sec;
@@ -494,8 +504,7 @@ struct sel_arg_struct {
unsigned int tvp;
};
-asmlinkage long
-sys32_old_select(struct sel_arg_struct __user *arg)
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg)
{
struct sel_arg_struct a;
@@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct __user *arg)
compat_ptr(a.exp), compat_ptr(a.tvp));
}
-extern asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options,
- struct compat_rusage *ru);
-
-asmlinkage long
-sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+ int options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
/* 32-bit timeval and related flotsam. */
-asmlinkage long
-sys32_sysfs(int option, u32 arg1, u32 arg2)
+asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
{
return sys_sysfs(option, arg1, arg2);
}
-asmlinkage long
-sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
+ struct compat_timespec __user *interval)
{
struct timespec t;
int ret;
- mm_segment_t old_fs = get_fs ();
-
- set_fs (KERNEL_DS);
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
- set_fs (old_fs);
+ set_fs(old_fs);
if (put_compat_timespec(&t, interval))
return -EFAULT;
return ret;
}
-asmlinkage long
-sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
+ compat_size_t sigsetsize)
{
sigset_t s;
compat_sigset_t s32;
int ret;
mm_segment_t old_fs = get_fs();
-
- set_fs (KERNEL_DS);
+
+ set_fs(KERNEL_DS);
ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
- set_fs (old_fs);
+ set_fs(old_fs);
if (!ret) {
switch (_NSIG_WORDS) {
case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
@@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
}
- if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
+ if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
return -EFAULT;
}
return ret;
}
-asmlinkage long
-sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
+asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
+ compat_siginfo_t __user *uinfo)
{
siginfo_t info;
int ret;
mm_segment_t old_fs = get_fs();
-
+
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
- set_fs (KERNEL_DS);
+ set_fs(KERNEL_DS);
ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
- set_fs (old_fs);
+ set_fs(old_fs);
return ret;
}
/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long
-sys32_pause(void)
+asmlinkage long sys32_pause(void)
{
current->state = TASK_INTERRUPTIBLE;
schedule();
@@ -599,25 +602,25 @@ struct sysctl_ia32 {
};
-asmlinkage long
-sys32_sysctl(struct sysctl_ia32 __user *args32)
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32)
{
struct sysctl_ia32 a32;
- mm_segment_t old_fs = get_fs ();
+ mm_segment_t old_fs = get_fs();
void __user *oldvalp, *newvalp;
size_t oldlen;
int __user *namep;
long ret;
- if (copy_from_user(&a32, args32, sizeof (a32)))
+ if (copy_from_user(&a32, args32, sizeof(a32)))
return -EFAULT;
/*
- * We need to pre-validate these because we have to disable address checking
- * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
- * user specifying bad addresses here. Well, since we're dealing with 32 bit
- * addresses, we KNOW that access_ok() will always succeed, so this is an
- * expensive NOP, but so what...
+ * We need to pre-validate these because we have to disable
+ * address checking before calling do_sysctl() because of
+ * OLDLEN but we can't run the risk of the user specifying bad
+ * addresses here. Well, since we're dealing with 32 bit
+ * addresses, we KNOW that access_ok() will always succeed, so
+ * this is an expensive NOP, but so what...
*/
namep = compat_ptr(a32.name);
oldvalp = compat_ptr(a32.oldval);
@@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user *args32)
unlock_kernel();
set_fs(old_fs);
- if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp)))
+ if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
return -EFAULT;
return ret;
}
#endif
-/* warning: next two assume little endian */
-asmlinkage long
-sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+/* warning: next two assume little endian */
+asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
+ u32 poslo, u32 poshi)
{
return sys_pread64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
-asmlinkage long
-sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count,
+ u32 poslo, u32 poshi)
{
return sys_pwrite64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
-asmlinkage long
-sys32_personality(unsigned long personality)
+asmlinkage long sys32_personality(unsigned long personality)
{
int ret;
- if (personality(current->personality) == PER_LINUX32 &&
+
+ if (personality(current->personality) == PER_LINUX32 &&
personality == PER_LINUX)
personality = PER_LINUX32;
ret = sys_personality(personality);
@@ -672,34 +675,33 @@ sys32_personality(unsigned long personality)
return ret;
}
-asmlinkage long
-sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
+asmlinkage long sys32_sendfile(int out_fd, int in_fd,
+ compat_off_t __user *offset, s32 count)
{
mm_segment_t old_fs = get_fs();
int ret;
off_t of;
-
+
if (offset && get_user(of, offset))
return -EFAULT;
-
+
set_fs(KERNEL_DS);
ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
count);
set_fs(old_fs);
-
+
if (offset && put_user(of, offset))
return -EFAULT;
-
return ret;
}
asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
{
struct mm_struct *mm = current->mm;
unsigned long error;
- struct file * file = NULL;
+ struct file *file = NULL;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {
@@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
return error;
}
-asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
+asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
{
+ char *arch = "x86_64";
int err;
if (!name)
return -EFAULT;
if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
return -EFAULT;
-
- down_read(&uts_sem);
-
- err = __copy_to_user(&name->sysname,&utsname()->sysname,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->nodename,&utsname()->nodename,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->release,&utsname()->release,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->release+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->version,&utsname()->version,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->version+__OLD_UTS_LEN);
- {
- char *arch = "x86_64";
- if (personality(current->personality) == PER_LINUX32)
- arch = "i686";
-
- err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
- }
+
+ down_read(&uts_sem);
+
+ err = __copy_to_user(&name->sysname, &utsname()->sysname,
+ __OLD_UTS_LEN);
+ err |= __put_user(0, name->sysname+__OLD_UTS_LEN);
+ err |= __copy_to_user(&name->nodename, &utsname()->nodename,
+ __OLD_UTS_LEN);
+ err |= __put_user(0, name->nodename+__OLD_UTS_LEN);
+ err |= __copy_to_user(&name->release, &utsname()->release,
+ __OLD_UTS_LEN);
+ err |= __put_user(0, name->release+__OLD_UTS_LEN);
+ err |= __copy_to_user(&name->version, &utsname()->version,
+ __OLD_UTS_LEN);
+ err |= __put_user(0, name->version+__OLD_UTS_LEN);
+
+ if (personality(current->personality) == PER_LINUX32)
+ arch = "i686";
+
+ err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1);
up_read(&uts_sem);
@@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
return err;
}
-long sys32_uname(struct old_utsname __user * name)
+long sys32_uname(struct old_utsname __user *name)
{
int err;
+
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof(*name));
up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
+ if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
- return err?-EFAULT:0;
+
+ return err ? -EFAULT : 0;
}
long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
@@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
struct ustat u;
mm_segment_t seg;
int ret;
-
- seg = get_fs();
- set_fs(KERNEL_DS);
+
+ seg = get_fs();
+ set_fs(KERNEL_DS);
ret = sys_ustat(dev, (struct ustat __user *)&u);
set_fs(seg);
- if (ret >= 0) {
- if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) ||
- __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
- __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
- __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
- __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
- ret = -EFAULT;
- }
+ if (ret < 0)
+ return ret;
+
+ if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
+ __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
+ __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
+ __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
+ __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
+ ret = -EFAULT;
return ret;
-}
+}
asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
compat_uptr_t __user *envp, struct pt_regs *regs)
{
long error;
- char * filename;
+ char *filename;
filename = getname(name);
error = PTR_ERR(filename);
@@ -812,18 +816,19 @@ asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
struct pt_regs *regs)
{
- void __user *parent_tid = (void __user *)regs->rdx;
- void __user *child_tid = (void __user *)regs->rdi;
+ void __user *parent_tid = (void __user *)regs->dx;
+ void __user *child_tid = (void __user *)regs->di;
+
if (!newsp)
- newsp = regs->rsp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+ newsp = regs->sp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
- * Some system calls that need sign extended arguments. This could be done by a generic wrapper.
- */
-
-long sys32_lseek (unsigned int fd, int offset, unsigned int whence)
+ * Some system calls that need sign extended arguments. This could be
+ * done by a generic wrapper.
+ */
+long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
{
return sys_lseek(fd, offset, whence);
}
@@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig)
{
return sys_kill(pid, sig);
}
-
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
+
+long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
-{
+{
return sys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
(((u64)len_high)<<32) | len_low,
- advice);
-}
+ advice);
+}
long sys32_vm86_warning(void)
-{
+{
struct task_struct *me = current;
static char lastcomm[sizeof(me->comm)];
+
if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
- compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
- me->comm);
+ compat_printk(KERN_INFO
+ "%s: vm86 mode not supported on 64 bit kernel\n",
+ me->comm);
strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
+ }
return -ENOSYS;
-}
+}
long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
- char __user * buf, size_t len)
+ char __user *buf, size_t len)
{
return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
}
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count)
+asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
+ size_t count)
{
return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
- unsigned n_low, unsigned n_hi, int flags)
+ unsigned n_low, unsigned n_hi, int flags)
{
return sys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len,
- int advice)
+asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
+ size_t len, int advice)
{
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c
deleted file mode 100644
index 15013bac181..00000000000
--- a/arch/x86/ia32/syscall32.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
-
-/* vsyscall handling for 32bit processes. Map a stub page into it
- on demand because 32bit cannot reach the kernel's fixmaps */
-
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/stringify.h>
-#include <linux/security.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-#include <asm/ia32_unistd.h>
-#include <asm/vsyscall32.h>
-
-extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
-extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
-extern int sysctl_vsyscall32;
-
-static struct page *syscall32_pages[1];
-static int use_sysenter = -1;
-
-struct linux_binprm;
-
-/* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
-{
- struct mm_struct *mm = current->mm;
- int ret;
-
- down_write(&mm->mmap_sem);
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- *
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- /* Could randomize here */
- ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
- syscall32_pages);
- up_write(&mm->mmap_sem);
- return ret;
-}
-
-static int __init init_syscall32(void)
-{
- char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
- if (!syscall32_page)
- panic("Cannot allocate syscall32 page");
- syscall32_pages[0] = virt_to_page(syscall32_page);
- if (use_sysenter > 0) {
- memcpy(syscall32_page, syscall32_sysenter,
- syscall32_sysenter_end - syscall32_sysenter);
- } else {
- memcpy(syscall32_page, syscall32_syscall,
- syscall32_syscall_end - syscall32_syscall);
- }
- return 0;
-}
-
-__initcall(init_syscall32);
-
-/* May not be __init: called during resume */
-void syscall32_cpu_init(void)
-{
- if (use_sysenter < 0)
- use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
-
- /* Load these always in case some future AMD CPU supports
- SYSENTER from compat mode too. */
- checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
- checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
- wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
diff --git a/arch/x86/ia32/syscall32_syscall.S b/arch/x86/ia32/syscall32_syscall.S
deleted file mode 100644
index 933f0f08b1c..00000000000
--- a/arch/x86/ia32/syscall32_syscall.S
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 32bit VDSOs mapped into user space. */
-
- .section ".init.data","aw"
-
- .globl syscall32_syscall
- .globl syscall32_syscall_end
-
-syscall32_syscall:
- .incbin "arch/x86/ia32/vsyscall-syscall.so"
-syscall32_syscall_end:
-
- .globl syscall32_sysenter
- .globl syscall32_sysenter_end
-
-syscall32_sysenter:
- .incbin "arch/x86/ia32/vsyscall-sysenter.so"
-syscall32_sysenter_end:
diff --git a/arch/x86/ia32/tls32.c b/arch/x86/ia32/tls32.c
deleted file mode 100644
index 1cc4340de3c..00000000000
--- a/arch/x86/ia32/tls32.c
+++ /dev/null
@@ -1,163 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/user.h>
-
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/processor.h>
-#include <asm/proto.h>
-
-/*
- * sys_alloc_thread_area: get a yet unused TLS descriptor index.
- */
-static int get_free_idx(void)
-{
- struct thread_struct *t = &current->thread;
- int idx;
-
- for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
- if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx))
- return idx + GDT_ENTRY_TLS_MIN;
- return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- * When you want addresses > 32bit use arch_prctl()
- */
-int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct n_desc_struct *desc;
- int cpu, idx;
-
- if (copy_from_user(&info, u_info, sizeof(info)))
- return -EFAULT;
-
- idx = info.entry_number;
-
- /*
- * index -1 means the kernel should try to find and
- * allocate an empty descriptor:
- */
- if (idx == -1) {
- idx = get_free_idx();
- if (idx < 0)
- return idx;
- if (put_user(idx, &u_info->entry_number))
- return -EFAULT;
- }
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
-
- /*
- * We must not get preempted while modifying the TLS.
- */
- cpu = get_cpu();
-
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
- if (t == &current->thread)
- load_TLS(t, cpu);
-
- put_cpu();
- return 0;
-}
-
-asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
-{
- return do_set_thread_area(&current->thread, u_info);
-}
-
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1)
-
-int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct n_desc_struct *desc;
- int idx;
-
- if (get_user(idx, &u_info->entry_number))
- return -EFAULT;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
-
- memset(&info, 0, sizeof(struct user_desc));
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
- info.lm = GET_LONGMODE(desc);
-
- if (copy_to_user(u_info, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info)
-{
- return do_get_thread_area(&current->thread, u_info);
-}
-
-
-int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
-{
- struct n_desc_struct *desc;
- struct user_desc info;
- struct user_desc __user *cp;
- int idx;
-
- cp = (void __user *)childregs->rsi;
- if (copy_from_user(&info, cp, sizeof(info)))
- return -EFAULT;
- if (LDT_empty(&info))
- return -EINVAL;
-
- idx = info.entry_number;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
-
- return 0;
-}
diff --git a/arch/x86/ia32/vsyscall-sigreturn.S b/arch/x86/ia32/vsyscall-sigreturn.S
deleted file mode 100644
index b383be00bae..00000000000
--- a/arch/x86/ia32/vsyscall-sigreturn.S
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Common code for the sigreturn entry points on the vsyscall page.
- * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
- * to enter the kernel.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point. The addresses we get for these entry points
- * by doing ".balign 32" must match in both versions of the page.
- */
-
- .code32
- .section .text.sigreturn,"ax"
- .balign 32
- .globl __kernel_sigreturn
- .type __kernel_sigreturn,@function
-__kernel_sigreturn:
-.LSTART_sigreturn:
- popl %eax
- movl $__NR_ia32_sigreturn, %eax
- SYSCALL_ENTER_KERNEL
-.LEND_sigreturn:
- .size __kernel_sigreturn,.-.LSTART_sigreturn
-
- .section .text.rtsigreturn,"ax"
- .balign 32
- .globl __kernel_rt_sigreturn
- .type __kernel_rt_sigreturn,@function
-__kernel_rt_sigreturn:
-.LSTART_rt_sigreturn:
- movl $__NR_ia32_rt_sigreturn, %eax
- SYSCALL_ENTER_KERNEL
-.LEND_rt_sigreturn:
- .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMES:
- .long .LENDCIES-.LSTARTCIES
-.LSTARTCIES:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zRS" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIES:
-
- .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
-.LSTARTFDE2:
- .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */
- /* HACK: The dwarf2 unwind routines will subtract 1 from the
- return address to get an address in the middle of the
- presumed call instruction. Since we didn't get here via
- a call, we need to include the nop before the real start
- to make up for it. */
- .long .LSTART_sigreturn-1-. /* PC-relative start address */
- .long .LEND_sigreturn-.LSTART_sigreturn+1
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- complicated by the fact that the "CFA" is always assumed to
- be the value of the stack pointer in the caller. This means
- that we must define the CFA of this body of code to be the
- saved value of the stack pointer in the sigcontext. Which
- also means that there is no fixed relation to the other
- saved registers, which means that we must use DW_CFA_expression
- to compute their addresses. It also means that when we
- adjust the stack with the popl, we have to do it all over again. */
-
-#define do_cfa_expr(offset) \
- .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
- .byte 0x06; /* DW_OP_deref */ \
-1:
-
-#define do_expr(regno, offset) \
- .byte 0x10; /* DW_CFA_expression */ \
- .uleb128 regno; /* regno */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
-1:
-
- do_cfa_expr(IA32_SIGCONTEXT_esp+4)
- do_expr(0, IA32_SIGCONTEXT_eax+4)
- do_expr(1, IA32_SIGCONTEXT_ecx+4)
- do_expr(2, IA32_SIGCONTEXT_edx+4)
- do_expr(3, IA32_SIGCONTEXT_ebx+4)
- do_expr(5, IA32_SIGCONTEXT_ebp+4)
- do_expr(6, IA32_SIGCONTEXT_esi+4)
- do_expr(7, IA32_SIGCONTEXT_edi+4)
- do_expr(8, IA32_SIGCONTEXT_eip+4)
-
- .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
-
- do_cfa_expr(IA32_SIGCONTEXT_esp)
- do_expr(0, IA32_SIGCONTEXT_eax)
- do_expr(1, IA32_SIGCONTEXT_ecx)
- do_expr(2, IA32_SIGCONTEXT_edx)
- do_expr(3, IA32_SIGCONTEXT_ebx)
- do_expr(5, IA32_SIGCONTEXT_ebp)
- do_expr(6, IA32_SIGCONTEXT_esi)
- do_expr(7, IA32_SIGCONTEXT_edi)
- do_expr(8, IA32_SIGCONTEXT_eip)
-
- .align 4
-.LENDFDE2:
-
- .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
-.LSTARTFDE3:
- .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */
- /* HACK: See above wrt unwind library assumptions. */
- .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
- .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
- .uleb128 0 /* Augmentation */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- slightly less complicated than the above, since we don't
- modify the stack pointer in the process. */
-
- do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
- do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
- do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
- do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
- do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
- do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
- do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
- do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
- do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
-
- .align 4
-.LENDFDE3:
-
-#include "../../x86/kernel/vsyscall-note_32.S"
-
diff --git a/arch/x86/ia32/vsyscall-sysenter.S b/arch/x86/ia32/vsyscall-sysenter.S
deleted file mode 100644
index ae056e553d1..00000000000
--- a/arch/x86/ia32/vsyscall-sysenter.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Code for the vsyscall page. This version uses the sysenter instruction.
- */
-
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
-
- .code32
- .text
- .section .text.vsyscall,"ax"
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ecx
-.Lpush_ecx:
- push %edx
-.Lpush_edx:
- push %ebp
-.Lenter_kernel:
- movl %esp,%ebp
- sysenter
- .space 7,0x90
- jmp .Lenter_kernel
- /* 16: System call normal return point is here! */
- pop %ebp
-.Lpop_ebp:
- pop %edx
-.Lpop_edx:
- pop %ecx
-.Lpop_ecx:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAME:
- .long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIE:
-
- .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
-.LSTARTFDE1:
- .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_ecx-.LSTART_vsyscall
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_edx-.Lpush_ecx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lenter_kernel-.Lpush_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x10 /* RA at offset 16 now */
- .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
- /* Finally the epilogue. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ebp-.Lenter_kernel
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x12 /* RA at offset 12 now */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_edx-.Lpop_ebp
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ecx-.Lpop_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x04 /* RA at offset 4 now */
- .align 4
-.LENDFDE1:
-
-#define SYSCALL_ENTER_KERNEL int $0x80
-#include "vsyscall-sigreturn.S"
diff --git a/arch/x86/ia32/vsyscall.lds b/arch/x86/ia32/vsyscall.lds
deleted file mode 100644
index 1dc86ff5bcb..00000000000
--- a/arch/x86/ia32/vsyscall.lds
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address. This script controls its layout.
- */
-
-/* This must match <asm/fixmap.h>. */
-VSYSCALL_BASE = 0xffffe000;
-
-SECTIONS
-{
- . = VSYSCALL_BASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VSYSCALL_BASE + 0x400;
-
- .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090
-
- /* This is an 32bit object and we cannot easily get the offsets
- into the 64bit kernel. Just hardcode them here. This assumes
- that all the stubs don't need more than 0x100 bytes. */
- . = VSYSCALL_BASE + 0x500;
-
- .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090
-
- . = VSYSCALL_BASE + 0x600;
-
- .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090
-
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- } :text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.5 {
- global:
- __kernel_vsyscall;
- __kernel_sigreturn;
- __kernel_rt_sigreturn;
-
- local: *;
- };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value. */
-ENTRY(__kernel_vsyscall);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 38573340b14..6f813009d44 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,9 +1,91 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/kernel/Makefile_32
-else
-include ${srctree}/arch/x86/kernel/Makefile_64
+#
+# Makefile for the linux kernel.
+#
+
+extra-y := head_$(BITS).o init_task.o vmlinux.lds
+extra-$(CONFIG_X86_64) += head64.o
+
+CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
+CFLAGS_vsyscall_64.o := $(PROFILING) -g0
+
+obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
+obj-y += traps_$(BITS).o irq_$(BITS).o
+obj-y += time_$(BITS).o ioport.o ldt.o
+obj-y += setup_$(BITS).o i8259_$(BITS).o
+obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
+obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
+obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o
+obj-y += quirks.o i8237.o topology.o kdebugfs.o
+obj-y += alternative.o i8253.o
+obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o
+obj-y += tsc_$(BITS).o io_delay.o rtc.o
+
+obj-y += i387.o
+obj-y += ptrace.o
+obj-y += ds.o
+obj-$(CONFIG_X86_32) += tls.o
+obj-$(CONFIG_IA32_EMULATION) += tls.o
+obj-y += step.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-y += cpu/
+obj-y += acpi/
+obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
+obj-$(CONFIG_X86_64) += reboot.o
+obj-$(CONFIG_MCA) += mca_32.o
+obj-$(CONFIG_X86_MSR) += msr.o
+obj-$(CONFIG_X86_CPUID) += cpuid.o
+obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_PCI) += early-quirks.o
+obj-$(CONFIG_APM) += apm_32.o
+obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
+obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
+obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
+obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
+obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
+obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
+obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
+obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
+obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
+obj-$(CONFIG_X86_VSMP) += vsmp_64.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_MODULES) += module_$(BITS).o
+obj-$(CONFIG_ACPI_SRAT) += srat_32.o
+obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
+obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+obj-$(CONFIG_VM86) += vm86_32.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+
+obj-$(CONFIG_HPET_TIMER) += hpet.o
+
+obj-$(CONFIG_K8_NB) += k8.o
+obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o
+obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
+obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
+
+obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+
+ifdef CONFIG_INPUT_PCSPKR
+obj-y += pcspeaker.o
endif
-# Workaround to delete .lds files with make clean
-# The problem is that we do not enter Makefile_32 with make clean.
-clean-files := vsyscall*.lds vsyscall*.so
+obj-$(CONFIG_SCx200) += scx200_32.o
+
+###
+# 64 bit specific files
+ifeq ($(CONFIG_X86_64),y)
+ obj-y += genapic_64.o genapic_flat_64.o
+ obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
+ obj-$(CONFIG_AUDIT) += audit_64.o
+ obj-$(CONFIG_PM) += suspend_64.o
+ obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
+
+ obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
+ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
+ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
+endif
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
deleted file mode 100644
index a7bc93c2766..00000000000
--- a/arch/x86/kernel/Makefile_32
+++ /dev/null
@@ -1,88 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head_32.o init_task.o vmlinux.lds
-CPPFLAGS_vmlinux.lds += -Ui386
-
-obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
- ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
- pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
-
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-y += cpu/
-obj-y += acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o
-obj-$(CONFIG_MCA) += mca_32.o
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-obj-$(CONFIG_PCI) += early-quirks.o
-obj-$(CONFIG_APM) += apm_32.o
-obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o
-obj-$(CONFIG_SMP) += smpcommon_32.o
-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o
-obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o
-obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
-obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o
-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
-obj-$(CONFIG_KPROBES) += kprobes_32.o
-obj-$(CONFIG_MODULES) += module_32.o
-obj-y += sysenter_32.o vsyscall_32.o
-obj-$(CONFIG_ACPI_SRAT) += srat_32.o
-obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
-obj-$(CONFIG_VM86) += vm86_32.o
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_HPET_TIMER) += hpet.o
-obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o
-
-obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
-obj-$(CONFIG_PARAVIRT) += paravirt_32.o
-obj-y += pcspeaker.o
-
-obj-$(CONFIG_SCx200) += scx200_32.o
-
-# vsyscall_32.o contains the vsyscall DSO images as __initdata.
-# We must build both images before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so
-targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so)
-targets += vsyscall-note_32.o vsyscall_32.lds
-
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386
-
-vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
-SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags)
-SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags)
-
-$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \
- $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE
- $(call if_changed,syscall)
-
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vsyscall-syms.o
-$(obj)/built-in.o: $(obj)/vsyscall-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
-
-SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \
- $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE
- $(call if_changed,syscall)
-
-
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
deleted file mode 100644
index 5a88890d8ee..00000000000
--- a/arch/x86/kernel/Makefile_64
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head_64.o head64.o init_task.o vmlinux.lds
-CPPFLAGS_vmlinux.lds += -Ux86_64
-EXTRA_AFLAGS := -traditional
-
-obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
- ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \
- x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
- setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
- pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
-
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-y += cpu/
-obj-y += acpi/
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o
-obj-y += apic_64.o nmi_64.o
-obj-y += io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o
-obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o
-obj-$(CONFIG_PM) += suspend_64.o
-obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
-obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
-obj-$(CONFIG_KPROBES) += kprobes_64.o
-obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
-obj-$(CONFIG_X86_VSMP) += vsmp_64.o
-obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_AUDIT) += audit_64.o
-
-obj-$(CONFIG_MODULES) += module_64.o
-obj-$(CONFIG_PCI) += early-quirks.o
-
-obj-y += topology.o
-obj-y += pcspeaker.o
-
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 1351c3982ee..19d3d6e9d09 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,5 @@
obj-$(CONFIG_ACPI) += boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep_$(BITS).o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o processor.o
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
new file mode 100644
index 00000000000..6bc815cd8cb
--- /dev/null
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -0,0 +1,87 @@
+/*
+ * sleep.c - x86-specific ACPI sleep support.
+ *
+ * Copyright (C) 2001-2003 Patrick Mochel
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
+ */
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/dmi.h>
+#include <linux/cpumask.h>
+
+#include <asm/smp.h>
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_realmode_flags;
+extern char wakeup_start, wakeup_end;
+
+extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+ if (!acpi_wakeup_address) {
+ printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
+ return -ENOMEM;
+ }
+ memcpy((void *)acpi_wakeup_address, &wakeup_start,
+ &wakeup_end - &wakeup_start);
+ acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+ return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+}
+
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+ if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
+ printk(KERN_ERR
+ "ACPI: Wakeup code way too big, S3 disabled.\n");
+ return;
+ }
+
+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+ if (!acpi_wakeup_address)
+ printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+}
+
+
+static int __init acpi_sleep_setup(char *str)
+{
+ while ((str != NULL) && (*str != '\0')) {
+ if (strncmp(str, "s3_bios", 7) == 0)
+ acpi_realmode_flags |= 1;
+ if (strncmp(str, "s3_mode", 7) == 0)
+ acpi_realmode_flags |= 2;
+ if (strncmp(str, "s3_beep", 7) == 0)
+ acpi_realmode_flags |= 4;
+ str = strchr(str, ',');
+ if (str != NULL)
+ str += strspn(str, ", \t");
+ }
+ return 1;
+}
+
+__setup("acpi_sleep=", acpi_sleep_setup);
diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c
index 10699489cfe..63fe5525e02 100644
--- a/arch/x86/kernel/acpi/sleep_32.c
+++ b/arch/x86/kernel/acpi/sleep_32.c
@@ -12,76 +12,6 @@
#include <asm/smp.h>
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-
-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
- if (!acpi_wakeup_address)
- return 1;
- memcpy((void *)acpi_wakeup_address, &wakeup_start,
- &wakeup_end - &wakeup_start);
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/*
- * acpi_restore_state - undo effects of acpi_save_state_mem
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page from the first 1MB of memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16MB pages, but not
- * <1MB pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
- printk(KERN_ERR
- "ACPI: Wakeup code way too big, S3 disabled.\n");
- return;
- }
-
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- if (!acpi_wakeup_address)
- printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "s3_bios", 7) == 0)
- acpi_realmode_flags |= 1;
- if (strncmp(str, "s3_mode", 7) == 0)
- acpi_realmode_flags |= 2;
- if (strncmp(str, "s3_beep", 7) == 0)
- acpi_realmode_flags |= 4;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-
/* Ouch, we want to delete this. We already have better version in userspace, in
s2ram from suspend.sf.net project */
static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
diff --git a/arch/x86/kernel/acpi/sleep_64.c b/arch/x86/kernel/acpi/sleep_64.c
deleted file mode 100644
index da42de261ba..00000000000
--- a/arch/x86/kernel/acpi/sleep_64.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port)
- * Copyright (C) 2003 Pavel Machek, SuSE Labs
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/bootmem.h>
-#include <linux/acpi.h>
-#include <linux/cpumask.h>
-
-#include <asm/mpspec.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-
-/* --------------------------------------------------------------------------
- Low-Level Sleep Support
- -------------------------------------------------------------------------- */
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
- memcpy((void *)acpi_wakeup_address, &wakeup_start,
- &wakeup_end - &wakeup_start);
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/*
- * acpi_restore_state
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page in low memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16M pages, but not
- * <1M pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
- if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
- printk(KERN_CRIT
- "ACPI: Wakeup code way too big, will crash on attempt"
- " to suspend\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "s3_bios", 7) == 0)
- acpi_realmode_flags |= 1;
- if (strncmp(str, "s3_mode", 7) == 0)
- acpi_realmode_flags |= 2;
- if (strncmp(str, "s3_beep", 7) == 0)
- acpi_realmode_flags |= 4;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 1e931aaf2ef..f53e3277f8e 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@
-.text
+ .section .text.page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 5ed3bc5c61d..2e1b9e0d076 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -344,13 +344,13 @@ do_suspend_lowlevel:
call save_processor_state
movq $saved_context, %rax
- movq %rsp, pt_regs_rsp(%rax)
- movq %rbp, pt_regs_rbp(%rax)
- movq %rsi, pt_regs_rsi(%rax)
- movq %rdi, pt_regs_rdi(%rax)
- movq %rbx, pt_regs_rbx(%rax)
- movq %rcx, pt_regs_rcx(%rax)
- movq %rdx, pt_regs_rdx(%rax)
+ movq %rsp, pt_regs_sp(%rax)
+ movq %rbp, pt_regs_bp(%rax)
+ movq %rsi, pt_regs_si(%rax)
+ movq %rdi, pt_regs_di(%rax)
+ movq %rbx, pt_regs_bx(%rax)
+ movq %rcx, pt_regs_cx(%rax)
+ movq %rdx, pt_regs_dx(%rax)
movq %r8, pt_regs_r8(%rax)
movq %r9, pt_regs_r9(%rax)
movq %r10, pt_regs_r10(%rax)
@@ -360,7 +360,7 @@ do_suspend_lowlevel:
movq %r14, pt_regs_r14(%rax)
movq %r15, pt_regs_r15(%rax)
pushfq
- popq pt_regs_eflags(%rax)
+ popq pt_regs_flags(%rax)
movq $.L97, saved_rip(%rip)
@@ -391,15 +391,15 @@ do_suspend_lowlevel:
movq %rbx, %cr2
movq saved_context_cr0(%rax), %rbx
movq %rbx, %cr0
- pushq pt_regs_eflags(%rax)
+ pushq pt_regs_flags(%rax)
popfq
- movq pt_regs_rsp(%rax), %rsp
- movq pt_regs_rbp(%rax), %rbp
- movq pt_regs_rsi(%rax), %rsi
- movq pt_regs_rdi(%rax), %rdi
- movq pt_regs_rbx(%rax), %rbx
- movq pt_regs_rcx(%rax), %rcx
- movq pt_regs_rdx(%rax), %rdx
+ movq pt_regs_sp(%rax), %rsp
+ movq pt_regs_bp(%rax), %rbp
+ movq pt_regs_si(%rax), %rsi
+ movq pt_regs_di(%rax), %rdi
+ movq pt_regs_bx(%rax), %rbx
+ movq pt_regs_cx(%rax), %rcx
+ movq pt_regs_dx(%rax), %rdx
movq pt_regs_r8(%rax), %r8
movq pt_regs_r9(%rax), %r9
movq pt_regs_r10(%rax), %r10
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d6405e0842b..45d79ea890a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -273,6 +273,7 @@ struct smp_alt_module {
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
+static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
@@ -341,12 +342,13 @@ void alternatives_smp_switch(int smp)
#ifdef CONFIG_LOCKDEP
/*
- * A not yet fixed binutils section handling bug prevents
- * alternatives-replacement from working reliably, so turn
- * it off:
+ * Older binutils section handling bug prevented
+ * alternatives-replacement from working reliably.
+ *
+ * If this still occurs then you should see a hang
+ * or crash shortly after this line:
*/
- printk("lockdep: not fixing up alternatives.\n");
- return;
+ printk("lockdep: fixing up alternatives.\n");
#endif
if (noreplace_smp || smp_alt_once)
@@ -354,21 +356,29 @@ void alternatives_smp_switch(int smp)
BUG_ON(!smp && (num_online_cpus() > 1));
spin_lock_irqsave(&smp_alt, flags);
- if (smp) {
+
+ /*
+ * Avoid unnecessary switches because it forces JIT based VMs to
+ * throw away all cached translations, which can be quite costly.
+ */
+ if (smp == smp_mode) {
+ /* nothing */
+ } else if (smp) {
printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
- clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+ clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
} else {
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
- set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+ set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_unlock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
}
+ smp_mode = smp;
spin_unlock_irqrestore(&smp_alt, flags);
}
@@ -431,8 +441,9 @@ void __init alternative_instructions(void)
if (smp_alt_once) {
if (1 == num_possible_cpus()) {
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
- set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
+ set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
+
alternatives_smp_unlock(__smp_locks, __smp_locks_end,
_text, _etext);
}
@@ -440,7 +451,10 @@ void __init alternative_instructions(void)
alternatives_smp_module_add(NULL, "core kernel",
__smp_locks, __smp_locks_end,
_text, _etext);
- alternatives_smp_switch(0);
+
+ /* Only switch to UP mode if we don't immediately boot others */
+ if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
+ alternatives_smp_switch(0);
}
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5b6992799c9..608152a2a05 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,12 +1,12 @@
-/*
+/*
* Firmware replacement code.
- *
+ *
* Work around broken BIOSes that don't set an aperture or only set the
- * aperture in the AGP bridge.
- * If all fails map the aperture over some low memory. This is cheaper than
- * doing bounce buffering. The memory is lost. This is done at early boot
- * because only the bootmem allocator can allocate 32+MB.
- *
+ * aperture in the AGP bridge.
+ * If all fails map the aperture over some low memory. This is cheaper than
+ * doing bounce buffering. The memory is lost. This is done at early boot
+ * because only the bootmem allocator can allocate 32+MB.
+ *
* Copyright 2002 Andi Kleen, SuSE Labs.
*/
#include <linux/kernel.h>
@@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initdata = 0;
int gart_iommu_aperture_allowed __initdata = 0;
int fallback_aper_order __initdata = 1; /* 64MB */
-int fallback_aper_force __initdata = 0;
+int fallback_aper_force __initdata = 0;
int fix_aperture __initdata = 1;
@@ -49,167 +49,270 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
/* This code runs before the PCI subsystem is initialized, so just
access the northbridge directly. */
-static u32 __init allocate_aperture(void)
+static u32 __init allocate_aperture(void)
{
u32 aper_size;
- void *p;
+ void *p;
- if (fallback_aper_order > 7)
- fallback_aper_order = 7;
- aper_size = (32 * 1024 * 1024) << fallback_aper_order;
+ if (fallback_aper_order > 7)
+ fallback_aper_order = 7;
+ aper_size = (32 * 1024 * 1024) << fallback_aper_order;
- /*
- * Aperture has to be naturally aligned. This means an 2GB aperture won't
- * have much chance of finding a place in the lower 4GB of memory.
- * Unfortunately we cannot move it up because that would make the
- * IOMMU useless.
+ /*
+ * Aperture has to be naturally aligned. This means a 2GB aperture
+ * won't have much chance of finding a place in the lower 4GB of
+ * memory. Unfortunately we cannot move it up because that would
+ * make the IOMMU useless.
*/
p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk("Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
+ printk(KERN_ERR
+ "Cannot allocate aperture memory hole (%p,%uK)\n",
+ p, aper_size>>10);
if (p)
free_bootmem(__pa(p), aper_size);
return 0;
}
- printk("Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
+ printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
+ aper_size >> 10, __pa(p));
insert_aperture_resource((u32)__pa(p), aper_size);
- return (u32)__pa(p);
+
+ return (u32)__pa(p);
}
static int __init aperture_valid(u64 aper_base, u32 aper_size)
-{
- if (!aper_base)
- return 0;
- if (aper_size < 64*1024*1024) {
- printk("Aperture too small (%d MB)\n", aper_size>>20);
+{
+ if (!aper_base)
return 0;
- }
+
if (aper_base + aper_size > 0x100000000UL) {
- printk("Aperture beyond 4GB. Ignoring.\n");
- return 0;
+ printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
+ return 0;
}
if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
- printk("Aperture pointing to e820 RAM. Ignoring.\n");
- return 0;
- }
+ printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
+ return 0;
+ }
+ if (aper_size < 64*1024*1024) {
+ printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
+ return 0;
+ }
+
return 1;
-}
+}
/* Find a PCI capability */
-static __u32 __init find_cap(int num, int slot, int func, int cap)
-{
- u8 pos;
+static __u32 __init find_cap(int num, int slot, int func, int cap)
+{
int bytes;
- if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST))
+ u8 pos;
+
+ if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+ PCI_STATUS_CAP_LIST))
return 0;
- pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST);
- for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
+
+ pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+ for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
u8 id;
- pos &= ~3;
- id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID);
+
+ pos &= ~3;
+ id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
if (id == 0xff)
break;
- if (id == cap)
- return pos;
- pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT);
- }
+ if (id == cap)
+ return pos;
+ pos = read_pci_config_byte(num, slot, func,
+ pos+PCI_CAP_LIST_NEXT);
+ }
return 0;
-}
+}
/* Read a standard AGPv3 bridge header */
static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
-{
+{
u32 apsize;
u32 apsizereg;
int nbits;
u32 aper_low, aper_hi;
u64 aper;
- printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);
- apsizereg = read_pci_config_16(num,slot,func, cap + 0x14);
+ printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func);
+ apsizereg = read_pci_config_16(num, slot, func, cap + 0x14);
if (apsizereg == 0xffffffff) {
- printk("APSIZE in AGP bridge unreadable\n");
+ printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
return 0;
}
apsize = apsizereg & 0xfff;
/* Some BIOS use weird encodings not in the AGPv3 table. */
- if (apsize & 0xff)
- apsize |= 0xf00;
+ if (apsize & 0xff)
+ apsize |= 0xf00;
nbits = hweight16(apsize);
*order = 7 - nbits;
if ((int)*order < 0) /* < 32MB */
*order = 0;
-
- aper_low = read_pci_config(num,slot,func, 0x10);
- aper_hi = read_pci_config(num,slot,func,0x14);
+
+ aper_low = read_pci_config(num, slot, func, 0x10);
+ aper_hi = read_pci_config(num, slot, func, 0x14);
aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
- printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
- aper, 32 << *order, apsizereg);
+ printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
+ aper, 32 << *order, apsizereg);
if (!aperture_valid(aper, (32*1024*1024) << *order))
- return 0;
- return (u32)aper;
-}
-
-/* Look for an AGP bridge. Windows only expects the aperture in the
- AGP bridge and some BIOS forget to initialize the Northbridge too.
- Work around this here.
-
- Do an PCI bus scan by hand because we're running before the PCI
- subsystem.
+ return 0;
+ return (u32)aper;
+}
- All K8 AGP bridges are AGPv3 compliant, so we can do this scan
- generically. It's probably overkill to always scan all slots because
- the AGP bridges should be always an own bus on the HT hierarchy,
- but do it here for future safety. */
+/*
+ * Look for an AGP bridge. Windows only expects the aperture in the
+ * AGP bridge and some BIOS forget to initialize the Northbridge too.
+ * Work around this here.
+ *
+ * Do an PCI bus scan by hand because we're running before the PCI
+ * subsystem.
+ *
+ * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * generically. It's probably overkill to always scan all slots because
+ * the AGP bridges should be always an own bus on the HT hierarchy,
+ * but do it here for future safety.
+ */
static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
{
int num, slot, func;
/* Poor man's PCI discovery */
- for (num = 0; num < 256; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
+ for (num = 0; num < 256; num++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
u32 class, cap;
u8 type;
- class = read_pci_config(num,slot,func,
+ class = read_pci_config(num, slot, func,
PCI_CLASS_REVISION);
if (class == 0xffffffff)
- break;
-
- switch (class >> 16) {
+ break;
+
+ switch (class >> 16) {
case PCI_CLASS_BRIDGE_HOST:
case PCI_CLASS_BRIDGE_OTHER: /* needed? */
/* AGP bridge? */
- cap = find_cap(num,slot,func,PCI_CAP_ID_AGP);
+ cap = find_cap(num, slot, func,
+ PCI_CAP_ID_AGP);
if (!cap)
break;
- *valid_agp = 1;
- return read_agp(num,slot,func,cap,order);
- }
-
+ *valid_agp = 1;
+ return read_agp(num, slot, func, cap,
+ order);
+ }
+
/* No multi-function device? */
- type = read_pci_config_byte(num,slot,func,
+ type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);
if (!(type & 0x80))
break;
- }
- }
+ }
+ }
}
- printk("No AGP bridge found\n");
+ printk(KERN_INFO "No AGP bridge found\n");
+
return 0;
}
+static int gart_fix_e820 __initdata = 1;
+
+static int __init parse_gart_mem(char *p)
+{
+ if (!p)
+ return -EINVAL;
+
+ if (!strncmp(p, "off", 3))
+ gart_fix_e820 = 0;
+ else if (!strncmp(p, "on", 2))
+ gart_fix_e820 = 1;
+
+ return 0;
+}
+early_param("gart_fix_e820", parse_gart_mem);
+
+void __init early_gart_iommu_check(void)
+{
+ /*
+ * in case it is enabled before, esp for kexec/kdump,
+ * previous kernel already enable that. memset called
+ * by allocate_aperture/__alloc_bootmem_nopanic cause restart.
+ * or second kernel have different position for GART hole. and new
+ * kernel could use hole as RAM that is still used by GART set by
+ * first kernel
+ * or BIOS forget to put that in reserved.
+ * try to update e820 to make that region as reserved.
+ */
+ int fix, num;
+ u32 ctl;
+ u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
+ u64 aper_base = 0, last_aper_base = 0;
+ int aper_enabled = 0, last_aper_enabled = 0;
+
+ if (!early_pci_allowed())
+ return;
+
+ fix = 0;
+ for (num = 24; num < 32; num++) {
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+ continue;
+
+ ctl = read_pci_config(0, num, 3, 0x90);
+ aper_enabled = ctl & 1;
+ aper_order = (ctl >> 1) & 7;
+ aper_size = (32 * 1024 * 1024) << aper_order;
+ aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
+ aper_base <<= 25;
+
+ if ((last_aper_order && aper_order != last_aper_order) ||
+ (last_aper_base && aper_base != last_aper_base) ||
+ (last_aper_enabled && aper_enabled != last_aper_enabled)) {
+ fix = 1;
+ break;
+ }
+ last_aper_order = aper_order;
+ last_aper_base = aper_base;
+ last_aper_enabled = aper_enabled;
+ }
+
+ if (!fix && !aper_enabled)
+ return;
+
+ if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL)
+ fix = 1;
+
+ if (gart_fix_e820 && !fix && aper_enabled) {
+ if (e820_any_mapped(aper_base, aper_base + aper_size,
+ E820_RAM)) {
+ /* reserved it, so we can resuse it in second kernel */
+ printk(KERN_INFO "update e820 for GART\n");
+ add_memory_region(aper_base, aper_size, E820_RESERVED);
+ update_e820();
+ }
+ return;
+ }
+
+ /* different nodes have different setting, disable them all at first*/
+ for (num = 24; num < 32; num++) {
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+ continue;
+
+ ctl = read_pci_config(0, num, 3, 0x90);
+ ctl &= ~1;
+ write_pci_config(0, num, 3, 0x90, ctl);
+ }
+
+}
+
void __init gart_iommu_hole_init(void)
-{
- int fix, num;
+{
u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
u64 aper_base, last_aper_base = 0;
- int valid_agp = 0;
+ int fix, num, valid_agp = 0;
+ int node;
if (gart_iommu_aperture_disabled || !fix_aperture ||
!early_pci_allowed())
@@ -218,24 +321,26 @@ void __init gart_iommu_hole_init(void)
printk(KERN_INFO "Checking aperture...\n");
fix = 0;
- for (num = 24; num < 32; num++) {
+ node = 0;
+ for (num = 24; num < 32; num++) {
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
continue;
iommu_detected = 1;
gart_iommu_aperture = 1;
- aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
- aper_size = (32 * 1024 * 1024) << aper_order;
+ aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
+ aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
- aper_base <<= 25;
+ aper_base <<= 25;
+
+ printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
+ node, aper_base, aper_size >> 20);
+ node++;
- printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
- aper_base, aper_size>>20);
-
if (!aperture_valid(aper_base, aper_size)) {
- fix = 1;
- break;
+ fix = 1;
+ break;
}
if ((last_aper_order && aper_order != last_aper_order) ||
@@ -245,55 +350,64 @@ void __init gart_iommu_hole_init(void)
}
last_aper_order = aper_order;
last_aper_base = aper_base;
- }
+ }
if (!fix && !fallback_aper_force) {
if (last_aper_base) {
unsigned long n = (32 * 1024 * 1024) << last_aper_order;
+
insert_aperture_resource((u32)last_aper_base, n);
}
- return;
+ return;
}
if (!fallback_aper_force)
- aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
-
- if (aper_alloc) {
+ aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
+
+ if (aper_alloc) {
/* Got the aperture from the AGP bridge */
} else if (swiotlb && !valid_agp) {
/* Do nothing */
} else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
force_iommu ||
valid_agp ||
- fallback_aper_force) {
- printk("Your BIOS doesn't leave a aperture memory hole\n");
- printk("Please enable the IOMMU option in the BIOS setup\n");
- printk("This costs you %d MB of RAM\n",
- 32 << fallback_aper_order);
+ fallback_aper_force) {
+ printk(KERN_ERR
+ "Your BIOS doesn't leave a aperture memory hole\n");
+ printk(KERN_ERR
+ "Please enable the IOMMU option in the BIOS setup\n");
+ printk(KERN_ERR
+ "This costs you %d MB of RAM\n",
+ 32 << fallback_aper_order);
aper_order = fallback_aper_order;
aper_alloc = allocate_aperture();
- if (!aper_alloc) {
- /* Could disable AGP and IOMMU here, but it's probably
- not worth it. But the later users cannot deal with
- bad apertures and turning on the aperture over memory
- causes very strange problems, so it's better to
- panic early. */
+ if (!aper_alloc) {
+ /*
+ * Could disable AGP and IOMMU here, but it's
+ * probably not worth it. But the later users
+ * cannot deal with bad apertures and turning
+ * on the aperture over memory causes very
+ * strange problems, so it's better to panic
+ * early.
+ */
panic("Not enough memory for aperture");
}
- } else {
- return;
- }
+ } else {
+ return;
+ }
/* Fix up the north bridges */
- for (num = 24; num < 32; num++) {
+ for (num = 24; num < 32; num++) {
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
- continue;
-
- /* Don't enable translation yet. That is done later.
- Assume this BIOS didn't initialise the GART so
- just overwrite all previous bits */
- write_pci_config(0, num, 3, 0x90, aper_order<<1);
- write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
- }
-}
+ continue;
+
+ /*
+ * Don't enable translation yet. That is done later.
+ * Assume this BIOS didn't initialise the GART so
+ * just overwrite all previous bits
+ */
+ write_pci_config(0, num, 3, 0x90, aper_order<<1);
+ write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
+ }
+}
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a56c782653b..35a568ea840 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -43,12 +43,10 @@
#include <mach_apicdef.h>
#include <mach_ipi.h>
-#include "io_ports.h"
-
/*
* Sanity check
*/
-#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
# error SPURIOUS_APIC_VECTOR definition error
#endif
@@ -57,7 +55,7 @@
*
* -1=force-disable, +1=force-enable
*/
-static int enable_local_apic __initdata = 0;
+static int enable_local_apic __initdata;
/* Local APIC timer verification ok */
static int local_apic_timer_verify_ok;
@@ -101,6 +99,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
+static unsigned long apic_phys;
+
/*
* Get the LAPIC version
*/
@@ -110,7 +110,7 @@ static inline int lapic_get_version(void)
}
/*
- * Check, if the APIC is integrated or a seperate chip
+ * Check, if the APIC is integrated or a separate chip
*/
static inline int lapic_is_integrated(void)
{
@@ -135,9 +135,9 @@ void apic_wait_icr_idle(void)
cpu_relax();
}
-unsigned long safe_apic_wait_icr_idle(void)
+u32 safe_apic_wait_icr_idle(void)
{
- unsigned long send_status;
+ u32 send_status;
int timeout;
timeout = 0;
@@ -154,7 +154,7 @@ unsigned long safe_apic_wait_icr_idle(void)
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
-void enable_NMI_through_LVT0 (void * dummy)
+void __cpuinit enable_NMI_through_LVT0(void)
{
unsigned int v = APIC_DM_NMI;
@@ -379,8 +379,10 @@ void __init setup_boot_APIC_clock(void)
*/
if (local_apic_timer_disabled) {
/* No broadcast on UP ! */
- if (num_possible_cpus() > 1)
+ if (num_possible_cpus() > 1) {
+ lapic_clockevent.mult = 1;
setup_APIC_timer();
+ }
return;
}
@@ -434,7 +436,7 @@ void __init setup_boot_APIC_clock(void)
"with PM Timer: %ldms instead of 100ms\n",
(long)res);
/* Correct the lapic counter value */
- res = (((u64) delta ) * pm_100ms);
+ res = (((u64) delta) * pm_100ms);
do_div(res, deltapm);
printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long) res, delta);
@@ -472,6 +474,19 @@ void __init setup_boot_APIC_clock(void)
local_apic_timer_verify_ok = 1;
+ /*
+ * Do a sanity check on the APIC calibration result
+ */
+ if (calibration_result < (1000000 / HZ)) {
+ local_irq_enable();
+ printk(KERN_WARNING
+ "APIC frequency too slow, disabling apic timer\n");
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1)
+ setup_APIC_timer();
+ return;
+ }
+
/* We trust the pm timer based calibration */
if (!pm_referenced) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
@@ -563,6 +578,9 @@ static void local_apic_timer_interrupt(void)
return;
}
+ /*
+ * the NMI deadlock-detector uses this.
+ */
per_cpu(irq_stat, cpu).apic_timer_irqs++;
evt->event_handler(evt);
@@ -576,8 +594,7 @@ static void local_apic_timer_interrupt(void)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-
-void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
+void smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -616,9 +633,14 @@ int setup_profiling_timer(unsigned int multiplier)
*/
void clear_local_APIC(void)
{
- int maxlvt = lapic_get_maxlvt();
- unsigned long v;
+ int maxlvt;
+ u32 v;
+
+ /* APIC hasn't been mapped yet */
+ if (!apic_phys)
+ return;
+ maxlvt = lapic_get_maxlvt();
/*
* Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
@@ -976,7 +998,8 @@ void __cpuinit setup_local_APIC(void)
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
- if (integrated && !esr_disable) { /* !82489DX */
+ if (integrated && !esr_disable) {
+ /* !82489DX */
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -1020,7 +1043,7 @@ void __cpuinit setup_local_APIC(void)
/*
* Detect and initialize APIC
*/
-static int __init detect_init_APIC (void)
+static int __init detect_init_APIC(void)
{
u32 h, l, features;
@@ -1077,7 +1100,7 @@ static int __init detect_init_APIC (void)
printk(KERN_WARNING "Could not enable APIC!\n");
return -1;
}
- set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
/* The BIOS may have set up the APIC at some other address */
@@ -1104,8 +1127,6 @@ no_apic:
*/
void __init init_apic_mappings(void)
{
- unsigned long apic_phys;
-
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
@@ -1164,10 +1185,10 @@ fake_ioapic_page:
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
-int __init APIC_init_uniprocessor (void)
+int __init APIC_init_uniprocessor(void)
{
if (enable_local_apic < 0)
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
if (!smp_found_config && !cpu_has_apic)
return -1;
@@ -1179,7 +1200,7 @@ int __init APIC_init_uniprocessor (void)
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
@@ -1210,50 +1231,6 @@ int __init APIC_init_uniprocessor (void)
}
/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
- enable_local_apic = 1;
- return 0;
-}
-early_param("lapic", parse_lapic);
-
-static int __init parse_nolapic(char *arg)
-{
- enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- return 0;
-}
-early_param("nolapic", parse_nolapic);
-
-static int __init parse_disable_lapic_timer(char *arg)
-{
- local_apic_timer_disabled = 1;
- return 0;
-}
-early_param("nolapic_timer", parse_disable_lapic_timer);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
- local_apic_timer_c2_ok = 1;
- return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init apic_set_verbosity(char *str)
-{
- if (strcmp("debug", str) == 0)
- apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
- apic_verbosity = APIC_VERBOSE;
- return 1;
-}
-
-__setup("apic=", apic_set_verbosity);
-
-
-/*
* Local APIC interrupts
*/
@@ -1306,7 +1283,7 @@ void smp_error_interrupt(struct pt_regs *regs)
6: Received illegal vector
7: Illegal register address
*/
- printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+ printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
irq_exit();
}
@@ -1393,7 +1370,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
value = apic_read(APIC_LVT0);
value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
apic_write_around(APIC_LVT0, value);
@@ -1565,3 +1542,46 @@ device_initcall(init_lapic_sysfs);
static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
+
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+ enable_local_apic = 1;
+ return 0;
+}
+early_param("lapic", parse_lapic);
+
+static int __init parse_nolapic(char *arg)
+{
+ enable_local_apic = -1;
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ return 0;
+}
+early_param("nolapic", parse_nolapic);
+
+static int __init parse_disable_lapic_timer(char *arg)
+{
+ local_apic_timer_disabled = 1;
+ return 0;
+}
+early_param("nolapic_timer", parse_disable_lapic_timer);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+ local_apic_timer_c2_ok = 1;
+ return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init apic_set_verbosity(char *str)
+{
+ if (strcmp("debug", str) == 0)
+ apic_verbosity = APIC_DEBUG;
+ else if (strcmp("verbose", str) == 0)
+ apic_verbosity = APIC_VERBOSE;
+ return 1;
+}
+__setup("apic=", apic_set_verbosity);
+
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index fa6cdee6d30..d8d03e09dea 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -23,32 +23,37 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
-#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/module.h>
#include <asm/atomic.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
+#include <asm/hpet.h>
#include <asm/pgalloc.h>
#include <asm/mach_apic.h>
#include <asm/nmi.h>
#include <asm/idle.h>
#include <asm/proto.h>
#include <asm/timex.h>
-#include <asm/hpet.h>
#include <asm/apic.h>
-int apic_verbosity;
int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata;
+int disable_apic;
-/* Local APIC timer works in C2? */
+/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-static struct resource *ioapic_resources;
+/*
+ * Debug level, exported for io_apic.c
+ */
+int apic_verbosity;
+
static struct resource lapic_resource = {
.name = "Local APIC",
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -60,10 +65,8 @@ static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt);
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt);
-
static void lapic_timer_broadcast(cpumask_t mask);
-
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen);
+static void apic_pm_activate(void);
static struct clock_event_device lapic_clockevent = {
.name = "lapic",
@@ -78,6 +81,150 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+static unsigned long apic_phys;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+ return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a seperate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+ return 1;
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
+static int modern_apic(void)
+{
+ /* AMD systems use old APIC versions, so check the CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 0xf)
+ return 1;
+ return lapic_get_version() >= 0x14;
+}
+
+void apic_wait_icr_idle(void)
+{
+ while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+u32 safe_apic_wait_icr_idle(void)
+{
+ u32 send_status;
+ int timeout;
+
+ timeout = 0;
+ do {
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ if (!send_status)
+ break;
+ udelay(100);
+ } while (timeout++ < 1000);
+
+ return send_status;
+}
+
+/**
+ * enable_NMI_through_LVT0 - enable NMI through local vector table 0
+ */
+void __cpuinit enable_NMI_through_LVT0(void)
+{
+ unsigned int v;
+
+ /* unmask and set to NMI */
+ v = APIC_DM_NMI;
+ apic_write(APIC_LVT0, v);
+}
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+ unsigned int v, maxlvt;
+
+ v = apic_read(APIC_LVR);
+ maxlvt = GET_APIC_MAXLVT(v);
+ return maxlvt;
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+ unsigned int lvtt_value, tmp_value;
+
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+ if (!irqen)
+ lvtt_value |= APIC_LVT_MASKED;
+
+ apic_write(APIC_LVTT, lvtt_value);
+
+ /*
+ * Divide PICLK by 16
+ */
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write(APIC_TDCR, (tmp_value
+ & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+ | APIC_TDR_DIV_16);
+
+ if (!oneshot)
+ apic_write(APIC_TMICT, clocks);
+}
+
+/*
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ */
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+ unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+ unsigned int v = (mask << 16) | (msg_type << 8) | vector;
+
+ apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_IBS;
+}
+
+/*
+ * Program the next event, relative to now
+ */
static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -85,6 +232,9 @@ static int lapic_next_event(unsigned long delta,
return 0;
}
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt)
{
@@ -127,75 +277,261 @@ static void lapic_timer_broadcast(cpumask_t mask)
#endif
}
-static void apic_pm_activate(void);
+/*
+ * Setup the local APIC timer for this CPU. Copy the initilized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void setup_APIC_timer(void)
+{
+ struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-void apic_wait_icr_idle(void)
+ memcpy(levt, &lapic_clockevent, sizeof(*levt));
+ levt->cpumask = cpumask_of_cpu(smp_processor_id());
+
+ clockevents_register_device(levt);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+#define TICK_COUNT 100000000
+
+static void __init calibrate_APIC_clock(void)
{
- while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
+ unsigned apic, apic_start;
+ unsigned long tsc, tsc_start;
+ int result;
+
+ local_irq_disable();
+
+ /*
+ * Put whatever arbitrary (but long enough) timeout
+ * value into the APIC clock, we just want to get the
+ * counter running for calibration.
+ *
+ * No interrupt enable !
+ */
+ __setup_APIC_LVTT(250000000, 0, 0);
+
+ apic_start = apic_read(APIC_TMCCT);
+#ifdef CONFIG_X86_PM_TIMER
+ if (apic_calibrate_pmtmr && pmtmr_ioport) {
+ pmtimer_wait(5000); /* 5ms wait */
+ apic = apic_read(APIC_TMCCT);
+ result = (apic_start - apic) * 1000L / 5;
+ } else
+#endif
+ {
+ rdtscll(tsc_start);
+
+ do {
+ apic = apic_read(APIC_TMCCT);
+ rdtscll(tsc);
+ } while ((tsc - tsc_start) < TICK_COUNT &&
+ (apic_start - apic) < TICK_COUNT);
+
+ result = (apic_start - apic) * 1000L * tsc_khz /
+ (tsc - tsc_start);
+ }
+
+ local_irq_enable();
+
+ printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
+
+ printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
+ result / 1000 / 1000, result / 1000 % 1000);
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
+ calibration_result = result / HZ;
}
-unsigned int safe_apic_wait_icr_idle(void)
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
{
- unsigned int send_status;
- int timeout;
+ /*
+ * The local apic timer can be disabled via the kernel commandline.
+ * Register the lapic timer as a dummy clock event source on SMP
+ * systems, so the broadcast mechanism is used. On UP systems simply
+ * ignore it.
+ */
+ if (disable_apic_timer) {
+ printk(KERN_INFO "Disabling APIC timer\n");
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1) {
+ lapic_clockevent.mult = 1;
+ setup_APIC_timer();
+ }
+ return;
+ }
- timeout = 0;
- do {
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- if (!send_status)
- break;
- udelay(100);
- } while (timeout++ < 1000);
+ printk(KERN_INFO "Using local APIC timer interrupts.\n");
+ calibrate_APIC_clock();
- return send_status;
+ /*
+ * Do a sanity check on the APIC calibration result
+ */
+ if (calibration_result < (1000000 / HZ)) {
+ printk(KERN_WARNING
+ "APIC frequency too slow, disabling apic timer\n");
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1)
+ setup_APIC_timer();
+ return;
+ }
+
+ /*
+ * If nmi_watchdog is set to IO_APIC, we need the
+ * PIT/HPET going. Otherwise register lapic as a dummy
+ * device.
+ */
+ if (nmi_watchdog != NMI_IO_APIC)
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ else
+ printk(KERN_WARNING "APIC timer registered as dummy,"
+ " due to nmi_watchdog=1!\n");
+
+ setup_APIC_timer();
}
-void enable_NMI_through_LVT0 (void * dummy)
+/*
+ * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
+ * C1E flag only in the secondary CPU, so when we detect the wreckage
+ * we already have enabled the boot CPU local apic timer. Check, if
+ * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
+ * set the DUMMY flag again and force the broadcast mode in the
+ * clockevents layer.
+ */
+void __cpuinit check_boot_apic_timer_broadcast(void)
{
- unsigned int v;
+ if (!disable_apic_timer ||
+ (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
+ return;
- /* unmask and set to NMI */
- v = APIC_DM_NMI;
- apic_write(APIC_LVT0, v);
+ printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
+ lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
+
+ local_irq_enable();
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
+ local_irq_disable();
}
-int get_maxlvt(void)
+void __cpuinit setup_secondary_APIC_clock(void)
{
- unsigned int v, maxlvt;
+ check_boot_apic_timer_broadcast();
+ setup_APIC_timer();
+}
- v = apic_read(APIC_LVR);
- maxlvt = GET_APIC_MAXLVT(v);
- return maxlvt;
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+ /*
+ * Normally we should not be here till LAPIC has been initialized but
+ * in some cases like kdump, its possible that there is a pending LAPIC
+ * timer interrupt from previous kernel's context and is delivered in
+ * new kernel the moment interrupts are enabled.
+ *
+ * Interrupts are enabled early and LAPIC is setup much later, hence
+ * its possible that when we get here evt->event_handler is NULL.
+ * Check for event_handler being NULL and discard the interrupt as
+ * spurious.
+ */
+ if (!evt->event_handler) {
+ printk(KERN_WARNING
+ "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ /* Switch it off */
+ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+ return;
+ }
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ add_pda(apic_timer_irqs, 1);
+
+ evt->event_handler(evt);
}
/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ * interrupt as well. Thus we cannot inline the local irq ... ]
*/
-void ack_bad_irq(unsigned int irq)
+void smp_apic_timer_interrupt(struct pt_regs *regs)
{
- printk("unexpected IRQ trap at vector %02x\n", irq);
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
/*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- * But don't ack when the APIC is disabled. -AK
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow.
*/
- if (!disable_apic)
- ack_APIC_irq();
+ ack_APIC_irq();
+ /*
+ * update_process_times() expects us to have done irq_enter().
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
+ exit_idle();
+ irq_enter();
+ local_apic_timer_interrupt();
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
+ * leftovers during boot.
+ */
void clear_local_APIC(void)
{
- int maxlvt;
- unsigned int v;
+ int maxlvt = lapic_get_maxlvt();
+ u32 v;
- maxlvt = get_maxlvt();
+ /* APIC hasn't been mapped yet */
+ if (!apic_phys)
+ return;
+ maxlvt = lapic_get_maxlvt();
/*
* Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
@@ -233,45 +569,9 @@ void clear_local_APIC(void)
apic_read(APIC_ESR);
}
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
- /* Go back to Virtual Wire compatibility mode */
- unsigned long value;
-
- /* For the spurious interrupt use vector F, and enable it */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= 0xf;
- apic_write(APIC_SPIV, value);
-
- if (!virt_wire_setup) {
- /*
- * For LVT0 make it edge triggered, active high,
- * external and enabled
- */
- value = apic_read(APIC_LVT0);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write(APIC_LVT0, value);
- } else {
- /* Disable LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- }
-
- /* For LVT1 make it edge triggered, active high, nmi and enabled */
- value = apic_read(APIC_LVT1);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write(APIC_LVT1, value);
-}
-
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
void disable_local_APIC(void)
{
unsigned int value;
@@ -333,7 +633,7 @@ int __init verify_local_APIC(void)
reg1 = GET_APIC_VERSION(reg0);
if (reg1 == 0x00 || reg1 == 0xff)
return 0;
- reg1 = get_maxlvt();
+ reg1 = lapic_get_maxlvt();
if (reg1 < 0x02 || reg1 == 0xff)
return 0;
@@ -355,18 +655,20 @@ int __init verify_local_APIC(void)
* compatibility mode, but most boxes are anymore.
*/
reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0);
+ apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
reg1 = apic_read(APIC_LVT1);
apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
return 1;
}
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
void __init sync_Arb_IDs(void)
{
/* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (ver >= 0x14) /* P4 or higher */
+ if (modern_apic())
return;
/*
@@ -418,9 +720,12 @@ void __init init_bsp_APIC(void)
apic_write(APIC_LVT1, value);
}
-void __cpuinit setup_local_APIC (void)
+/**
+ * setup_local_APIC - setup the local APIC
+ */
+void __cpuinit setup_local_APIC(void)
{
- unsigned int value, maxlvt;
+ unsigned int value;
int i, j;
value = apic_read(APIC_LVR);
@@ -516,30 +821,217 @@ void __cpuinit setup_local_APIC (void)
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
+}
- {
- unsigned oldvalue;
- maxlvt = get_maxlvt();
- oldvalue = apic_read(APIC_ESR);
- value = ERROR_APIC_VECTOR; // enables sending errors
- apic_write(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE,
- "ESR value after enabling vector: %08x, after %08x\n",
- oldvalue, value);
- }
+void __cpuinit lapic_setup_esr(void)
+{
+ unsigned maxlvt = lapic_get_maxlvt();
+
+ apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
+ /*
+ * spec says clear errors after enabling vector.
+ */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+}
+void __cpuinit end_local_APIC_setup(void)
+{
+ lapic_setup_esr();
nmi_watchdog_default();
setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
}
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+ if (!cpu_has_apic) {
+ printk(KERN_INFO "No local APIC present\n");
+ return -1;
+ }
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ boot_cpu_id = 0;
+ return 0;
+}
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+ /*
+ * If no local APIC can be found then set up a fake all
+ * zeroes page to simulate the local APIC and another
+ * one for the IO-APIC.
+ */
+ if (!smp_found_config && detect_init_APIC()) {
+ apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+ apic_phys = __pa(apic_phys);
+ } else
+ apic_phys = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, apic_phys);
+
+ /* Put local APIC into the resource map. */
+ lapic_resource.start = apic_phys;
+ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ insert_resource(&iomem_resource, &lapic_resource);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor(void)
+{
+ if (disable_apic) {
+ printk(KERN_INFO "Apic disabled\n");
+ return -1;
+ }
+ if (!cpu_has_apic) {
+ disable_apic = 1;
+ printk(KERN_INFO "Apic disabled by BIOS\n");
+ return -1;
+ }
+
+ verify_local_APIC();
+
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
+ apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
+
+ setup_local_APIC();
+
+ /*
+ * Now enable IO-APICs, actually call clear_IO_APIC
+ * We need clear_IO_APIC before enabling vector on BP
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+
+ end_local_APIC_setup();
+
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+ else
+ nr_ioapics = 0;
+ setup_boot_APIC_clock();
+ check_nmi_watchdog();
+ return 0;
+}
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+ unsigned int v;
+ exit_idle();
+ irq_enter();
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+ add_pda(irq_spurious_count, 1);
+ irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_error_interrupt(void)
+{
+ unsigned int v, v1;
+
+ exit_idle();
+ irq_enter();
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ /* Here is what the APIC error bits mean:
+ 0: Send CS error
+ 1: Receive CS error
+ 2: Send accept error
+ 3: Receive accept error
+ 4: Reserved
+ 5: Send illegal vector
+ 6: Received illegal vector
+ 7: Illegal register address
+ */
+ printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+ smp_processor_id(), v , v1);
+ irq_exit();
+}
+
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /*
+ * For LVT0 make it edge triggered, active high,
+ * external and enabled
+ */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write(APIC_LVT0, value);
+ } else {
+ /* Disable LVT0 */
+ apic_write(APIC_LVT0, APIC_LVT_MASKED);
+ }
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write(APIC_LVT1, value);
+}
+
+/*
+ * Power management
+ */
#ifdef CONFIG_PM
static struct {
@@ -571,7 +1063,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
if (!apic_pm_state.active)
return 0;
- maxlvt = get_maxlvt();
+ maxlvt = lapic_get_maxlvt();
apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
@@ -605,7 +1097,7 @@ static int lapic_resume(struct sys_device *dev)
if (!apic_pm_state.active)
return 0;
- maxlvt = get_maxlvt();
+ maxlvt = lapic_get_maxlvt();
local_irq_save(flags);
rdmsr(MSR_IA32_APICBASE, l, h);
@@ -645,8 +1137,8 @@ static struct sysdev_class lapic_sysclass = {
};
static struct sys_device device_lapic = {
- .id = 0,
- .cls = &lapic_sysclass,
+ .id = 0,
+ .cls = &lapic_sysclass,
};
static void __cpuinit apic_pm_activate(void)
@@ -657,9 +1149,11 @@ static void __cpuinit apic_pm_activate(void)
static int __init init_lapic_sysfs(void)
{
int error;
+
if (!cpu_has_apic)
return 0;
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
error = sysdev_class_register(&lapic_sysclass);
if (!error)
error = sysdev_register(&device_lapic);
@@ -673,423 +1167,6 @@ static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
-static int __init apic_set_verbosity(char *str)
-{
- if (str == NULL) {
- skip_ioapic_setup = 0;
- ioapic_force = 1;
- return 0;
- }
- if (strcmp("debug", str) == 0)
- apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
- apic_verbosity = APIC_VERBOSE;
- else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
- " use apic=verbose or apic=debug\n", str);
- return -EINVAL;
- }
-
- return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-
-static int __init detect_init_APIC (void)
-{
- if (!cpu_has_apic) {
- printk(KERN_INFO "No local APIC present\n");
- return -1;
- }
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_id = 0;
- return 0;
-}
-
-#ifdef CONFIG_X86_IO_APIC
-static struct resource * __init ioapic_setup_resources(void)
-{
-#define IOAPIC_RESOURCE_NAME_SIZE 11
- unsigned long n;
- struct resource *res;
- char *mem;
- int i;
-
- if (nr_ioapics <= 0)
- return NULL;
-
- n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
- n *= nr_ioapics;
-
- mem = alloc_bootmem(n);
- res = (void *)mem;
-
- if (mem != NULL) {
- memset(mem, 0, n);
- mem += sizeof(struct resource) * nr_ioapics;
-
- for (i = 0; i < nr_ioapics; i++) {
- res[i].name = mem;
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
- mem += IOAPIC_RESOURCE_NAME_SIZE;
- }
- }
-
- ioapic_resources = res;
-
- return res;
-}
-
-static int __init ioapic_insert_resources(void)
-{
- int i;
- struct resource *r = ioapic_resources;
-
- if (!r) {
- printk("IO APIC resources could be not be allocated.\n");
- return -1;
- }
-
- for (i = 0; i < nr_ioapics; i++) {
- insert_resource(&iomem_resource, r);
- r++;
- }
-
- return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
-
-void __init init_apic_mappings(void)
-{
- unsigned long apic_phys;
-
- /*
- * If no local APIC can be found then set up a fake all
- * zeroes page to simulate the local APIC and another
- * one for the IO-APIC.
- */
- if (!smp_found_config && detect_init_APIC()) {
- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- apic_phys = __pa(apic_phys);
- } else
- apic_phys = mp_lapic_addr;
-
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
- APIC_BASE, apic_phys);
-
- /* Put local APIC into the resource map. */
- lapic_resource.start = apic_phys;
- lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
- insert_resource(&iomem_resource, &lapic_resource);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-
- {
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- int i;
- struct resource *ioapic_res;
-
- ioapic_res = ioapic_setup_resources();
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- } else {
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
- ioapic_phys = __pa(ioapic_phys);
- }
- set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE,
- "mapped IOAPIC to %016lx (%016lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
-
- if (ioapic_res != NULL) {
- ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
- ioapic_res++;
- }
- }
- }
-}
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
- unsigned int lvtt_value, tmp_value;
-
- lvtt_value = LOCAL_TIMER_VECTOR;
- if (!oneshot)
- lvtt_value |= APIC_LVT_TIMER_PERIODIC;
- if (!irqen)
- lvtt_value |= APIC_LVT_MASKED;
-
- apic_write(APIC_LVTT, lvtt_value);
-
- /*
- * Divide PICLK by 16
- */
- tmp_value = apic_read(APIC_TDCR);
- apic_write(APIC_TDCR, (tmp_value
- & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
- | APIC_TDR_DIV_16);
-
- if (!oneshot)
- apic_write(APIC_TMICT, clocks);
-}
-
-static void setup_APIC_timer(void)
-{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
- memcpy(levt, &lapic_clockevent, sizeof(*levt));
- levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
- clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static void __init calibrate_APIC_clock(void)
-{
- unsigned apic, apic_start;
- unsigned long tsc, tsc_start;
- int result;
-
- local_irq_disable();
-
- /*
- * Put whatever arbitrary (but long enough) timeout
- * value into the APIC clock, we just want to get the
- * counter running for calibration.
- *
- * No interrupt enable !
- */
- __setup_APIC_LVTT(250000000, 0, 0);
-
- apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
- if (apic_calibrate_pmtmr && pmtmr_ioport) {
- pmtimer_wait(5000); /* 5ms wait */
- apic = apic_read(APIC_TMCCT);
- result = (apic_start - apic) * 1000L / 5;
- } else
-#endif
- {
- rdtscll(tsc_start);
-
- do {
- apic = apic_read(APIC_TMCCT);
- rdtscll(tsc);
- } while ((tsc - tsc_start) < TICK_COUNT &&
- (apic_start - apic) < TICK_COUNT);
-
- result = (apic_start - apic) * 1000L * tsc_khz /
- (tsc - tsc_start);
- }
-
- local_irq_enable();
-
- printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
- printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
- result / 1000 / 1000, result / 1000 % 1000);
-
- /* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
-
- calibration_result = result / HZ;
-}
-
-void __init setup_boot_APIC_clock (void)
-{
- /*
- * The local apic timer can be disabled via the kernel commandline.
- * Register the lapic timer as a dummy clock event source on SMP
- * systems, so the broadcast mechanism is used. On UP systems simply
- * ignore it.
- */
- if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
- /* No broadcast on UP ! */
- if (num_possible_cpus() > 1)
- setup_APIC_timer();
- return;
- }
-
- printk(KERN_INFO "Using local APIC timer interrupts.\n");
- calibrate_APIC_clock();
-
- /*
- * If nmi_watchdog is set to IO_APIC, we need the
- * PIT/HPET going. Otherwise register lapic as a dummy
- * device.
- */
- if (nmi_watchdog != NMI_IO_APIC)
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
- else
- printk(KERN_WARNING "APIC timer registered as dummy,"
- " due to nmi_watchdog=1!\n");
-
- setup_APIC_timer();
-}
-
-/*
- * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
- * C1E flag only in the secondary CPU, so when we detect the wreckage
- * we already have enabled the boot CPU local apic timer. Check, if
- * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
- * set the DUMMY flag again and force the broadcast mode in the
- * clockevents layer.
- */
-void __cpuinit check_boot_apic_timer_broadcast(void)
-{
- if (!disable_apic_timer ||
- (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
- return;
-
- printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
- lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
-
- local_irq_enable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
- local_irq_disable();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
- check_boot_apic_timer_broadcast();
- setup_APIC_timer();
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
-void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
- unsigned char msg_type, unsigned char mask)
-{
- unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
- unsigned int v = (mask << 16) | (msg_type << 8) | vector;
- apic_write(reg, v);
-}
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-void smp_local_timer_interrupt(void)
-{
- int cpu = smp_processor_id();
- struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
- /*
- * Normally we should not be here till LAPIC has been initialized but
- * in some cases like kdump, its possible that there is a pending LAPIC
- * timer interrupt from previous kernel's context and is delivered in
- * new kernel the moment interrupts are enabled.
- *
- * Interrupts are enabled early and LAPIC is setup much later, hence
- * its possible that when we get here evt->event_handler is NULL.
- * Check for event_handler being NULL and discard the interrupt as
- * spurious.
- */
- if (!evt->event_handler) {
- printk(KERN_WARNING
- "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
- /* Switch it off */
- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
- return;
- }
-
- /*
- * the NMI deadlock-detector uses this.
- */
- add_pda(apic_timer_irqs, 1);
-
- evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- * interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- exit_idle();
- irq_enter();
- smp_local_timer_interrupt();
- irq_exit();
- set_irq_regs(old_regs);
-}
-
/*
* apic_is_clustered_box() -- Check if we can expect good TSC
*
@@ -1103,21 +1180,34 @@ __cpuinit int apic_is_clustered_box(void)
{
int i, clusters, zeros;
unsigned id;
+ u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
for (i = 0; i < NR_CPUS; i++) {
- id = bios_cpu_apicid[i];
+ /* are we being called early in kernel startup? */
+ if (bios_cpu_apicid) {
+ id = bios_cpu_apicid[i];
+ }
+ else if (i < nr_cpu_ids) {
+ if (cpu_present(i))
+ id = per_cpu(x86_bios_cpu_apicid, i);
+ else
+ continue;
+ }
+ else
+ break;
+
if (id != BAD_APICID)
__set_bit(APIC_CLUSTERID(id), clustermap);
}
/* Problem: Partially populated chassis may not have CPUs in some of
* the APIC clusters they have been allocated. Only present CPUs have
- * bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since
- * clusters are allocated sequentially, count zeros only if they are
- * bounded by ones.
+ * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+ * Since clusters are allocated sequentially, count zeros only if
+ * they are bounded by ones.
*/
clusters = 0;
zeros = 0;
@@ -1138,96 +1228,33 @@ __cpuinit int apic_is_clustered_box(void)
}
/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
- unsigned int v;
- exit_idle();
- irq_enter();
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- add_pda(irq_spurious_count, 1);
- irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
+ * APIC command line parameters
*/
-
-asmlinkage void smp_error_interrupt(void)
-{
- unsigned int v, v1;
-
- exit_idle();
- irq_enter();
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
- smp_processor_id(), v , v1);
- irq_exit();
-}
-
-int disable_apic;
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
+static int __init apic_set_verbosity(char *str)
{
- if (disable_apic) {
- printk(KERN_INFO "Apic disabled\n");
- return -1;
+ if (str == NULL) {
+ skip_ioapic_setup = 0;
+ ioapic_force = 1;
+ return 0;
}
- if (!cpu_has_apic) {
- disable_apic = 1;
- printk(KERN_INFO "Apic disabled by BIOS\n");
- return -1;
+ if (strcmp("debug", str) == 0)
+ apic_verbosity = APIC_DEBUG;
+ else if (strcmp("verbose", str) == 0)
+ apic_verbosity = APIC_VERBOSE;
+ else {
+ printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+ " use apic=verbose or apic=debug\n", str);
+ return -EINVAL;
}
- verify_local_APIC();
-
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
-
- setup_local_APIC();
-
- if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else
- nr_ioapics = 0;
- setup_boot_APIC_clock();
- check_nmi_watchdog();
return 0;
}
+early_param("apic", apic_set_verbosity);
static __init int setup_disableapic(char *str)
{
disable_apic = 1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return 0;
}
early_param("disableapic", setup_disableapic);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index af045ca0f65..d4438ef296d 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/dmi.h>
#include <linux/suspend.h>
#include <linux/kthread.h>
+#include <linux/jiffies.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -235,8 +236,6 @@
#include <asm/paravirt.h>
#include <asm/reboot.h>
-#include "io_ports.h"
-
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
#endif
@@ -324,7 +323,7 @@ extern int (*console_blank_hook)(int);
/*
* Ignore suspend events for this amount of time after a resume
*/
-#define DEFAULT_BOUNCE_INTERVAL (3 * HZ)
+#define DEFAULT_BOUNCE_INTERVAL (3 * HZ)
/*
* Maximum number of events stored
@@ -336,7 +335,7 @@ extern int (*console_blank_hook)(int);
*/
struct apm_user {
int magic;
- struct apm_user * next;
+ struct apm_user *next;
unsigned int suser: 1;
unsigned int writer: 1;
unsigned int reader: 1;
@@ -372,44 +371,44 @@ struct apm_user {
static struct {
unsigned long offset;
unsigned short segment;
-} apm_bios_entry;
-static int clock_slowed;
-static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
-static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
-static int suspends_pending;
-static int standbys_pending;
-static int ignore_sys_suspend;
-static int ignore_normal_resume;
-static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
-
-static int debug __read_mostly;
-static int smp __read_mostly;
-static int apm_disabled = -1;
+} apm_bios_entry;
+static int clock_slowed;
+static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
+static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
+static int set_pm_idle;
+static int suspends_pending;
+static int standbys_pending;
+static int ignore_sys_suspend;
+static int ignore_normal_resume;
+static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
+
+static int debug __read_mostly;
+static int smp __read_mostly;
+static int apm_disabled = -1;
#ifdef CONFIG_SMP
-static int power_off;
+static int power_off;
#else
-static int power_off = 1;
+static int power_off = 1;
#endif
#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
+static int realmode_power_off = 1;
#else
-static int realmode_power_off;
+static int realmode_power_off;
#endif
#ifdef CONFIG_APM_ALLOW_INTS
-static int allow_ints = 1;
+static int allow_ints = 1;
#else
-static int allow_ints;
+static int allow_ints;
#endif
-static int broken_psr;
+static int broken_psr;
static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user * user_list;
+static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
-static const struct desc_struct bad_bios_desc = { 0, 0x00409200 };
+static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } };
-static const char driver_version[] = "1.16ac"; /* no spaces */
+static const char driver_version[] = "1.16ac"; /* no spaces */
static struct task_struct *kapmd_task;
@@ -417,7 +416,7 @@ static struct task_struct *kapmd_task;
* APM event names taken from the APM 1.2 specification. These are
* the message codes that the BIOS uses to tell us about events
*/
-static const char * const apm_event_name[] = {
+static const char * const apm_event_name[] = {
"system standby",
"system suspend",
"normal resume",
@@ -435,14 +434,14 @@ static const char * const apm_event_name[] = {
typedef struct lookup_t {
int key;
- char * msg;
+ char *msg;
} lookup_t;
/*
* The BIOS returns a set of standard error codes in AX when the
* carry flag is set.
*/
-
+
static const lookup_t error_table[] = {
/* N/A { APM_SUCCESS, "Operation succeeded" }, */
{ APM_DISABLED, "Power management disabled" },
@@ -472,24 +471,25 @@ static const lookup_t error_table[] = {
* Write a meaningful log entry to the kernel log in the event of
* an APM error.
*/
-
+
static void apm_error(char *str, int err)
{
- int i;
+ int i;
for (i = 0; i < ERROR_COUNT; i++)
- if (error_table[i].key == err) break;
+ if (error_table[i].key == err)
+ break;
if (i < ERROR_COUNT)
printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
else
printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
- str, err);
+ str, err);
}
/*
* Lock APM functionality to physical CPU 0
*/
-
+
#ifdef CONFIG_SMP
static cpumask_t apm_save_cpus(void)
@@ -511,7 +511,7 @@ static inline void apm_restore_cpus(cpumask_t mask)
/*
* No CPU lockdown needed on a uniprocessor
*/
-
+
#define apm_save_cpus() (current->cpus_allowed)
#define apm_restore_cpus(x) (void)(x)
@@ -590,7 +590,7 @@ static inline void apm_irq_restore(unsigned long flags)
* code is returned in AH (bits 8-15 of eax) and this function
* returns non-zero.
*/
-
+
static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
{
@@ -602,7 +602,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
struct desc_struct *gdt;
cpus = apm_save_cpus();
-
+
cpu = get_cpu();
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
@@ -616,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
gdt[0x40 / 8] = save_desc_40;
put_cpu();
apm_restore_cpus(cpus);
-
+
return *eax & 0xff;
}
@@ -645,7 +645,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
struct desc_struct *gdt;
cpus = apm_save_cpus();
-
+
cpu = get_cpu();
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
@@ -680,7 +680,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
static int apm_driver_version(u_short *val)
{
- u32 eax;
+ u32 eax;
if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
return (eax >> 8) & 0xff;
@@ -704,16 +704,16 @@ static int apm_driver_version(u_short *val)
* that APM 1.2 is in use. If no messges are pending the value 0x80
* is returned (No power management events pending).
*/
-
+
static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 dummy;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 dummy;
if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
- &dummy, &dummy))
+ &dummy, &dummy))
return (eax >> 8) & 0xff;
*event = ebx;
if (apm_info.connection_version < 0x0102)
@@ -736,10 +736,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
* The state holds the state to transition to, which may in fact
* be an acceptance of a BIOS requested state change.
*/
-
+
static int set_power_state(u_short what, u_short state)
{
- u32 eax;
+ u32 eax;
if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
return (eax >> 8) & 0xff;
@@ -752,7 +752,7 @@ static int set_power_state(u_short what, u_short state)
*
* Transition the entire system into a new APM power state.
*/
-
+
static int set_system_power_state(u_short state)
{
return set_power_state(APM_DEVICE_ALL, state);
@@ -766,13 +766,13 @@ static int set_system_power_state(u_short state)
* to handle the idle request. On a success the function returns 1
* if the BIOS did clock slowing or 0 otherwise.
*/
-
+
static int apm_do_idle(void)
{
- u32 eax;
- u8 ret = 0;
- int idled = 0;
- int polling;
+ u32 eax;
+ u8 ret = 0;
+ int idled = 0;
+ int polling;
polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
@@ -799,10 +799,9 @@ static int apm_do_idle(void)
/* This always fails on some SMP boards running UP kernels.
* Only report the failure the first 5 times.
*/
- if (++t < 5)
- {
+ if (++t < 5) {
printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
- (eax >> 8) & 0xff);
+ (eax >> 8) & 0xff);
t = jiffies;
}
return -1;
@@ -814,15 +813,15 @@ static int apm_do_idle(void)
/**
* apm_do_busy - inform the BIOS the CPU is busy
*
- * Request that the BIOS brings the CPU back to full performance.
+ * Request that the BIOS brings the CPU back to full performance.
*/
-
+
static void apm_do_busy(void)
{
- u32 dummy;
+ u32 dummy;
if (clock_slowed || ALWAYS_CALL_BUSY) {
- (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
+ (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
clock_slowed = 0;
}
}
@@ -833,15 +832,15 @@ static void apm_do_busy(void)
* power management - we probably want
* to conserve power.
*/
-#define IDLE_CALC_LIMIT (HZ * 100)
-#define IDLE_LEAKY_MAX 16
+#define IDLE_CALC_LIMIT (HZ * 100)
+#define IDLE_LEAKY_MAX 16
static void (*original_pm_idle)(void) __read_mostly;
/**
* apm_cpu_idle - cpu idling for APM capable Linux
*
- * This is the idling function the kernel executes when APM is available. It
+ * This is the idling function the kernel executes when APM is available. It
* tries to do BIOS powermanagement based on the average system idle time.
* Furthermore it calls the system default idle routine.
*/
@@ -882,7 +881,8 @@ recalc:
t = jiffies;
switch (apm_do_idle()) {
- case 0: apm_idle_done = 1;
+ case 0:
+ apm_idle_done = 1;
if (t != jiffies) {
if (bucket) {
bucket = IDLE_LEAKY_MAX;
@@ -893,7 +893,8 @@ recalc:
continue;
}
break;
- case 1: apm_idle_done = 1;
+ case 1:
+ apm_idle_done = 1;
break;
default: /* BIOS refused */
break;
@@ -921,10 +922,10 @@ recalc:
* the SMP call on CPU0 as some systems will only honour this call
* on their first cpu.
*/
-
+
static void apm_power_off(void)
{
- unsigned char po_bios_call[] = {
+ unsigned char po_bios_call[] = {
0xb8, 0x00, 0x10, /* movw $0x1000,ax */
0x8e, 0xd0, /* movw ax,ss */
0xbc, 0x00, 0xf0, /* movw $0xf000,sp */
@@ -935,13 +936,12 @@ static void apm_power_off(void)
};
/* Some bioses don't like being called from CPU != 0 */
- if (apm_info.realmode_power_off)
- {
+ if (apm_info.realmode_power_off) {
(void)apm_save_cpus();
machine_real_restart(po_bios_call, sizeof(po_bios_call));
+ } else {
+ (void)set_system_power_state(APM_STATE_OFF);
}
- else
- (void) set_system_power_state(APM_STATE_OFF);
}
#ifdef CONFIG_APM_DO_ENABLE
@@ -950,17 +950,17 @@ static void apm_power_off(void)
* apm_enable_power_management - enable BIOS APM power management
* @enable: enable yes/no
*
- * Enable or disable the APM BIOS power services.
+ * Enable or disable the APM BIOS power services.
*/
-
+
static int apm_enable_power_management(int enable)
{
- u32 eax;
+ u32 eax;
if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
return APM_NOT_ENGAGED;
if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
- enable, &eax))
+ enable, &eax))
return (eax >> 8) & 0xff;
if (enable)
apm_info.bios.flags &= ~APM_BIOS_DISABLED;
@@ -983,19 +983,19 @@ static int apm_enable_power_management(int enable)
* if reported is a lifetime in secodnds/minutes at current powwer
* consumption.
*/
-
+
static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 dummy;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u32 dummy;
if (apm_info.get_power_status_broken)
return APM_32_UNSUPPORTED;
if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
- &eax, &ebx, &ecx, &edx, &dummy))
+ &eax, &ebx, &ecx, &edx, &dummy))
return (eax >> 8) & 0xff;
*status = ebx;
*bat = ecx;
@@ -1011,11 +1011,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
static int apm_get_battery_status(u_short which, u_short *status,
u_short *bat, u_short *life, u_short *nbat)
{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u32 esi;
if (apm_info.connection_version < 0x0102) {
/* pretend we only have one battery. */
@@ -1026,7 +1026,7 @@ static int apm_get_battery_status(u_short which, u_short *status,
}
if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
- &ebx, &ecx, &edx, &esi))
+ &ebx, &ecx, &edx, &esi))
return (eax >> 8) & 0xff;
*status = ebx;
*bat = ecx;
@@ -1044,10 +1044,10 @@ static int apm_get_battery_status(u_short which, u_short *status,
* Activate or deactive power management on either a specific device
* or the entire system (%APM_DEVICE_ALL).
*/
-
+
static int apm_engage_power_management(u_short device, int enable)
{
- u32 eax;
+ u32 eax;
if ((enable == 0) && (device == APM_DEVICE_ALL)
&& (apm_info.bios.flags & APM_BIOS_DISABLED))
@@ -1074,7 +1074,7 @@ static int apm_engage_power_management(u_short device, int enable)
* all video devices. Typically the BIOS will do laptop backlight and
* monitor powerdown for us.
*/
-
+
static int apm_console_blank(int blank)
{
int error = APM_NOT_ENGAGED; /* silence gcc */
@@ -1126,7 +1126,7 @@ static apm_event_t get_queued_event(struct apm_user *as)
static void queue_event(apm_event_t event, struct apm_user *sender)
{
- struct apm_user * as;
+ struct apm_user *as;
spin_lock(&user_list_lock);
if (user_list == NULL)
@@ -1174,11 +1174,11 @@ static void reinit_timer(void)
spin_lock_irqsave(&i8253_lock, flags);
/* set the clock to HZ */
- outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
udelay(10);
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+ outb_pit(LATCH & 0xff, PIT_CH0); /* LSB */
udelay(10);
- outb(LATCH >> 8, PIT_CH0); /* MSB */
+ outb_pit(LATCH >> 8, PIT_CH0); /* MSB */
udelay(10);
spin_unlock_irqrestore(&i8253_lock, flags);
#endif
@@ -1186,7 +1186,7 @@ static void reinit_timer(void)
static int suspend(int vetoable)
{
- int err;
+ int err;
struct apm_user *as;
if (pm_send_all(PM_SUSPEND, (void *)3)) {
@@ -1239,7 +1239,7 @@ static int suspend(int vetoable)
static void standby(void)
{
- int err;
+ int err;
local_irq_disable();
device_power_down(PMSG_SUSPEND);
@@ -1256,8 +1256,8 @@ static void standby(void)
static apm_event_t get_event(void)
{
- int error;
- apm_event_t event = APM_NO_EVENTS; /* silence gcc */
+ int error;
+ apm_event_t event = APM_NO_EVENTS; /* silence gcc */
apm_eventinfo_t info;
static int notified;
@@ -1275,9 +1275,9 @@ static apm_event_t get_event(void)
static void check_events(void)
{
- apm_event_t event;
- static unsigned long last_resume;
- static int ignore_bounce;
+ apm_event_t event;
+ static unsigned long last_resume;
+ static int ignore_bounce;
while ((event = get_event()) != 0) {
if (debug) {
@@ -1289,7 +1289,7 @@ static void check_events(void)
"event 0x%02x\n", event);
}
if (ignore_bounce
- && ((jiffies - last_resume) > bounce_interval))
+ && (time_after(jiffies, last_resume + bounce_interval)))
ignore_bounce = 0;
switch (event) {
@@ -1357,7 +1357,7 @@ static void check_events(void)
/*
* We are not allowed to reject a critical suspend.
*/
- (void) suspend(0);
+ (void)suspend(0);
break;
}
}
@@ -1365,12 +1365,12 @@ static void check_events(void)
static void apm_event_handler(void)
{
- static int pending_count = 4;
- int err;
+ static int pending_count = 4;
+ int err;
if ((standbys_pending > 0) || (suspends_pending > 0)) {
if ((apm_info.connection_version > 0x100) &&
- (pending_count-- <= 0)) {
+ (pending_count-- <= 0)) {
pending_count = 4;
if (debug)
printk(KERN_DEBUG "apm: setting state busy\n");
@@ -1418,9 +1418,9 @@ static int check_apm_user(struct apm_user *as, const char *func)
static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
{
- struct apm_user * as;
- int i;
- apm_event_t event;
+ struct apm_user *as;
+ int i;
+ apm_event_t event;
as = fp->private_data;
if (check_apm_user(as, "read"))
@@ -1459,9 +1459,9 @@ static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *
return 0;
}
-static unsigned int do_poll(struct file *fp, poll_table * wait)
+static unsigned int do_poll(struct file *fp, poll_table *wait)
{
- struct apm_user * as;
+ struct apm_user *as;
as = fp->private_data;
if (check_apm_user(as, "poll"))
@@ -1472,10 +1472,10 @@ static unsigned int do_poll(struct file *fp, poll_table * wait)
return 0;
}
-static int do_ioctl(struct inode * inode, struct file *filp,
+static int do_ioctl(struct inode *inode, struct file *filp,
u_int cmd, u_long arg)
{
- struct apm_user * as;
+ struct apm_user *as;
as = filp->private_data;
if (check_apm_user(as, "ioctl"))
@@ -1515,9 +1515,9 @@ static int do_ioctl(struct inode * inode, struct file *filp,
return 0;
}
-static int do_release(struct inode * inode, struct file * filp)
+static int do_release(struct inode *inode, struct file *filp)
{
- struct apm_user * as;
+ struct apm_user *as;
as = filp->private_data;
if (check_apm_user(as, "release"))
@@ -1533,11 +1533,11 @@ static int do_release(struct inode * inode, struct file * filp)
if (suspends_pending <= 0)
(void) suspend(1);
}
- spin_lock(&user_list_lock);
+ spin_lock(&user_list_lock);
if (user_list == as)
user_list = as->next;
else {
- struct apm_user * as1;
+ struct apm_user *as1;
for (as1 = user_list;
(as1 != NULL) && (as1->next != as);
@@ -1553,9 +1553,9 @@ static int do_release(struct inode * inode, struct file * filp)
return 0;
}
-static int do_open(struct inode * inode, struct file * filp)
+static int do_open(struct inode *inode, struct file *filp)
{
- struct apm_user * as;
+ struct apm_user *as;
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
@@ -1569,7 +1569,7 @@ static int do_open(struct inode * inode, struct file * filp)
as->suspends_read = as->standbys_read = 0;
/*
* XXX - this is a tiny bit broken, when we consider BSD
- * process accounting. If the device is opened by root, we
+ * process accounting. If the device is opened by root, we
* instantly flag that we used superuser privs. Who knows,
* we might close the device immediately without doing a
* privileged operation -- cevans
@@ -1652,16 +1652,16 @@ static int proc_apm_show(struct seq_file *m, void *v)
8) min = minutes; sec = seconds */
seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
- driver_version,
- (apm_info.bios.version >> 8) & 0xff,
- apm_info.bios.version & 0xff,
- apm_info.bios.flags,
- ac_line_status,
- battery_status,
- battery_flag,
- percentage,
- time_units,
- units);
+ driver_version,
+ (apm_info.bios.version >> 8) & 0xff,
+ apm_info.bios.version & 0xff,
+ apm_info.bios.flags,
+ ac_line_status,
+ battery_status,
+ battery_flag,
+ percentage,
+ time_units,
+ units);
return 0;
}
@@ -1684,8 +1684,8 @@ static int apm(void *unused)
unsigned short cx;
unsigned short dx;
int error;
- char * power_stat;
- char * bat_stat;
+ char *power_stat;
+ char *bat_stat;
#ifdef CONFIG_SMP
/* 2002/08/01 - WT
@@ -1744,23 +1744,41 @@ static int apm(void *unused)
}
}
- if (debug && (num_online_cpus() == 1 || smp )) {
+ if (debug && (num_online_cpus() == 1 || smp)) {
error = apm_get_power_status(&bx, &cx, &dx);
if (error)
printk(KERN_INFO "apm: power status not available\n");
else {
switch ((bx >> 8) & 0xff) {
- case 0: power_stat = "off line"; break;
- case 1: power_stat = "on line"; break;
- case 2: power_stat = "on backup power"; break;
- default: power_stat = "unknown"; break;
+ case 0:
+ power_stat = "off line";
+ break;
+ case 1:
+ power_stat = "on line";
+ break;
+ case 2:
+ power_stat = "on backup power";
+ break;
+ default:
+ power_stat = "unknown";
+ break;
}
switch (bx & 0xff) {
- case 0: bat_stat = "high"; break;
- case 1: bat_stat = "low"; break;
- case 2: bat_stat = "critical"; break;
- case 3: bat_stat = "charging"; break;
- default: bat_stat = "unknown"; break;
+ case 0:
+ bat_stat = "high";
+ break;
+ case 1:
+ bat_stat = "low";
+ break;
+ case 2:
+ bat_stat = "critical";
+ break;
+ case 3:
+ bat_stat = "charging";
+ break;
+ default:
+ bat_stat = "unknown";
+ break;
}
printk(KERN_INFO
"apm: AC %s, battery status %s, battery life ",
@@ -1777,8 +1795,8 @@ static int apm(void *unused)
printk("unknown\n");
else
printk("%d %s\n", dx & 0x7fff,
- (dx & 0x8000) ?
- "minutes" : "seconds");
+ (dx & 0x8000) ?
+ "minutes" : "seconds");
}
}
}
@@ -1803,7 +1821,7 @@ static int apm(void *unused)
#ifndef MODULE
static int __init apm_setup(char *str)
{
- int invert;
+ int invert;
while ((str != NULL) && (*str != '\0')) {
if (strncmp(str, "off", 3) == 0)
@@ -1828,14 +1846,13 @@ static int __init apm_setup(char *str)
if ((strncmp(str, "power-off", 9) == 0) ||
(strncmp(str, "power_off", 9) == 0))
power_off = !invert;
- if (strncmp(str, "smp", 3) == 0)
- {
+ if (strncmp(str, "smp", 3) == 0) {
smp = !invert;
idle_threshold = 100;
}
if ((strncmp(str, "allow-ints", 10) == 0) ||
(strncmp(str, "allow_ints", 10) == 0))
- apm_info.allow_ints = !invert;
+ apm_info.allow_ints = !invert;
if ((strncmp(str, "broken-psr", 10) == 0) ||
(strncmp(str, "broken_psr", 10) == 0))
apm_info.get_power_status_broken = !invert;
@@ -1881,7 +1898,8 @@ static int __init print_if_true(const struct dmi_system_id *d)
*/
static int __init broken_ps2_resume(const struct dmi_system_id *d)
{
- printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
+ printk(KERN_INFO "%s machine detected. Mousepad Resume Bug "
+ "workaround hopefully not needed.\n", d->ident);
return 0;
}
@@ -1890,7 +1908,8 @@ static int __init set_realmode_power_off(const struct dmi_system_id *d)
{
if (apm_info.realmode_power_off == 0) {
apm_info.realmode_power_off = 1;
- printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
+ printk(KERN_INFO "%s bios detected. "
+ "Using realmode poweroff only.\n", d->ident);
}
return 0;
}
@@ -1900,7 +1919,8 @@ static int __init set_apm_ints(const struct dmi_system_id *d)
{
if (apm_info.allow_ints == 0) {
apm_info.allow_ints = 1;
- printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
+ printk(KERN_INFO "%s machine detected. "
+ "Enabling interrupts during APM calls.\n", d->ident);
}
return 0;
}
@@ -1910,7 +1930,8 @@ static int __init apm_is_horked(const struct dmi_system_id *d)
{
if (apm_info.disabled == 0) {
apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+ printk(KERN_INFO "%s machine detected. "
+ "Disabling APM.\n", d->ident);
}
return 0;
}
@@ -1919,7 +1940,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d)
{
if (apm_info.disabled == 0) {
apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
+ printk(KERN_INFO "%s machine detected. "
+ "Disabling APM.\n", d->ident);
printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
printk(KERN_INFO "download from support.intel.com \n");
}
@@ -1931,7 +1953,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d)
{
if (apm_info.forbid_idle == 0) {
apm_info.forbid_idle = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
+ printk(KERN_INFO "%s machine detected. "
+ "Disabling APM idle calls.\n", d->ident);
}
return 0;
}
@@ -1954,7 +1977,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d)
static int __init broken_apm_power(const struct dmi_system_id *d)
{
apm_info.get_power_status_broken = 1;
- printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
+ printk(KERN_WARNING "BIOS strings suggest APM bugs, "
+ "disabling power status reporting.\n");
return 0;
}
@@ -1965,7 +1989,8 @@ static int __init broken_apm_power(const struct dmi_system_id *d)
static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d)
{
apm_info.get_power_status_swabinminutes = 1;
- printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
+ printk(KERN_WARNING "BIOS strings suggest APM reports battery life "
+ "in minutes and wrong byte order.\n");
return 0;
}
@@ -1990,8 +2015,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
apm_is_horked, "Dell Inspiron 2500",
{ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+ DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
},
{ /* Allow interrupts during suspend on Dell Inspiron laptops*/
set_apm_ints, "Dell Inspiron", {
@@ -2014,15 +2039,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
apm_is_horked, "Dell Dimension 4100",
{ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+ DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
+ DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
},
{ /* Allow interrupts during suspend on Compaq Laptops*/
set_apm_ints, "Compaq 12XL125",
{ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
+ DMI_MATCH(DMI_BIOS_VERSION, "4.06"), },
},
{ /* Allow interrupts during APM or the clock goes slow */
set_apm_ints, "ASUSTeK",
@@ -2064,15 +2089,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
apm_is_horked, "Sharp PC-PJ/AX",
{ DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
- DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
- DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
+ DMI_MATCH(DMI_BIOS_VENDOR, "SystemSoft"),
+ DMI_MATCH(DMI_BIOS_VERSION, "Version R2.08"), },
},
{ /* APM crashes */
apm_is_horked, "Dell Inspiron 2500",
{ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+ DMI_MATCH(DMI_BIOS_VERSION, "A11"), },
},
{ /* APM idle hangs */
apm_likes_to_melt, "Jabil AMD",
@@ -2203,11 +2228,11 @@ static int __init apm_init(void)
return -ENODEV;
}
printk(KERN_INFO
- "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
- ((apm_info.bios.version >> 8) & 0xff),
- (apm_info.bios.version & 0xff),
- apm_info.bios.flags,
- driver_version);
+ "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
+ ((apm_info.bios.version >> 8) & 0xff),
+ (apm_info.bios.version & 0xff),
+ apm_info.bios.flags,
+ driver_version);
if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
printk(KERN_INFO "apm: no 32 bit BIOS support\n");
return -ENODEV;
@@ -2312,9 +2337,9 @@ static int __init apm_init(void)
}
wake_up_process(kapmd_task);
- if (num_online_cpus() > 1 && !smp ) {
+ if (num_online_cpus() > 1 && !smp) {
printk(KERN_NOTICE
- "apm: disabled - APM is not SMP safe (power off active).\n");
+ "apm: disabled - APM is not SMP safe (power off active).\n");
return 0;
}
@@ -2339,7 +2364,7 @@ static int __init apm_init(void)
static void __exit apm_exit(void)
{
- int error;
+ int error;
if (set_pm_idle) {
pm_idle = original_pm_idle;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0e45981b2dd..afd84463b71 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -38,15 +38,15 @@ void foo(void);
void foo(void)
{
- OFFSET(SIGCONTEXT_eax, sigcontext, eax);
- OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
- OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
- OFFSET(SIGCONTEXT_edx, sigcontext, edx);
- OFFSET(SIGCONTEXT_esi, sigcontext, esi);
- OFFSET(SIGCONTEXT_edi, sigcontext, edi);
- OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
- OFFSET(SIGCONTEXT_esp, sigcontext, esp);
- OFFSET(SIGCONTEXT_eip, sigcontext, eip);
+ OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
+ OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
+ OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
+ OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
+ OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
+ OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
+ OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
+ OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
+ OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
BLANK();
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
@@ -70,39 +70,38 @@ void foo(void)
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, Xgt_desc_struct, size);
- OFFSET(GDS_address, Xgt_desc_struct, address);
- OFFSET(GDS_pad, Xgt_desc_struct, pad);
+ OFFSET(GDS_size, desc_ptr, size);
+ OFFSET(GDS_address, desc_ptr, address);
BLANK();
- OFFSET(PT_EBX, pt_regs, ebx);
- OFFSET(PT_ECX, pt_regs, ecx);
- OFFSET(PT_EDX, pt_regs, edx);
- OFFSET(PT_ESI, pt_regs, esi);
- OFFSET(PT_EDI, pt_regs, edi);
- OFFSET(PT_EBP, pt_regs, ebp);
- OFFSET(PT_EAX, pt_regs, eax);
- OFFSET(PT_DS, pt_regs, xds);
- OFFSET(PT_ES, pt_regs, xes);
- OFFSET(PT_FS, pt_regs, xfs);
- OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
- OFFSET(PT_EIP, pt_regs, eip);
- OFFSET(PT_CS, pt_regs, xcs);
- OFFSET(PT_EFLAGS, pt_regs, eflags);
- OFFSET(PT_OLDESP, pt_regs, esp);
- OFFSET(PT_OLDSS, pt_regs, xss);
+ OFFSET(PT_EBX, pt_regs, bx);
+ OFFSET(PT_ECX, pt_regs, cx);
+ OFFSET(PT_EDX, pt_regs, dx);
+ OFFSET(PT_ESI, pt_regs, si);
+ OFFSET(PT_EDI, pt_regs, di);
+ OFFSET(PT_EBP, pt_regs, bp);
+ OFFSET(PT_EAX, pt_regs, ax);
+ OFFSET(PT_DS, pt_regs, ds);
+ OFFSET(PT_ES, pt_regs, es);
+ OFFSET(PT_FS, pt_regs, fs);
+ OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
+ OFFSET(PT_EIP, pt_regs, ip);
+ OFFSET(PT_CS, pt_regs, cs);
+ OFFSET(PT_EFLAGS, pt_regs, flags);
+ OFFSET(PT_OLDESP, pt_regs, sp);
+ OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
- OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
OFFSET(pbe_address, pbe, address);
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
- /* Offset from the sysenter stack to tss.esp0 */
- DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) -
+ /* Offset from the sysenter stack to tss.sp0 */
+ DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
@@ -111,8 +110,6 @@ void foo(void)
DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
- DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK);
-
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
#ifdef CONFIG_PARAVIRT
@@ -123,7 +120,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d1b6ed98774..494e1e096ee 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -38,7 +38,6 @@ int main(void)
#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
ENTRY(state);
ENTRY(flags);
- ENTRY(thread);
ENTRY(pid);
BLANK();
#undef ENTRY
@@ -47,6 +46,9 @@ int main(void)
ENTRY(addr_limit);
ENTRY(preempt_count);
ENTRY(status);
+#ifdef CONFIG_IA32_EMULATION
+ ENTRY(sysenter_return);
+#endif
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
@@ -59,17 +61,31 @@ int main(void)
ENTRY(data_offset);
BLANK();
#undef ENTRY
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+ OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+
#ifdef CONFIG_IA32_EMULATION
#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
- ENTRY(eax);
- ENTRY(ebx);
- ENTRY(ecx);
- ENTRY(edx);
- ENTRY(esi);
- ENTRY(edi);
- ENTRY(ebp);
- ENTRY(esp);
- ENTRY(eip);
+ ENTRY(ax);
+ ENTRY(bx);
+ ENTRY(cx);
+ ENTRY(dx);
+ ENTRY(si);
+ ENTRY(di);
+ ENTRY(bp);
+ ENTRY(sp);
+ ENTRY(ip);
BLANK();
#undef ENTRY
DEFINE(IA32_RT_SIGFRAME_sigcontext,
@@ -81,14 +97,14 @@ int main(void)
DEFINE(pbe_next, offsetof(struct pbe, next));
BLANK();
#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
- ENTRY(rbx);
- ENTRY(rbx);
- ENTRY(rcx);
- ENTRY(rdx);
- ENTRY(rsp);
- ENTRY(rbp);
- ENTRY(rsi);
- ENTRY(rdi);
+ ENTRY(bx);
+ ENTRY(bx);
+ ENTRY(cx);
+ ENTRY(dx);
+ ENTRY(sp);
+ ENTRY(bp);
+ ENTRY(si);
+ ENTRY(di);
ENTRY(r8);
ENTRY(r9);
ENTRY(r10);
@@ -97,7 +113,7 @@ int main(void)
ENTRY(r13);
ENTRY(r14);
ENTRY(r15);
- ENTRY(eflags);
+ ENTRY(flags);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
@@ -108,7 +124,7 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
+ DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
BLANK();
DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
BLANK();
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 0b9860530a6..30f25a75fe2 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -1,8 +1,6 @@
/*
* Implement 'Simple Boot Flag Specification 2.0'
*/
-
-
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -14,40 +12,38 @@
#include <linux/mc146818rtc.h>
-
#define SBF_RESERVED (0x78)
#define SBF_PNPOS (1<<0)
#define SBF_BOOTING (1<<1)
#define SBF_DIAG (1<<2)
#define SBF_PARITY (1<<7)
-
int sbf_port __initdata = -1; /* set via acpi_boot_init() */
-
static int __init parity(u8 v)
{
int x = 0;
int i;
-
- for(i=0;i<8;i++)
- {
- x^=(v&1);
- v>>=1;
+
+ for (i = 0; i < 8; i++) {
+ x ^= (v & 1);
+ v >>= 1;
}
+
return x;
}
static void __init sbf_write(u8 v)
{
unsigned long flags;
- if(sbf_port != -1)
- {
+
+ if (sbf_port != -1) {
v &= ~SBF_PARITY;
- if(!parity(v))
- v|=SBF_PARITY;
+ if (!parity(v))
+ v |= SBF_PARITY;
- printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v);
+ printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
+ sbf_port, v);
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(v, sbf_port);
@@ -57,33 +53,41 @@ static void __init sbf_write(u8 v)
static u8 __init sbf_read(void)
{
- u8 v;
unsigned long flags;
- if(sbf_port == -1)
+ u8 v;
+
+ if (sbf_port == -1)
return 0;
+
spin_lock_irqsave(&rtc_lock, flags);
v = CMOS_READ(sbf_port);
spin_unlock_irqrestore(&rtc_lock, flags);
+
return v;
}
static int __init sbf_value_valid(u8 v)
{
- if(v&SBF_RESERVED) /* Reserved bits */
+ if (v & SBF_RESERVED) /* Reserved bits */
return 0;
- if(!parity(v))
+ if (!parity(v))
return 0;
+
return 1;
}
static int __init sbf_init(void)
{
u8 v;
- if(sbf_port == -1)
+
+ if (sbf_port == -1)
return 0;
+
v = sbf_read();
- if(!sbf_value_valid(v))
- printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v);
+ if (!sbf_value_valid(v)) {
+ printk(KERN_WARNING "Simple Boot Flag value 0x%x read from "
+ "CMOS RAM was invalid\n", v);
+ }
v &= ~SBF_RESERVED;
v &= ~SBF_BOOTING;
@@ -92,7 +96,7 @@ static int __init sbf_init(void)
v |= SBF_PNPOS;
#endif
sbf_write(v);
+
return 0;
}
-
module_init(sbf_init);
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c
index 9a189cef640..8f520f93ffd 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/bugs_64.c
@@ -13,7 +13,6 @@
void __init check_bugs(void)
{
identify_cpu(&boot_cpu_data);
- mtrr_bp_init();
#if !defined(CONFIG_SMP)
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 3e91d3ee26e..238468ae199 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
&regs[CR_ECX], &regs[CR_EDX]);
if (regs[cb->reg] & (1 << cb->bit))
- set_bit(cb->feature, c->x86_capability);
+ set_cpu_cap(c, cb->feature);
}
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1ff88c7f45c..06fa159232f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_broken(void)
int force_mwait __cpuinitdata;
+void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+ if (cpuid_eax(0x80000000) >= 0x80000007) {
+ c->x86_power = cpuid_edx(0x80000007);
+ if (c->x86_power & (1<<8))
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ }
+}
+
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
#endif
+ early_init_amd(c);
+
/*
* FIXME: We should handle the K5 here. Set up the write
* range and also turn on MSR 83 bits 4 and 31 (write alloc,
@@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
}
- if (cpuid_eax(0x80000000) >= 0x80000007) {
- c->x86_power = cpuid_edx(0x80000007);
- if (c->x86_power & (1<<8))
- set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- }
-
#ifdef CONFIG_X86_HT
/*
* On a AMD multi core setup the lower bits of the APIC id
@@ -295,12 +300,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
local_apic_timer_disabled = 1;
#endif
- if (c->x86 == 0x10 && !force_mwait)
- clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
-
/* K6s reports MCEs but don't actually have all the MSRs */
if (c->x86 < 6)
clear_bit(X86_FEATURE_MCE, c->x86_capability);
+
+ if (cpu_has_xmm)
+ set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
}
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 205fd5ba57f..9b95edcfc6a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,7 @@
#include <linux/utsname.h>
#include <asm/bugs.h>
#include <asm/processor.h>
+#include <asm/processor-flags.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/paravirt.h>
@@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium);
static int __init no_387(char *s)
{
boot_cpu_data.hard_math = 0;
- write_cr0(0xE | read_cr0());
+ write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
return 1;
}
@@ -153,7 +154,7 @@ static void __init check_config(void)
* If we configured ourselves for a TSC, we'd better have one!
*/
#ifdef CONFIG_X86_TSC
- if (!cpu_has_tsc && !tsc_disable)
+ if (!cpu_has_tsc)
panic("Kernel compiled for Pentium+, requires TSC feature!");
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2fcf2051bd..db28aa9e2f6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,43 +22,48 @@
#include "cpu.h"
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
- [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
- [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
- [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
- [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
/*
* Segments used for calling PnP BIOS have byte granularity.
* They code segments and data segments have fixed 64k limits,
* the transfer segment sizes are set at run time.
*/
- [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
- [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
- [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+ /* 32-bit code */
+ [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+ /* 16-bit code */
+ [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+ /* 16-bit data */
+ [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+ /* 16-bit data */
+ [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+ /* 16-bit data */
+ [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
/*
* The APM segments have byte granularity and their bases
* are set at run time. All have 64k limits.
*/
- [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+ /* 32-bit code */
+ [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
/* 16-bit code */
- [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
- [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
+ [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+ /* data */
+ [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
- [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
- [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+ [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
+ [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
static int cachesize_override __cpuinitdata = -1;
-static int disable_x86_fxsr __cpuinitdata;
static int disable_x86_serial_nr __cpuinitdata = 1;
-static int disable_x86_sep __cpuinitdata;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
-extern int disable_pse;
-
static void __cpuinit default_init(struct cpuinfo_x86 * c)
{
/* Not much we can do here... */
@@ -207,16 +212,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
static int __init x86_fxsr_setup(char * s)
{
- /* Tell all the other CPUs to not use it... */
- disable_x86_fxsr = 1;
-
- /*
- * ... and clear the bits early in the boot_cpu_data
- * so that the bootup process doesn't try to do this
- * either.
- */
- clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
- clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
return 1;
}
__setup("nofxsr", x86_fxsr_setup);
@@ -224,7 +221,7 @@ __setup("nofxsr", x86_fxsr_setup);
static int __init x86_sep_setup(char * s)
{
- disable_x86_sep = 1;
+ setup_clear_cpu_cap(X86_FEATURE_SEP);
return 1;
}
__setup("nosep", x86_sep_setup);
@@ -281,6 +278,33 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
}
}
+static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
+{
+ u32 tfms, xlvl;
+ int ebx;
+
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+ if (have_cpuid_p()) {
+ /* Intel-defined flags: level 0x00000001 */
+ if (c->cpuid_level >= 0x00000001) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+ c->x86_capability[0] = capability;
+ c->x86_capability[4] = excap;
+ }
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ((xlvl & 0xffff0000) == 0x80000000) {
+ if (xlvl >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
+ }
+ }
+
+ }
+
+}
/* Do minimum CPU detection early.
Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
@@ -300,6 +324,17 @@ static void __init early_cpu_detect(void)
cpu_detect(c);
get_cpu_vendor(c, 1);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ early_init_amd(c);
+ break;
+ case X86_VENDOR_INTEL:
+ early_init_intel(c);
+ break;
+ }
+
+ early_get_cap(c);
}
static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
@@ -357,8 +392,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
init_scattered_cpuid_features(c);
}
- early_intel_workaround(c);
-
#ifdef CONFIG_X86_HT
c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
@@ -392,7 +425,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -418,20 +451,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
generic_identify(c);
- printk(KERN_DEBUG "CPU: After generic identify, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
-
- if (this_cpu->c_identify) {
+ if (this_cpu->c_identify)
this_cpu->c_identify(c);
- printk(KERN_DEBUG "CPU: After vendor identify, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
- }
-
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
@@ -453,23 +475,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
* we do "generic changes."
*/
- /* TSC disabled? */
- if ( tsc_disable )
- clear_bit(X86_FEATURE_TSC, c->x86_capability);
-
- /* FXSR disabled? */
- if (disable_x86_fxsr) {
- clear_bit(X86_FEATURE_FXSR, c->x86_capability);
- clear_bit(X86_FEATURE_XMM, c->x86_capability);
- }
-
- /* SEP disabled? */
- if (disable_x86_sep)
- clear_bit(X86_FEATURE_SEP, c->x86_capability);
-
- if (disable_pse)
- clear_bit(X86_FEATURE_PSE, c->x86_capability);
-
/* If the model name is still unset, do table lookup. */
if ( !c->x86_model_id[0] ) {
char *p;
@@ -482,13 +487,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86, c->x86_model);
}
- /* Now the feature flags better reflect actual CPU features! */
-
- printk(KERN_DEBUG "CPU: After all inits, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
-
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -501,8 +499,14 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
+ /* Clear all flags overriden by options */
+ for (i = 0; i < NCAPINTS; i++)
+ c->x86_capability[i] ^= cleared_cpu_caps[i];
+
/* Init Machine Check Exception if available. */
mcheck_init(c);
+
+ select_idle_routine(c);
}
void __init identify_boot_cpu(void)
@@ -510,7 +514,6 @@ void __init identify_boot_cpu(void)
identify_cpu(&boot_cpu_data);
sysenter_setup();
enable_sep_cpu();
- mtrr_bp_init();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -567,6 +570,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
#endif
+static __init int setup_noclflush(char *arg)
+{
+ setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+ return 1;
+}
+__setup("noclflush", setup_noclflush);
+
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -590,6 +600,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
printk("\n");
}
+static __init int setup_disablecpuid(char *arg)
+{
+ int bit;
+ if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+ setup_clear_cpu_cap(bit);
+ else
+ return 0;
+ return 1;
+}
+__setup("clearcpuid=", setup_disablecpuid);
+
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
/* This is hacky. :)
@@ -620,21 +641,13 @@ void __init early_cpu_init(void)
nexgen_init_cpu();
umc_init_cpu();
early_cpu_detect();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /* pse is not compatible with on-the-fly unmapping,
- * disable it even if the cpus claim to support it.
- */
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
- disable_pse = 1;
-#endif
}
/* Make sure %fs is initialized properly in idle threads */
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
- regs->xfs = __KERNEL_PERCPU;
+ regs->fs = __KERNEL_PERCPU;
return regs;
}
@@ -642,7 +655,7 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
* it's on the real one. */
void switch_to_new_gdt(void)
{
- struct Xgt_desc_struct gdt_descr;
+ struct desc_ptr gdt_descr;
gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
gdt_descr.size = GDT_SIZE - 1;
@@ -672,12 +685,6 @@ void __cpuinit cpu_init(void)
if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- if (tsc_disable && cpu_has_tsc) {
- printk(KERN_NOTICE "Disabling TSC...\n");
- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
- set_in_cr4(X86_CR4_TSD);
- }
load_idt(&idt_descr);
switch_to_new_gdt();
@@ -691,7 +698,7 @@ void __cpuinit cpu_init(void)
BUG();
enter_lazy_tlb(&init_mm, curr);
- load_esp0(t, thread);
+ load_sp0(t, thread);
set_tss_desc(cpu,t);
load_TR_desc();
load_LDT(&init_mm.context);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2f6432cef6f..ad6527a5beb 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -24,5 +24,6 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
extern int get_model_name(struct cpuinfo_x86 *c);
extern void display_cacheinfo(struct cpuinfo_x86 *c);
-extern void early_intel_workaround(struct cpuinfo_x86 *c);
+extern void early_init_intel(struct cpuinfo_x86 *c);
+extern void early_init_amd(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index fea0af0476b..a962dcb9c40 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -67,7 +67,8 @@ struct acpi_cpufreq_data {
unsigned int cpu_feature;
};
-static struct acpi_cpufreq_data *drv_data[NR_CPUS];
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -218,14 +219,14 @@ static u32 get_cur_val(cpumask_t mask)
if (unlikely(cpus_empty(mask)))
return 0;
- switch (drv_data[first_cpu(mask)]->cpu_feature) {
+ switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
- perf = drv_data[first_cpu(mask)]->acpi_data;
+ perf = per_cpu(drv_data, first_cpu(mask))->acpi_data;
cmd.addr.io.port = perf->control_register.address;
cmd.addr.io.bit_width = perf->control_register.bit_width;
break;
@@ -325,7 +326,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
#endif
- retval = drv_data[cpu]->max_freq * perf_percent / 100;
+ retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
put_cpu();
set_cpus_allowed(current, saved_mask);
@@ -336,7 +337,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- struct acpi_cpufreq_data *data = drv_data[cpu];
+ struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
unsigned int freq;
dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
@@ -370,7 +371,7 @@ static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
- struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+ struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
struct acpi_processor_performance *perf;
struct cpufreq_freqs freqs;
cpumask_t online_policy_cpus;
@@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+ struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
dprintk("acpi_cpufreq_verify\n");
@@ -570,7 +571,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
return -ENOMEM;
data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
- drv_data[cpu] = data;
+ per_cpu(drv_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -714,20 +715,20 @@ err_unreg:
acpi_processor_unregister_performance(perf, cpu);
err_free:
kfree(data);
- drv_data[cpu] = NULL;
+ per_cpu(drv_data, cpu) = NULL;
return result;
}
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+ struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
dprintk("acpi_cpufreq_cpu_exit\n");
if (data) {
cpufreq_frequency_table_put_attr(policy->cpu);
- drv_data[policy->cpu] = NULL;
+ per_cpu(drv_data, policy->cpu) = NULL;
acpi_processor_unregister_performance(data->acpi_data,
policy->cpu);
kfree(data);
@@ -738,7 +739,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+ struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
dprintk("acpi_cpufreq_resume\n");
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 749d00cb2eb..06fcce516d5 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -694,7 +694,7 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
if ( acpi_bus_get_device(obj_handle, &d) ) {
return 0;
}
- *return_value = (void *)acpi_driver_data(d);
+ *return_value = acpi_driver_data(d);
return 1;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 99e1ef9939b..a0522735dd9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -52,7 +52,7 @@
/* serialize freq changes */
static DEFINE_MUTEX(fidvid_mutex);
-static struct powernow_k8_data *powernow_data[NR_CPUS];
+static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
static int cpu_family = CPU_OPTERON;
@@ -1018,7 +1018,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
{
cpumask_t oldmask = CPU_MASK_ALL;
- struct powernow_k8_data *data = powernow_data[pol->cpu];
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
u32 checkfid;
u32 checkvid;
unsigned int newstate;
@@ -1094,7 +1094,7 @@ err_out:
/* Driver entry point to verify the policy and range of frequencies */
static int powernowk8_verify(struct cpufreq_policy *pol)
{
- struct powernow_k8_data *data = powernow_data[pol->cpu];
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
if (!data)
return -EINVAL;
@@ -1202,7 +1202,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
- powernow_data[pol->cpu] = data;
+ per_cpu(powernow_data, pol->cpu) = data;
return 0;
@@ -1216,7 +1216,7 @@ err_out:
static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
{
- struct powernow_k8_data *data = powernow_data[pol->cpu];
+ struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
if (!data)
return -EINVAL;
@@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
- data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))];
+ data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu)));
if (!data)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 88d66fb8411..404a6a2d401 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -5,6 +5,7 @@
#include <asm/dma.h>
#include <asm/io.h>
#include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
#include <asm/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
@@ -126,15 +127,12 @@ static void __cpuinit set_cx86_reorder(void)
static void __cpuinit set_cx86_memwb(void)
{
- u32 cr0;
-
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
- cr0 = 0x20000000;
- write_cr0(read_cr0() | cr0);
+ write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cc8c501b9f3..d1c372b018d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,6 +11,8 @@
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/ds.h>
#include "cpu.h"
@@ -27,13 +29,14 @@
struct movsl_mask movsl_mask __read_mostly;
#endif
-void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c)
+void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
- if (c->x86_vendor != X86_VENDOR_INTEL)
- return;
/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
if (c->x86 == 15 && c->x86_cache_alignment == 64)
c->x86_cache_alignment = 128;
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
/*
@@ -113,6 +116,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
unsigned int l2 = 0;
char *p = NULL;
+ early_init_intel(c);
+
#ifdef CONFIG_X86_F00F_BUG
/*
* All current models of Pentium and Pentium with MMX technology CPUs
@@ -132,7 +137,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
#endif
- select_idle_routine(c);
l2 = init_intel_cacheinfo(c);
if (c->cpuid_level > 9 ) {
unsigned eax = cpuid_eax(10);
@@ -201,16 +205,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
#endif
+ if (cpu_has_xmm2)
+ set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability);
if (c->x86 == 15) {
set_bit(X86_FEATURE_P4, c->x86_capability);
- set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
}
if (c->x86 == 6)
set_bit(X86_FEATURE_P3, c->x86_capability);
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-
if (cpu_has_ds) {
unsigned int l1;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
@@ -219,6 +220,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (!(l1 & (1<<12)))
set_bit(X86_FEATURE_PEBS, c->x86_capability);
}
+
+ if (cpu_has_bts)
+ ds_init_intel(c);
}
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
@@ -342,5 +346,22 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
EXPORT_SYMBOL(cmpxchg_386_u32);
#endif
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+ u64 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u64 *)ptr;
+ if (prev == old)
+ *(u64 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+
// arch_initcall(intel_cpu_init);
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index eef63e3630c..e633c9c2b76 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -16,7 +16,7 @@
#include "mce.h"
/* Machine Check Handler For AMD Athlon/Duron */
-static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
+static void k7_machine_check(struct pt_regs * regs, long error_code)
{
int recover=1;
u32 alow, ahigh, high, low;
@@ -27,29 +27,32 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
if (mcgstl & (1<<0)) /* Recoverable ? */
recover=0;
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
- for (i=1; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ for (i = 1; i < nr_mce_banks; i++) {
+ rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high&(1<<31)) {
+ char misc[20];
+ char addr[24];
+ misc[0] = addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high &= ~(1<<31);
if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
- printk ("\n");
+ printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+ smp_processor_id(), i, high, low, misc, addr);
/* Clear it */
- wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
/* Serialize */
wmb();
add_taint(TAINT_MACHINE_CHECK);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index 81fb6e2d35f..ae9f628838f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -8,7 +8,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
/* Call the installed machine check handler for this CPU setup. */
-extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
extern int nr_mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 34c781eddee..a5182dcd94a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -22,13 +22,13 @@ int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* Handle unconfigured int18 (should never happen) */
-static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
+static void unexpected_machine_check(struct pt_regs * regs, long error_code)
{
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
}
/* Call the installed machine check handler for this CPU setup. */
-void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 242e8668dbe..9a699ed0359 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
-struct mce_log mcelog = {
+static struct mce_log mcelog = {
MCE_LOG_SIGNATURE,
MCE_LOG_LEN,
};
@@ -80,7 +80,7 @@ void mce_log(struct mce *mce)
/* When the buffer fills up discard new entries. Assume
that the earlier errors are the more interesting. */
if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, &mcelog.flags);
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
return;
}
/* Old left over entry. Skip. */
@@ -110,12 +110,12 @@ static void print_mce(struct mce *m)
KERN_EMERG
"CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->cpu, m->mcgstatus, m->bank, m->status);
- if (m->rip) {
+ if (m->ip) {
printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->rip);
+ m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- print_symbol("{%s}", m->rip);
+ print_symbol("{%s}", m->ip);
printk("\n");
}
printk(KERN_EMERG "TSC %Lx ", m->tsc);
@@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_x86 *c)
static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
- m->rip = regs->rip;
+ m->ip = regs->ip;
m->cs = regs->cs;
} else {
- m->rip = 0;
+ m->ip = 0;
m->cs = 0;
}
if (rip_msr) {
/* Assume the RIP in the MSR is exact. Is this true? */
m->mcgstatus |= MCG_STATUS_EIPV;
- rdmsrl(rip_msr, m->rip);
+ rdmsrl(rip_msr, m->ip);
m->cs = 0;
}
}
@@ -192,10 +192,10 @@ void do_machine_check(struct pt_regs * regs, long error_code)
atomic_inc(&mce_entry);
- if (regs)
- notify_die(DIE_NMI, "machine check", regs, error_code, 18,
- SIGKILL);
- if (!banks)
+ if ((regs
+ && notify_die(DIE_NMI, "machine check", regs, error_code,
+ 18, SIGKILL) == NOTIFY_STOP)
+ || !banks)
goto out2;
memset(&m, 0, sizeof(struct mce));
@@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
* instruction which caused the MCE.
*/
if (m.mcgstatus & MCG_STATUS_EIPV)
- user_space = panicm.rip && (panicm.cs & 3);
+ user_space = panicm.ip && (panicm.cs & 3);
/*
* If we know that the error was in user space, send a
@@ -564,7 +564,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
loff_t *off)
{
unsigned long *cpu_tsc;
- static DECLARE_MUTEX(mce_read_sem);
+ static DEFINE_MUTEX(mce_read_mutex);
unsigned next;
char __user *buf = ubuf;
int i, err;
@@ -573,12 +573,12 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
if (!cpu_tsc)
return -ENOMEM;
- down(&mce_read_sem);
+ mutex_lock(&mce_read_mutex);
next = rcu_dereference(mcelog.next);
/* Only supports full reads right now */
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
- up(&mce_read_sem);
+ mutex_unlock(&mce_read_mutex);
kfree(cpu_tsc);
return -EINVAL;
}
@@ -621,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
memset(&mcelog.entry[i], 0, sizeof(struct mce));
}
}
- up(&mce_read_sem);
+ mutex_unlock(&mce_read_mutex);
kfree(cpu_tsc);
return err ? -EFAULT : buf - ubuf;
}
@@ -634,8 +634,7 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
return 0;
}
-static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd,
- unsigned long arg)
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
int __user *p = (int __user *)arg;
@@ -664,7 +663,7 @@ static const struct file_operations mce_chrdev_ops = {
.release = mce_release,
.read = mce_read,
.poll = mce_poll,
- .ioctl = mce_ioctl,
+ .unlocked_ioctl = mce_ioctl,
};
static struct miscdevice mce_log_device = {
@@ -855,8 +854,8 @@ static void mce_remove_device(unsigned int cpu)
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
@@ -873,7 +872,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-static struct notifier_block mce_cpu_notifier = {
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
.notifier_call = mce_cpu_callback,
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 753588755fe..32671da8184 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
{
unsigned int bank, block;
unsigned int cpu = smp_processor_id();
+ u8 lvt_off;
u32 low = 0, high = 0, address = 0;
for (bank = 0; bank < NR_BANKS; ++bank) {
@@ -153,14 +154,13 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
if (shared_bank[bank] && c->cpu_core_id)
break;
#endif
+ lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR,
+ APIC_EILVT_MSG_FIX, 0);
+
high &= ~MASK_LVTOFF_HI;
- high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
+ high |= lvt_off << 20;
wrmsr(address, low, high);
- setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
- THRESHOLD_APIC_VECTOR,
- K8_APIC_EXT_INT_MSG_FIX, 0);
-
threshold_defaults.address = address;
threshold_restart_bank(&threshold_defaults, 0, 0);
}
@@ -450,7 +450,8 @@ recurse:
if (err)
goto out_free;
- kobject_uevent(&b->kobj, KOBJ_ADD);
+ if (b)
+ kobject_uevent(&b->kobj, KOBJ_ADD);
return err;
@@ -554,7 +555,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
int err = 0;
for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
+ if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
err = threshold_create_bank(cpu, bank);
if (err)
@@ -637,14 +638,14 @@ static void threshold_remove_device(unsigned int cpu)
unsigned int bank;
for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
+ if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
threshold_remove_bank(cpu, bank);
}
}
/* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
/* cpu was unsigned int to begin with */
@@ -669,7 +670,7 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier = {
+static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
.notifier_call = threshold_cpu_callback,
};
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index be4dabfee1f..cb03345554a 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -57,7 +57,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
/* Thermal interrupt handler for this CPU setup */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
-fastcall void smp_thermal_interrupt(struct pt_regs *regs)
+void smp_thermal_interrupt(struct pt_regs *regs)
{
irq_enter();
vendor_thermal_interrupt(regs);
@@ -141,7 +141,7 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
}
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs * regs, long error_code)
{
int recover=1;
u32 alow, ahigh, high, low;
@@ -152,38 +152,41 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
if (mcgstl & (1<<0)) /* Recoverable ? */
recover=0;
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
if (mce_num_extended_msrs > 0) {
struct intel_mce_extended_msrs dbg;
intel_get_extended_msrs(&dbg);
- printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
- smp_processor_id(), dbg.eip, dbg.eflags);
- printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
- printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+ printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
+ "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
+ "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+ smp_processor_id(), dbg.eip, dbg.eflags,
+ dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
}
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ for (i = 0; i < nr_mce_banks; i++) {
+ rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
+ char misc[20];
+ char addr[24];
+ misc[0] = addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high &= ~(1<<31);
if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
- printk ("\n");
+ printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+ smp_processor_id(), i, high, low, misc, addr);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 94bc43d950c..a18310aaae0 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -16,7 +16,7 @@
#include "mce.h"
/* Machine check handler for Pentium class Intel */
-static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
+static void pentium_machine_check(struct pt_regs * regs, long error_code)
{
u32 loaddr, hi, lotype;
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index deeae42ce19..74342604d30 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -16,7 +16,7 @@
#include "mce.h"
/* Machine Check Handler For PII/PIII */
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs * regs, long error_code)
{
int recover=1;
u32 alow, ahigh, high, low;
@@ -27,27 +27,30 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
if (mcgstl & (1<<0)) /* Recoverable ? */
recover=0;
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ for (i = 0; i < nr_mce_banks; i++) {
+ rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
+ char misc[20];
+ char addr[24];
+ misc[0] = addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high &= ~(1<<31);
if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
+ rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
- printk ("\n");
+ printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
+ smp_processor_id(), i, high, low, misc, addr);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 9e424b6c293..3d428d5afc5 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
#include "mce.h"
/* Machine check handler for WinChip C6 */
-static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
+static void winchip_machine_check(struct pt_regs * regs, long error_code)
{
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK);
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 0949cdbf848..ee2331b0e58 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int reg, unsigned long base,
<base> The base address of the region.
<size> The size of the region. If this is 0 the region is disabled.
<type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
[RETURNS] Nothing.
*/
{
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 9964be3de2b..8e139c70f88 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -4,6 +4,7 @@
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
#include "mtrr.h"
int arr3_protected;
@@ -142,7 +143,7 @@ static void prepare_set(void)
/* Disable and flush caches. Note that wbinvd flushes the TLBs as
a side-effect */
- cr0 = read_cr0() | 0x40000000;
+ cr0 = read_cr0() | X86_CR0_CD;
wbinvd();
write_cr0(cr0);
wbinvd();
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 992f08dfbb6..103d61a59b1 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -9,11 +9,12 @@
#include <asm/msr.h>
#include <asm/system.h>
#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
#include <asm/tlbflush.h>
#include "mtrr.h"
struct mtrr_state {
- struct mtrr_var_range *var_ranges;
+ struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
mtrr_type fixed_ranges[NUM_FIXED_RANGES];
unsigned char enabled;
unsigned char have_fixed;
@@ -85,12 +86,6 @@ void __init get_mtrr_state(void)
struct mtrr_var_range *vrs;
unsigned lo, dummy;
- if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
- GFP_KERNEL);
- if (!mtrr_state.var_ranges)
- return;
- }
vrs = mtrr_state.var_ranges;
rdmsr(MTRRcap_MSR, lo, dummy);
@@ -188,7 +183,7 @@ static inline void k8_enable_fixed_iorrs(void)
* \param changed pointer which indicates whether the MTRR needed to be changed
* \param msrwords pointer to the MSR values which the MSR should have
*/
-static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
+static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
{
unsigned lo, hi;
@@ -200,7 +195,7 @@ static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
k8_enable_fixed_iorrs();
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
- *changed = TRUE;
+ *changed = true;
}
}
@@ -260,7 +255,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
static int set_fixed_ranges(mtrr_type * frs)
{
unsigned long long *saved = (unsigned long long *) frs;
- int changed = FALSE;
+ bool changed = false;
int block=-1, range;
while (fixed_range_blocks[++block].ranges)
@@ -273,17 +268,17 @@ static int set_fixed_ranges(mtrr_type * frs)
/* Set the MSR pair relating to a var range. Returns TRUE if
changes are made */
-static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
{
unsigned int lo, hi;
- int changed = FALSE;
+ bool changed = false;
rdmsr(MTRRphysBase_MSR(index), lo, hi);
if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
|| (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
- changed = TRUE;
+ changed = true;
}
rdmsr(MTRRphysMask_MSR(index), lo, hi);
@@ -292,7 +287,7 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
|| (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
- changed = TRUE;
+ changed = true;
}
return changed;
}
@@ -350,7 +345,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
spin_lock(&set_atomicity_lock);
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
- cr0 = read_cr0() | 0x40000000; /* set CD flag */
+ cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
wbinvd();
@@ -417,8 +412,6 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
<base> The base address of the region.
<size> The size of the region. If this is 0 the region is disabled.
<type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
[RETURNS] Nothing.
*/
{
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index c7d8f175674..91e150acb46 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
#include <asm/mtrr.h>
#include "mtrr.h"
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -37,7 +33,7 @@ const char *mtrr_attrib_to_str(int x)
static int
mtrr_file_add(unsigned long base, unsigned long size,
- unsigned int type, char increment, struct file *file, int page)
+ unsigned int type, bool increment, struct file *file, int page)
{
int reg, max;
unsigned int *fcount = FILE_FCOUNT(file);
@@ -55,7 +51,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
base >>= PAGE_SHIFT;
size >>= PAGE_SHIFT;
}
- reg = mtrr_add_page(base, size, type, 1);
+ reg = mtrr_add_page(base, size, type, true);
if (reg >= 0)
++fcount[reg];
return reg;
@@ -141,7 +137,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
size >>= PAGE_SHIFT;
err =
mtrr_add_page((unsigned long) base, (unsigned long) size, i,
- 1);
+ true);
if (err < 0)
return err;
return len;
@@ -217,7 +213,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err =
- mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+ mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
file, 0);
break;
case MTRRIOC_SET_ENTRY:
@@ -226,7 +222,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#endif
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
+ err = mtrr_add(sentry.base, sentry.size, sentry.type, false);
break;
case MTRRIOC_DEL_ENTRY:
#ifdef CONFIG_COMPAT
@@ -270,7 +266,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err =
- mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+ mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
file, 1);
break;
case MTRRIOC_SET_PAGE_ENTRY:
@@ -279,7 +275,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#endif
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
+ err =
+ mtrr_add_page(sentry.base, sentry.size, sentry.type, false);
break;
case MTRRIOC_DEL_PAGE_ENTRY:
#ifdef CONFIG_COMPAT
@@ -396,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
for (i = 0; i < max; i++) {
mtrr_if->get(i, &base, &size, &type);
if (size == 0)
- usage_table[i] = 0;
+ mtrr_usage_table[i] = 0;
else {
if (size < (0x100000 >> PAGE_SHIFT)) {
/* less than 1MB */
@@ -410,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
len += seq_printf(seq,
"reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_attrib_to_str(type), usage_table[i]);
+ mtrr_attrib_to_str(type), mtrr_usage_table[i]);
}
}
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index beb45c9c083..71591958265 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <asm/e820.h>
#include <asm/mtrr.h>
-
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -47,7 +47,7 @@
u32 num_var_ranges = 0;
-unsigned int *usage_table;
+unsigned int mtrr_usage_table[MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
@@ -121,13 +121,8 @@ static void __init init_table(void)
int i, max;
max = num_var_ranges;
- if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
- == NULL) {
- printk(KERN_ERR "mtrr: could not allocate\n");
- return;
- }
for (i = 0; i < max; i++)
- usage_table[i] = 1;
+ mtrr_usage_table[i] = 1;
}
struct set_mtrr_data {
@@ -311,7 +306,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
*/
int mtrr_add_page(unsigned long base, unsigned long size,
- unsigned int type, char increment)
+ unsigned int type, bool increment)
{
int i, replace, error;
mtrr_type ltype;
@@ -383,7 +378,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
goto out;
}
if (increment)
- ++usage_table[i];
+ ++mtrr_usage_table[i];
error = i;
goto out;
}
@@ -391,13 +386,15 @@ int mtrr_add_page(unsigned long base, unsigned long size,
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
set_mtrr(i, base, size, type);
- if (likely(replace < 0))
- usage_table[i] = 1;
- else {
- usage_table[i] = usage_table[replace] + !!increment;
+ if (likely(replace < 0)) {
+ mtrr_usage_table[i] = 1;
+ } else {
+ mtrr_usage_table[i] = mtrr_usage_table[replace];
+ if (increment)
+ mtrr_usage_table[i]++;
if (unlikely(replace != i)) {
set_mtrr(replace, 0, 0, 0);
- usage_table[replace] = 0;
+ mtrr_usage_table[replace] = 0;
}
}
} else
@@ -460,7 +457,7 @@ static int mtrr_check(unsigned long base, unsigned long size)
int
mtrr_add(unsigned long base, unsigned long size, unsigned int type,
- char increment)
+ bool increment)
{
if (mtrr_check(base, size))
return -EINVAL;
@@ -527,11 +524,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
goto out;
}
- if (usage_table[reg] < 1) {
+ if (mtrr_usage_table[reg] < 1) {
printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
goto out;
}
- if (--usage_table[reg] < 1)
+ if (--mtrr_usage_table[reg] < 1)
set_mtrr(reg, 0, 0, 0);
error = reg;
out:
@@ -591,16 +588,11 @@ struct mtrr_value {
unsigned long lsize;
};
-static struct mtrr_value * mtrr_state;
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
int i;
- int size = num_var_ranges * sizeof(struct mtrr_value);
-
- mtrr_state = kzalloc(size,GFP_ATOMIC);
- if (!mtrr_state)
- return -ENOMEM;
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i,
@@ -622,7 +614,6 @@ static int mtrr_restore(struct sys_device * sysdev)
mtrr_state[i].lsize,
mtrr_state[i].ltype);
}
- kfree(mtrr_state);
return 0;
}
@@ -633,6 +624,112 @@ static struct sysdev_driver mtrr_sysdev_driver = {
.resume = mtrr_restore,
};
+static int disable_mtrr_trim;
+
+static int __init disable_mtrr_trim_setup(char *str)
+{
+ disable_mtrr_trim = 1;
+ return 0;
+}
+early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
+
+/*
+ * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
+ * for memory >4GB. Check for that here.
+ * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
+ * apply to are wrong, but so far we don't know of any such case in the wild.
+ */
+#define Tom2Enabled (1U << 21)
+#define Tom2ForceMemTypeWB (1U << 22)
+
+static __init int amd_special_default_mtrr(void)
+{
+ u32 l, h;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return 0;
+ if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
+ return 0;
+ /* In case some hypervisor doesn't pass SYSCFG through */
+ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+ return 0;
+ /*
+ * Memory between 4GB and top of mem is forced WB by this magic bit.
+ * Reserved before K8RevF, but should be zero there.
+ */
+ if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
+ (Tom2Enabled | Tom2ForceMemTypeWB))
+ return 1;
+ return 0;
+}
+
+/**
+ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ *
+ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
+ * memory configurations. This routine checks that the highest MTRR matches
+ * the end of memory, to make sure the MTRRs having a write back type cover
+ * all of the memory the kernel is intending to use. If not, it'll trim any
+ * memory off the end by adjusting end_pfn, removing it from the kernel's
+ * allocation pools, warning the user with an obnoxious message.
+ */
+int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
+{
+ unsigned long i, base, size, highest_addr = 0, def, dummy;
+ mtrr_type type;
+ u64 trim_start, trim_size;
+
+ /*
+ * Make sure we only trim uncachable memory on machines that
+ * support the Intel MTRR architecture:
+ */
+ if (!is_cpu(INTEL) || disable_mtrr_trim)
+ return 0;
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (def != MTRR_TYPE_UNCACHABLE)
+ return 0;
+
+ if (amd_special_default_mtrr())
+ return 0;
+
+ /* Find highest cached pfn */
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ if (type != MTRR_TYPE_WRBACK)
+ continue;
+ base <<= PAGE_SHIFT;
+ size <<= PAGE_SHIFT;
+ if (highest_addr < base + size)
+ highest_addr = base + size;
+ }
+
+ /* kvm/qemu doesn't have mtrr set right, don't trim them all */
+ if (!highest_addr) {
+ printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
+ WARN_ON(1);
+ return 0;
+ }
+
+ if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+ printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+ " all of memory, losing %LdMB of RAM.\n",
+ (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+
+ WARN_ON(1);
+
+ printk(KERN_INFO "update e820 for mtrr\n");
+ trim_start = highest_addr;
+ trim_size = end_pfn;
+ trim_size <<= PAGE_SHIFT;
+ trim_size -= trim_start;
+ add_memory_region(trim_start, trim_size, E820_RESERVED);
+ update_e820();
+ return 1;
+ }
+
+ return 0;
+}
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 289dfe6030e..fb74a2c2081 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -2,10 +2,8 @@
* local mtrr defines.
*/
-#ifndef TRUE
-#define TRUE 1
-#define FALSE 0
-#endif
+#include <linux/types.h>
+#include <linux/stddef.h>
#define MTRRcap_MSR 0x0fe
#define MTRRdefType_MSR 0x2ff
@@ -14,6 +12,7 @@
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
#define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -34,6 +33,8 @@
an 8 bit field: */
typedef u8 mtrr_type;
+extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 49e20c2afcd..9f8ba923d1c 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -4,6 +4,7 @@
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/processor-cyrix.h>
+#include <asm/processor-flags.h>
#include "mtrr.h"
@@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
/* Disable and flush caches. Note that wbinvd flushes the TLBs as
a side-effect */
- cr0 = read_cr0() | 0x40000000;
+ cr0 = read_cr0() | X86_CR0_CD;
wbinvd();
write_cr0(cr0);
wbinvd();
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index c02541e6e65..9b838324b81 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int msr)
clear_bit(counter, evntsel_nmi_owner);
}
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3900e46d66d..02821326014 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -188,7 +188,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
static void c_stop(struct seq_file *m, void *v)
{
}
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index d387c770c51..dec66e45281 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -50,7 +50,7 @@ struct cpuid_command {
static void cpuid_smp_cpuid(void *cmd_block)
{
- struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
+ struct cpuid_command *cmd = cmd_block;
cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
&cmd->data[3]);
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 40978af630e..a47798b59f0 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -17,7 +17,7 @@ static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
static void doublefault_fn(void)
{
- struct Xgt_desc_struct gdt_desc = {0, 0};
+ struct desc_ptr gdt_desc = {0, 0};
unsigned long gdt, tss;
store_gdt(&gdt_desc);
@@ -33,14 +33,15 @@ static void doublefault_fn(void)
printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {
- struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
+ struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
- printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+ printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
+ t->ip, t->sp);
printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
- t->eax, t->ebx, t->ecx, t->edx);
+ t->ax, t->bx, t->cx, t->dx);
printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
- t->esi, t->edi);
+ t->si, t->di);
}
}
@@ -50,15 +51,15 @@ static void doublefault_fn(void)
struct tss_struct doublefault_tss __cacheline_aligned = {
.x86_tss = {
- .esp0 = STACK_START,
+ .sp0 = STACK_START,
.ss0 = __KERNEL_DS,
.ldt = 0,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
- .eip = (unsigned long) doublefault_fn,
+ .ip = (unsigned long) doublefault_fn,
/* 0x2 bit is always set */
- .eflags = X86_EFLAGS_SF | 0x2,
- .esp = STACK_START,
+ .flags = X86_EFLAGS_SF | 0x2,
+ .sp = STACK_START,
.es = __USER_DS,
.cs = __KERNEL_CS,
.ss = __KERNEL_DS,
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
new file mode 100644
index 00000000000..1c5ca4d1878
--- /dev/null
+++ b/arch/x86/kernel/ds.c
@@ -0,0 +1,464 @@
+/*
+ * Debug Store support
+ *
+ * This provides a low-level interface to the hardware's Debug Store
+ * feature that is used for last branch recording (LBR) and
+ * precise-event based sampling (PEBS).
+ *
+ * Different architectures use a different DS layout/pointer size.
+ * The below functions therefore work on a void*.
+ *
+ *
+ * Since there is no user for PEBS, yet, only LBR (or branch
+ * trace store, BTS) is supported.
+ *
+ *
+ * Copyright (C) 2007 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ */
+
+#include <asm/ds.h>
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+
+/*
+ * Debug Store (DS) save area configuration (see Intel64 and IA32
+ * Architectures Software Developer's Manual, section 18.5)
+ *
+ * The DS configuration consists of the following fields; different
+ * architetures vary in the size of those fields.
+ * - double-word aligned base linear address of the BTS buffer
+ * - write pointer into the BTS buffer
+ * - end linear address of the BTS buffer (one byte beyond the end of
+ * the buffer)
+ * - interrupt pointer into BTS buffer
+ * (interrupt occurs when write pointer passes interrupt pointer)
+ * - double-word aligned base linear address of the PEBS buffer
+ * - write pointer into the PEBS buffer
+ * - end linear address of the PEBS buffer (one byte beyond the end of
+ * the buffer)
+ * - interrupt pointer into PEBS buffer
+ * (interrupt occurs when write pointer passes interrupt pointer)
+ * - value to which counter is reset following counter overflow
+ *
+ * On later architectures, the last branch recording hardware uses
+ * 64bit pointers even in 32bit mode.
+ *
+ *
+ * Branch Trace Store (BTS) records store information about control
+ * flow changes. They at least provide the following information:
+ * - source linear address
+ * - destination linear address
+ *
+ * Netburst supported a predicated bit that had been dropped in later
+ * architectures. We do not suppor it.
+ *
+ *
+ * In order to abstract from the actual DS and BTS layout, we describe
+ * the access to the relevant fields.
+ * Thanks to Andi Kleen for proposing this design.
+ *
+ * The implementation, however, is not as general as it might seem. In
+ * order to stay somewhat simple and efficient, we assume an
+ * underlying unsigned type (mostly a pointer type) and we expect the
+ * field to be at least as big as that type.
+ */
+
+/*
+ * A special from_ip address to indicate that the BTS record is an
+ * info record that needs to be interpreted or skipped.
+ */
+#define BTS_ESCAPE_ADDRESS (-1)
+
+/*
+ * A field access descriptor
+ */
+struct access_desc {
+ unsigned char offset;
+ unsigned char size;
+};
+
+/*
+ * The configuration for a particular DS/BTS hardware implementation.
+ */
+struct ds_configuration {
+ /* the DS configuration */
+ unsigned char sizeof_ds;
+ struct access_desc bts_buffer_base;
+ struct access_desc bts_index;
+ struct access_desc bts_absolute_maximum;
+ struct access_desc bts_interrupt_threshold;
+ /* the BTS configuration */
+ unsigned char sizeof_bts;
+ struct access_desc from_ip;
+ struct access_desc to_ip;
+ /* BTS variants used to store additional information like
+ timestamps */
+ struct access_desc info_type;
+ struct access_desc info_data;
+ unsigned long debugctl_mask;
+};
+
+/*
+ * The global configuration used by the below accessor functions
+ */
+static struct ds_configuration ds_cfg;
+
+/*
+ * Accessor functions for some DS and BTS fields using the above
+ * global ptrace_bts_cfg.
+ */
+static inline unsigned long get_bts_buffer_base(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
+}
+static inline void set_bts_buffer_base(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
+}
+static inline unsigned long get_bts_index(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.bts_index.offset);
+}
+static inline void set_bts_index(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
+}
+static inline unsigned long get_bts_absolute_maximum(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
+}
+static inline void set_bts_absolute_maximum(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+}
+static inline unsigned long get_bts_interrupt_threshold(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
+}
+static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+}
+static inline unsigned long get_from_ip(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.from_ip.offset);
+}
+static inline void set_from_ip(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
+}
+static inline unsigned long get_to_ip(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.to_ip.offset);
+}
+static inline void set_to_ip(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
+}
+static inline unsigned char get_info_type(char *base)
+{
+ return *(unsigned char *)(base + ds_cfg.info_type.offset);
+}
+static inline void set_info_type(char *base, unsigned char value)
+{
+ (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
+}
+static inline unsigned long get_info_data(char *base)
+{
+ return *(unsigned long *)(base + ds_cfg.info_data.offset);
+}
+static inline void set_info_data(char *base, unsigned long value)
+{
+ (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
+}
+
+
+int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+{
+ size_t bts_size_in_records;
+ unsigned long bts;
+ void *ds;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ if (bts_size_in_bytes < 0)
+ return -EINVAL;
+
+ bts_size_in_records =
+ bts_size_in_bytes / ds_cfg.sizeof_bts;
+ bts_size_in_bytes =
+ bts_size_in_records * ds_cfg.sizeof_bts;
+
+ if (bts_size_in_bytes <= 0)
+ return -EINVAL;
+
+ bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
+
+ if (!bts)
+ return -ENOMEM;
+
+ ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+
+ if (!ds) {
+ kfree((void *)bts);
+ return -ENOMEM;
+ }
+
+ set_bts_buffer_base(ds, bts);
+ set_bts_index(ds, bts);
+ set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
+ set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+
+ *dsp = ds;
+ return 0;
+}
+
+int ds_free(void **dsp)
+{
+ if (*dsp)
+ kfree((void *)get_bts_buffer_base(*dsp));
+ kfree(*dsp);
+ *dsp = 0;
+
+ return 0;
+}
+
+int ds_get_bts_size(void *ds)
+{
+ int size_in_bytes;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ if (!ds)
+ return 0;
+
+ size_in_bytes =
+ get_bts_absolute_maximum(ds) -
+ get_bts_buffer_base(ds);
+ return size_in_bytes;
+}
+
+int ds_get_bts_end(void *ds)
+{
+ int size_in_bytes = ds_get_bts_size(ds);
+
+ if (size_in_bytes <= 0)
+ return size_in_bytes;
+
+ return size_in_bytes / ds_cfg.sizeof_bts;
+}
+
+int ds_get_bts_index(void *ds)
+{
+ int index_offset_in_bytes;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ index_offset_in_bytes =
+ get_bts_index(ds) -
+ get_bts_buffer_base(ds);
+
+ return index_offset_in_bytes / ds_cfg.sizeof_bts;
+}
+
+int ds_set_overflow(void *ds, int method)
+{
+ switch (method) {
+ case DS_O_SIGNAL:
+ return -EOPNOTSUPP;
+ case DS_O_WRAP:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int ds_get_overflow(void *ds)
+{
+ return DS_O_WRAP;
+}
+
+int ds_clear(void *ds)
+{
+ int bts_size = ds_get_bts_size(ds);
+ unsigned long bts_base;
+
+ if (bts_size <= 0)
+ return bts_size;
+
+ bts_base = get_bts_buffer_base(ds);
+ memset((void *)bts_base, 0, bts_size);
+
+ set_bts_index(ds, bts_base);
+ return 0;
+}
+
+int ds_read_bts(void *ds, int index, struct bts_struct *out)
+{
+ void *bts;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ if (index < 0)
+ return -EINVAL;
+
+ if (index >= ds_get_bts_size(ds))
+ return -EINVAL;
+
+ bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
+
+ memset(out, 0, sizeof(*out));
+ if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
+ out->qualifier = get_info_type(bts);
+ out->variant.jiffies = get_info_data(bts);
+ } else {
+ out->qualifier = BTS_BRANCH;
+ out->variant.lbr.from_ip = get_from_ip(bts);
+ out->variant.lbr.to_ip = get_to_ip(bts);
+ }
+
+ return sizeof(*out);;
+}
+
+int ds_write_bts(void *ds, const struct bts_struct *in)
+{
+ unsigned long bts;
+
+ if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+ return -EOPNOTSUPP;
+
+ if (ds_get_bts_size(ds) <= 0)
+ return -ENXIO;
+
+ bts = get_bts_index(ds);
+
+ memset((void *)bts, 0, ds_cfg.sizeof_bts);
+ switch (in->qualifier) {
+ case BTS_INVALID:
+ break;
+
+ case BTS_BRANCH:
+ set_from_ip((void *)bts, in->variant.lbr.from_ip);
+ set_to_ip((void *)bts, in->variant.lbr.to_ip);
+ break;
+
+ case BTS_TASK_ARRIVES:
+ case BTS_TASK_DEPARTS:
+ set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
+ set_info_type((void *)bts, in->qualifier);
+ set_info_data((void *)bts, in->variant.jiffies);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ bts = bts + ds_cfg.sizeof_bts;
+ if (bts >= get_bts_absolute_maximum(ds))
+ bts = get_bts_buffer_base(ds);
+ set_bts_index(ds, bts);
+
+ return ds_cfg.sizeof_bts;
+}
+
+unsigned long ds_debugctl_mask(void)
+{
+ return ds_cfg.debugctl_mask;
+}
+
+#ifdef __i386__
+static const struct ds_configuration ds_cfg_netburst = {
+ .sizeof_ds = 9 * 4,
+ .bts_buffer_base = { 0, 4 },
+ .bts_index = { 4, 4 },
+ .bts_absolute_maximum = { 8, 4 },
+ .bts_interrupt_threshold = { 12, 4 },
+ .sizeof_bts = 3 * 4,
+ .from_ip = { 0, 4 },
+ .to_ip = { 4, 4 },
+ .info_type = { 4, 1 },
+ .info_data = { 8, 4 },
+ .debugctl_mask = (1<<2)|(1<<3)
+};
+
+static const struct ds_configuration ds_cfg_pentium_m = {
+ .sizeof_ds = 9 * 4,
+ .bts_buffer_base = { 0, 4 },
+ .bts_index = { 4, 4 },
+ .bts_absolute_maximum = { 8, 4 },
+ .bts_interrupt_threshold = { 12, 4 },
+ .sizeof_bts = 3 * 4,
+ .from_ip = { 0, 4 },
+ .to_ip = { 4, 4 },
+ .info_type = { 4, 1 },
+ .info_data = { 8, 4 },
+ .debugctl_mask = (1<<6)|(1<<7)
+};
+#endif /* _i386_ */
+
+static const struct ds_configuration ds_cfg_core2 = {
+ .sizeof_ds = 9 * 8,
+ .bts_buffer_base = { 0, 8 },
+ .bts_index = { 8, 8 },
+ .bts_absolute_maximum = { 16, 8 },
+ .bts_interrupt_threshold = { 24, 8 },
+ .sizeof_bts = 3 * 8,
+ .from_ip = { 0, 8 },
+ .to_ip = { 8, 8 },
+ .info_type = { 8, 1 },
+ .info_data = { 16, 8 },
+ .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void
+ds_configure(const struct ds_configuration *cfg)
+{
+ ds_cfg = *cfg;
+}
+
+void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 0x6:
+ switch (c->x86_model) {
+#ifdef __i386__
+ case 0xD:
+ case 0xE: /* Pentium M */
+ ds_configure(&ds_cfg_pentium_m);
+ break;
+#endif /* _i386_ */
+ case 0xF: /* Core2 */
+ ds_configure(&ds_cfg_core2);
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ case 0xF:
+ switch (c->x86_model) {
+#ifdef __i386__
+ case 0x0:
+ case 0x1:
+ case 0x2: /* Netburst */
+ ds_configure(&ds_cfg_netburst);
+ break;
+#endif /* _i386_ */
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 18f500d185a..4e16ef4a265 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -7,7 +7,6 @@
#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/efi.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
@@ -17,11 +16,6 @@
#include <asm/e820.h>
#include <asm/setup.h>
-#ifdef CONFIG_EFI
-int efi_enabled = 0;
-EXPORT_SYMBOL(efi_enabled);
-#endif
-
struct e820map e820;
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
@@ -37,26 +31,6 @@ unsigned long pci_mem_start = 0x10000000;
EXPORT_SYMBOL(pci_mem_start);
#endif
extern int user_defined_memmap;
-struct resource data_resource = {
- .name = "Kernel data",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-struct resource code_resource = {
- .name = "Kernel code",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-struct resource bss_resource = {
- .name = "Kernel bss",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
static struct resource system_rom_resource = {
.name = "System ROM",
@@ -111,60 +85,6 @@ static struct resource video_rom_resource = {
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
-static struct resource video_ram_resource = {
- .name = "Video RAM area",
- .start = 0xa0000,
- .end = 0xbffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource standard_io_resources[] = { {
- .name = "dma1",
- .start = 0x0000,
- .end = 0x001f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "pic1",
- .start = 0x0020,
- .end = 0x0021,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "timer0",
- .start = 0x0040,
- .end = 0x0043,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "timer1",
- .start = 0x0050,
- .end = 0x0053,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "keyboard",
- .start = 0x0060,
- .end = 0x006f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "dma page reg",
- .start = 0x0080,
- .end = 0x008f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "pic2",
- .start = 0x00a0,
- .end = 0x00a1,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "dma2",
- .start = 0x00c0,
- .end = 0x00df,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "fpu",
- .start = 0x00f0,
- .end = 0x00ff,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-} };
-
#define ROMSIGNATURE 0xaa55
static int __init romsignature(const unsigned char *rom)
@@ -260,10 +180,9 @@ static void __init probe_roms(void)
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
-static void __init
-legacy_init_iomem_resources(struct resource *code_resource,
- struct resource *data_resource,
- struct resource *bss_resource)
+void __init init_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource,
+ struct resource *bss_resource)
{
int i;
@@ -305,35 +224,6 @@ legacy_init_iomem_resources(struct resource *code_resource,
}
}
-/*
- * Request address space for all standard resources
- *
- * This is called just before pcibios_init(), which is also a
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
- */
-static int __init request_standard_resources(void)
-{
- int i;
-
- printk("Setting up standard PCI resources\n");
- if (efi_enabled)
- efi_initialize_iomem_resources(&code_resource,
- &data_resource, &bss_resource);
- else
- legacy_init_iomem_resources(&code_resource,
- &data_resource, &bss_resource);
-
- /* EFI systems may still have VGA */
- request_resource(&iomem_resource, &video_ram_resource);
-
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
- return 0;
-}
-
-subsys_initcall(request_standard_resources);
-
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
/**
* e820_mark_nosave_regions - Find the ranges of physical addresses that do not
@@ -370,19 +260,17 @@ void __init add_memory_region(unsigned long long start,
{
int x;
- if (!efi_enabled) {
- x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
+ x = e820.nr_map;
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
}
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
} /* add_memory_region */
/*
@@ -598,29 +486,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
}
/*
- * Callback for efi_memory_walk.
- */
-static int __init
-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
-{
- unsigned long *max_pfn = arg, pfn;
-
- if (start < end) {
- pfn = PFN_UP(end -1);
- if (pfn > *max_pfn)
- *max_pfn = pfn;
- }
- return 0;
-}
-
-static int __init
-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
-{
- memory_present(0, PFN_UP(start), PFN_DOWN(end));
- return 0;
-}
-
-/*
* Find the highest page frame number we have available
*/
void __init find_max_pfn(void)
@@ -628,11 +493,6 @@ void __init find_max_pfn(void)
int i;
max_pfn = 0;
- if (efi_enabled) {
- efi_memmap_walk(efi_find_max_pfn, &max_pfn);
- efi_memmap_walk(efi_memory_present_wrapper, NULL);
- return;
- }
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
@@ -650,34 +510,12 @@ void __init find_max_pfn(void)
}
/*
- * Free all available memory for boot time allocation. Used
- * as a callback function by efi_memory_walk()
- */
-
-static int __init
-free_available_memory(unsigned long start, unsigned long end, void *arg)
-{
- /* check max_low_pfn */
- if (start >= (max_low_pfn << PAGE_SHIFT))
- return 0;
- if (end >= (max_low_pfn << PAGE_SHIFT))
- end = max_low_pfn << PAGE_SHIFT;
- if (start < end)
- free_bootmem(start, end - start);
-
- return 0;
-}
-/*
* Register fully available low RAM pages with the bootmem allocator.
*/
void __init register_bootmem_low_pages(unsigned long max_low_pfn)
{
int i;
- if (efi_enabled) {
- efi_memmap_walk(free_available_memory, NULL);
- return;
- }
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
@@ -785,56 +623,12 @@ void __init print_memory_map(char *who)
}
}
-static __init __always_inline void efi_limit_regions(unsigned long long size)
-{
- unsigned long long current_addr = 0;
- efi_memory_desc_t *md, *next_md;
- void *p, *p1;
- int i, j;
-
- j = 0;
- p1 = memmap.map;
- for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
- md = p;
- next_md = p1;
- current_addr = md->phys_addr +
- PFN_PHYS(md->num_pages);
- if (is_available_memory(md)) {
- if (md->phys_addr >= size) continue;
- memcpy(next_md, md, memmap.desc_size);
- if (current_addr >= size) {
- next_md->num_pages -=
- PFN_UP(current_addr-size);
- }
- p1 += memmap.desc_size;
- next_md = p1;
- j++;
- } else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
- EFI_MEMORY_RUNTIME) {
- /* In order to make runtime services
- * available we have to include runtime
- * memory regions in memory map */
- memcpy(next_md, md, memmap.desc_size);
- p1 += memmap.desc_size;
- next_md = p1;
- j++;
- }
- }
- memmap.nr_map = j;
- memmap.map_end = memmap.map +
- (memmap.nr_map * memmap.desc_size);
-}
-
void __init limit_regions(unsigned long long size)
{
unsigned long long current_addr;
int i;
print_memory_map("limit_regions start");
- if (efi_enabled) {
- efi_limit_regions(size);
- return;
- }
for (i = 0; i < e820.nr_map; i++) {
current_addr = e820.map[i].addr + e820.map[i].size;
if (current_addr < size)
@@ -955,3 +749,14 @@ static int __init parse_memmap(char *arg)
return 0;
}
early_param("memmap", parse_memmap);
+void __init update_e820(void)
+{
+ u8 nr_map;
+
+ nr_map = e820.nr_map;
+ if (sanitize_e820_map(e820.map, &nr_map))
+ return;
+ e820.nr_map = nr_map;
+ printk(KERN_INFO "modified physical RAM map:\n");
+ print_memory_map("modified");
+}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 04698e0b056..c617174e896 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -1,4 +1,4 @@
-/*
+/*
* Handle the memory map.
* The functions here do the job until bootmem takes over.
*
@@ -26,80 +26,87 @@
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/sections.h>
+#include <asm/kdebug.h>
struct e820map e820;
-/*
+/*
* PFN of last memory page.
*/
-unsigned long end_pfn;
-EXPORT_SYMBOL(end_pfn);
+unsigned long end_pfn;
-/*
+/*
* end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
* The direct mapping extends to end_pfn_map, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long end_pfn_map;
+ */
+unsigned long end_pfn_map;
-/*
+/*
* Last pfn which the user wants to use.
*/
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-extern struct resource code_resource, data_resource, bss_resource;
-
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
-{
- unsigned long addr = *addrp, last = addr + size;
-
- /* various gunk below that needed for SMP startup */
- if (addr < 0x8000) {
- *addrp = PAGE_ALIGN(0x8000);
- return 1;
- }
-
- /* direct mapping tables of the kernel */
- if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
- *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
- return 1;
- }
-
- /* initrd */
-#ifdef CONFIG_BLK_DEV_INITRD
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
-
- if (last >= ramdisk_image && addr < ramdisk_end) {
- *addrp = PAGE_ALIGN(ramdisk_end);
- return 1;
- }
- }
+/*
+ * Early reserved memory areas.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+ unsigned long start, end;
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+ { 0, PAGE_SIZE }, /* BIOS data page */
+#ifdef CONFIG_SMP
+ { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE },
#endif
- /* kernel code */
- if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
- *addrp = PAGE_ALIGN(__pa_symbol(&_end));
- return 1;
+ {}
+};
+
+void __init reserve_early(unsigned long start, unsigned long end)
+{
+ int i;
+ struct early_res *r;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (end > r->start && start < r->end)
+ panic("Overlapping early reservations %lx-%lx to %lx-%lx\n",
+ start, end, r->start, r->end);
}
+ if (i >= MAX_EARLY_RES)
+ panic("Too many early reservations");
+ r = &early_res[i];
+ r->start = start;
+ r->end = end;
+}
- if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
- *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
- return 1;
+void __init early_res_to_bootmem(void)
+{
+ int i;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ reserve_bootmem_generic(r->start, r->end - r->start);
}
+}
-#ifdef CONFIG_NUMA
- /* NUMA memory to node map */
- if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
- *addrp = nodemap_addr + nodemap_size;
- return 1;
+/* Check for already reserved areas */
+static inline int bad_addr(unsigned long *addrp, unsigned long size)
+{
+ int i;
+ unsigned long addr = *addrp, last;
+ int changed = 0;
+again:
+ last = addr + size;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last >= r->start && addr < r->end) {
+ *addrp = addr = r->end;
+ changed = 1;
+ goto again;
+ }
}
-#endif
- /* XXX ramdisk image here? */
- return 0;
-}
+ return changed;
+}
/*
* This function checks if any part of the range <start,end> is mapped
@@ -107,16 +114,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
*/
int
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
+{
int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (type && ei->type != type)
continue;
if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- return 1;
- }
+ continue;
+ return 1;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(e820_any_mapped);
@@ -127,11 +136,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped);
* Note: this function only works correct if the e820 table is sorted and
* not-overlapping, which is the case
*/
-int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+int __init e820_all_mapped(unsigned long start, unsigned long end,
+ unsigned type)
{
int i;
+
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
+
if (type && ei->type != type)
continue;
/* is the region (part) in overlap with the current region ?*/
@@ -143,65 +155,73 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type
*/
if (ei->addr <= start)
start = ei->addr + ei->size;
- /* if start is now at or beyond end, we're done, full coverage */
+ /*
+ * if start is now at or beyond end, we're done, full
+ * coverage
+ */
if (start >= end)
- return 1; /* we're done */
+ return 1;
}
return 0;
}
-/*
- * Find a free area in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long addr = ei->addr, last;
- if (ei->type != E820_RAM)
- continue;
- if (addr < start)
+/*
+ * Find a free area in a specific range.
+ */
+unsigned long __init find_e820_area(unsigned long start, unsigned long end,
+ unsigned size)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ unsigned long addr = ei->addr, last;
+
+ if (ei->type != E820_RAM)
+ continue;
+ if (addr < start)
addr = start;
- if (addr > ei->addr + ei->size)
- continue;
+ if (addr > ei->addr + ei->size)
+ continue;
while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
;
last = PAGE_ALIGN(addr) + size;
if (last > ei->addr + ei->size)
continue;
- if (last > end)
+ if (last > end)
continue;
- return addr;
- }
- return -1UL;
-}
+ return addr;
+ }
+ return -1UL;
+}
/*
* Find the highest page frame number we have available
*/
unsigned long __init e820_end_of_ram(void)
{
- unsigned long end_pfn = 0;
+ unsigned long end_pfn;
+
end_pfn = find_max_pfn_with_active_regions();
-
- if (end_pfn > end_pfn_map)
+
+ if (end_pfn > end_pfn_map)
end_pfn_map = end_pfn;
if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
end_pfn_map = MAXMEM>>PAGE_SHIFT;
if (end_pfn > end_user_pfn)
end_pfn = end_user_pfn;
- if (end_pfn > end_pfn_map)
- end_pfn = end_pfn_map;
+ if (end_pfn > end_pfn_map)
+ end_pfn = end_pfn_map;
- printk("end_pfn_map = %lu\n", end_pfn_map);
- return end_pfn;
+ printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
+ return end_pfn;
}
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
-void __init e820_reserve_resources(void)
+void __init e820_reserve_resources(struct resource *code_resource,
+ struct resource *data_resource, struct resource *bss_resource)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
@@ -219,13 +239,13 @@ void __init e820_reserve_resources(void)
request_resource(&iomem_resource, res);
if (e820.map[i].type == E820_RAM) {
/*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
+ * We don't know which RAM region contains kernel data,
+ * so we try it repeatedly and let the resource manager
+ * test it.
*/
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
- request_resource(res, &bss_resource);
+ request_resource(res, code_resource);
+ request_resource(res, data_resource);
+ request_resource(res, bss_resource);
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
request_resource(res, &crashk_res);
@@ -322,9 +342,9 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
add_active_range(nid, ei_startpfn, ei_endpfn);
}
-/*
+/*
* Add a memory region to the kernel e820 map.
- */
+ */
void __init add_memory_region(unsigned long start, unsigned long size, int type)
{
int x = e820.nr_map;
@@ -349,9 +369,7 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long end_pfn = end >> PAGE_SHIFT;
- unsigned long ei_startpfn;
- unsigned long ei_endpfn;
- unsigned long ram = 0;
+ unsigned long ei_startpfn, ei_endpfn, ram = 0;
int i;
for (i = 0; i < e820.nr_map; i++) {
@@ -363,28 +381,31 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
return end - start - (ram << PAGE_SHIFT);
}
-void __init e820_print_map(char *who)
+static void __init e820_print_map(char *who)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
- (unsigned long long) e820.map[i].addr,
- (unsigned long long) (e820.map[i].addr + e820.map[i].size));
+ (unsigned long long) e820.map[i].addr,
+ (unsigned long long)
+ (e820.map[i].addr + e820.map[i].size));
switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
+ case E820_RAM:
+ printk(KERN_CONT "(usable)\n");
+ break;
case E820_RESERVED:
- printk("(reserved)\n");
- break;
+ printk(KERN_CONT "(reserved)\n");
+ break;
case E820_ACPI:
- printk("(ACPI data)\n");
- break;
+ printk(KERN_CONT "(ACPI data)\n");
+ break;
case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %u\n", e820.map[i].type);
- break;
+ printk(KERN_CONT "(ACPI NVS)\n");
+ break;
+ default:
+ printk(KERN_CONT "type %u\n", e820.map[i].type);
+ break;
}
}
}
@@ -392,11 +413,11 @@ void __init e820_print_map(char *who)
/*
* Sanitize the BIOS e820 map.
*
- * Some e820 responses include overlapping entries. The following
+ * Some e820 responses include overlapping entries. The following
* replaces the original e820 map with a new one, removing overlaps.
*
*/
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
{
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
@@ -416,7 +437,8 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
int i;
/*
- Visually we're performing the following (1,2,3,4 = memory types)...
+ Visually we're performing the following
+ (1,2,3,4 = memory types)...
Sample memory map (w/overlaps):
____22__________________
@@ -458,22 +480,23 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
old_nr = *pnr_map;
/* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
+ for (i = 0; i < old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
return -1;
/* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
+ for (i = 0; i < 2 * old_nr; i++)
change_point[i] = &change_point_list[i];
/* record all known change-points (starting and ending addresses),
omitting those that are for empty memory regions */
chgidx = 0;
- for (i=0; i < old_nr; i++) {
+ for (i = 0; i < old_nr; i++) {
if (biosmap[i].size != 0) {
change_point[chgidx]->addr = biosmap[i].addr;
change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+ change_point[chgidx]->addr = biosmap[i].addr +
+ biosmap[i].size;
change_point[chgidx++]->pbios = &biosmap[i];
}
}
@@ -483,75 +506,106 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
still_changing = 1;
while (still_changing) {
still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
+ for (i = 1; i < chg_nr; i++) {
+ unsigned long long curaddr, lastaddr;
+ unsigned long long curpbaddr, lastpbaddr;
+
+ curaddr = change_point[i]->addr;
+ lastaddr = change_point[i - 1]->addr;
+ curpbaddr = change_point[i]->pbios->addr;
+ lastpbaddr = change_point[i - 1]->pbios->addr;
+
+ /*
+ * swap entries, when:
+ *
+ * curaddr > lastaddr or
+ * curaddr == lastaddr and curaddr == curpbaddr and
+ * lastaddr != lastpbaddr
+ */
+ if (curaddr < lastaddr ||
+ (curaddr == lastaddr && curaddr == curpbaddr &&
+ lastaddr != lastpbaddr)) {
change_tmp = change_point[i];
change_point[i] = change_point[i-1];
change_point[i-1] = change_tmp;
- still_changing=1;
+ still_changing = 1;
}
}
}
/* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
+ overlap_entries = 0; /* number of entries in the overlap table */
+ new_bios_entry = 0; /* index for creating new bios map entries */
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
+
/* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
+ for (chgidx = 0; chgidx < chg_nr; chgidx++) {
/* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
+ if (change_point[chgidx]->addr ==
+ change_point[chgidx]->pbios->addr) {
+ /*
+ * add map entry to overlap list (> 1 entry
+ * implies an overlap)
+ */
+ overlap_list[overlap_entries++] =
+ change_point[chgidx]->pbios;
+ } else {
+ /*
+ * remove entry from list (order independent,
+ * so swap with last)
+ */
+ for (i = 0; i < overlap_entries; i++) {
+ if (overlap_list[i] ==
+ change_point[chgidx]->pbios)
+ overlap_list[i] =
+ overlap_list[overlap_entries-1];
}
overlap_entries--;
}
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+ /*
+ * if there are overlapping entries, decide which
+ * "type" to use (larger value takes precedence --
+ * 1=usable, 2,3,4,4+=unusable)
+ */
current_type = 0;
- for (i=0; i<overlap_entries; i++)
+ for (i = 0; i < overlap_entries; i++)
if (overlap_list[i]->type > current_type)
current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
+ /*
+ * continue building up new bios map based on this
+ * information
+ */
if (current_type != last_type) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
+ /*
+ * move forward only if the new size
+ * was non-zero
+ */
if (new_bios[new_bios_entry].size != 0)
+ /*
+ * no more space left for new
+ * bios entries ?
+ */
if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
+ break;
}
if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+ new_bios[new_bios_entry].addr =
+ change_point[chgidx]->addr;
new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
+ last_addr = change_point[chgidx]->addr;
}
last_type = current_type;
}
}
- new_nr = new_bios_entry; /* retain count for new bios entries */
+ /* retain count for new bios entries */
+ new_nr = new_bios_entry;
/* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+ memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
*pnr_map = new_nr;
return 0;
@@ -566,7 +620,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
* will have given us a memory map that we can use to properly
* set up memory. If we aren't, we'll fake a memory map.
*/
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
@@ -583,18 +637,20 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
return -1;
add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
+ } while (biosmap++, --nr_map);
return 0;
}
-void early_panic(char *msg)
+static void early_panic(char *msg)
{
early_printk(msg);
panic(msg);
}
-void __init setup_memory_region(void)
+/* We're not void only for x86 32-bit compat */
+char * __init machine_specific_memory_setup(void)
{
+ char *who = "BIOS-e820";
/*
* Try to copy the BIOS-supplied E820-map.
*
@@ -605,7 +661,10 @@ void __init setup_memory_region(void)
if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
early_panic("Cannot find a valid memory map");
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map("BIOS-e820");
+ e820_print_map(who);
+
+ /* In case someone cares... */
+ return who;
}
static int __init parse_memopt(char *p)
@@ -613,9 +672,9 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
end_user_pfn = memparse(p, &p);
- end_user_pfn >>= PAGE_SHIFT;
+ end_user_pfn >>= PAGE_SHIFT;
return 0;
-}
+}
early_param("mem", parse_memopt);
static int userdef __initdata;
@@ -627,9 +686,9 @@ static int __init parse_memmap_opt(char *p)
if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
+ /*
+ * If we are doing a crash dump, we still need to know
+ * the real mem size before original memory map is
* reset.
*/
e820_register_active_regions(0, 0, -1UL);
@@ -646,6 +705,8 @@ static int __init parse_memmap_opt(char *p)
mem_size = memparse(p, &p);
if (p == oldp)
return -EINVAL;
+
+ userdef = 1;
if (*p == '@') {
start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RAM);
@@ -665,11 +726,29 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
if (userdef) {
+ char nr = e820.nr_map;
+
+ if (sanitize_e820_map(e820.map, &nr) < 0)
+ early_panic("Invalid user supplied memory map");
+ e820.nr_map = nr;
+
printk(KERN_INFO "user-defined physical RAM map:\n");
e820_print_map("user");
}
}
+void __init update_e820(void)
+{
+ u8 nr_map;
+
+ nr_map = e820.nr_map;
+ if (sanitize_e820_map(e820.map, &nr_map))
+ return;
+ e820.nr_map = nr_map;
+ printk(KERN_INFO "modified physical RAM map:\n");
+ e820_print_map("modified");
+}
+
unsigned long pci_mem_start = 0xaeedbabe;
EXPORT_SYMBOL(pci_mem_start);
@@ -713,8 +792,10 @@ __init void e820_setup_gap(void)
if (!found) {
gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
+ printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
+ "address range\n"
+ KERN_ERR "PCI: Unassigned devices with 32bit resource "
+ "registers may break!\n");
}
/*
@@ -727,8 +808,9 @@ __init void e820_setup_gap(void)
/* Fun with two's complement */
pci_mem_start = (gapstart + round) & -round;
- printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
+ printk(KERN_INFO
+ "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+ pci_mem_start, gapstart, gapsize);
}
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 88bb83ec895..9f51e1ea9e8 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -21,7 +21,33 @@
#include <asm/gart.h>
#endif
-static void __init via_bugs(void)
+static void __init fix_hypertransport_config(int num, int slot, int func)
+{
+ u32 htcfg;
+ /*
+ * we found a hypertransport bus
+ * make sure that we are broadcasting
+ * interrupts to all cpus on the ht bus
+ * if we're using extended apic ids
+ */
+ htcfg = read_pci_config(num, slot, func, 0x68);
+ if (htcfg & (1 << 18)) {
+ printk(KERN_INFO "Detected use of extended apic ids "
+ "on hypertransport bus\n");
+ if ((htcfg & (1 << 17)) == 0) {
+ printk(KERN_INFO "Enabling hypertransport extended "
+ "apic interrupt broadcast\n");
+ printk(KERN_INFO "Note this is a bios bug, "
+ "please contact your hw vendor\n");
+ htcfg |= (1 << 17);
+ write_pci_config(num, slot, func, 0x68, htcfg);
+ }
+ }
+
+
+}
+
+static void __init via_bugs(int num, int slot, int func)
{
#ifdef CONFIG_GART_IOMMU
if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
@@ -44,7 +70,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
#endif /* CONFIG_X86_IO_APIC */
#endif /* CONFIG_ACPI */
-static void __init nvidia_bugs(void)
+static void __init nvidia_bugs(int num, int slot, int func)
{
#ifdef CONFIG_ACPI
#ifdef CONFIG_X86_IO_APIC
@@ -72,7 +98,7 @@ static void __init nvidia_bugs(void)
}
-static void __init ati_bugs(void)
+static void __init ati_bugs(int num, int slot, int func)
{
#ifdef CONFIG_X86_IO_APIC
if (timer_over_8254 == 1) {
@@ -83,18 +109,67 @@ static void __init ati_bugs(void)
#endif
}
+#define QFLAG_APPLY_ONCE 0x1
+#define QFLAG_APPLIED 0x2
+#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
struct chipset {
- u16 vendor;
- void (*f)(void);
+ u32 vendor;
+ u32 device;
+ u32 class;
+ u32 class_mask;
+ u32 flags;
+ void (*f)(int num, int slot, int func);
};
static struct chipset early_qrk[] __initdata = {
- { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
- { PCI_VENDOR_ID_VIA, via_bugs },
- { PCI_VENDOR_ID_ATI, ati_bugs },
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
+ { PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
+ { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
{}
};
+static void __init check_dev_quirk(int num, int slot, int func)
+{
+ u16 class;
+ u16 vendor;
+ u16 device;
+ u8 type;
+ int i;
+
+ class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
+
+ if (class == 0xffff)
+ return;
+
+ vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
+
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
+
+ for (i = 0; early_qrk[i].f != NULL; i++) {
+ if (((early_qrk[i].vendor == PCI_ANY_ID) ||
+ (early_qrk[i].vendor == vendor)) &&
+ ((early_qrk[i].device == PCI_ANY_ID) ||
+ (early_qrk[i].device == device)) &&
+ (!((early_qrk[i].class ^ class) &
+ early_qrk[i].class_mask))) {
+ if ((early_qrk[i].flags &
+ QFLAG_DONE) != QFLAG_DONE)
+ early_qrk[i].f(num, slot, func);
+ early_qrk[i].flags |= QFLAG_APPLIED;
+ }
+ }
+
+ type = read_pci_config_byte(num, slot, func,
+ PCI_HEADER_TYPE);
+ if (!(type & 0x80))
+ return;
+}
+
void __init early_quirks(void)
{
int num, slot, func;
@@ -103,36 +178,8 @@ void __init early_quirks(void)
return;
/* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- u8 type;
- int i;
- class = read_pci_config(num,slot,func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
- vendor &= 0xffff;
-
- for (i = 0; early_qrk[i].f; i++)
- if (early_qrk[i].vendor == vendor) {
- early_qrk[i].f();
- return;
- }
-
- type = read_pci_config_byte(num, slot, func,
- PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
- }
- }
- }
+ for (num = 0; num < 32; num++)
+ for (slot = 0; slot < 32; slot++)
+ for (func = 0; func < 8; func++)
+ check_dev_quirk(num, slot, func);
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
new file mode 100644
index 00000000000..1411324a625
--- /dev/null
+++ b/arch/x86/kernel/efi.c
@@ -0,0 +1,512 @@
+/*
+ * Common EFI (Extensible Firmware Interface) support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2005-2008 Intel Co.
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Bibo Mao <bibo.mao@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * Copied from efi_32.c to eliminate the duplicated code between EFI
+ * 32/64 support code. --ying 2007-10-26
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ * Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/efi.h>
+#include <linux/bootmem.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+#include <linux/bcd.h>
+
+#include <asm/setup.h>
+#include <asm/efi.h>
+#include <asm/time.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+#define EFI_DEBUG 1
+#define PFX "EFI: "
+
+int efi_enabled;
+EXPORT_SYMBOL(efi_enabled);
+
+struct efi efi;
+EXPORT_SYMBOL(efi);
+
+struct efi_memory_map memmap;
+
+struct efi efi_phys __initdata;
+static efi_system_table_t efi_systab __initdata;
+
+static int __init setup_noefi(char *arg)
+{
+ efi_enabled = 0;
+ return 0;
+}
+early_param("noefi", setup_noefi);
+
+static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+ return efi_call_virt2(get_time, tm, tc);
+}
+
+static efi_status_t virt_efi_set_time(efi_time_t *tm)
+{
+ return efi_call_virt1(set_time, tm);
+}
+
+static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
+ efi_bool_t *pending,
+ efi_time_t *tm)
+{
+ return efi_call_virt3(get_wakeup_time,
+ enabled, pending, tm);
+}
+
+static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+ return efi_call_virt2(set_wakeup_time,
+ enabled, tm);
+}
+
+static efi_status_t virt_efi_get_variable(efi_char16_t *name,
+ efi_guid_t *vendor,
+ u32 *attr,
+ unsigned long *data_size,
+ void *data)
+{
+ return efi_call_virt5(get_variable,
+ name, vendor, attr,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
+ efi_char16_t *name,
+ efi_guid_t *vendor)
+{
+ return efi_call_virt3(get_next_variable,
+ name_size, name, vendor);
+}
+
+static efi_status_t virt_efi_set_variable(efi_char16_t *name,
+ efi_guid_t *vendor,
+ unsigned long attr,
+ unsigned long data_size,
+ void *data)
+{
+ return efi_call_virt5(set_variable,
+ name, vendor, attr,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
+{
+ return efi_call_virt1(get_next_high_mono_count, count);
+}
+
+static void virt_efi_reset_system(int reset_type,
+ efi_status_t status,
+ unsigned long data_size,
+ efi_char16_t *data)
+{
+ efi_call_virt4(reset_system, reset_type, status,
+ data_size, data);
+}
+
+static efi_status_t virt_efi_set_virtual_address_map(
+ unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ return efi_call_virt4(set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+}
+
+static efi_status_t __init phys_efi_set_virtual_address_map(
+ unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ efi_status_t status;
+
+ efi_call_phys_prelog();
+ status = efi_call_phys4(efi_phys.set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ efi_call_phys_epilog();
+ return status;
+}
+
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+ efi_time_cap_t *tc)
+{
+ efi_status_t status;
+
+ efi_call_phys_prelog();
+ status = efi_call_phys2(efi_phys.get_time, tm, tc);
+ efi_call_phys_epilog();
+ return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
+{
+ int real_seconds, real_minutes;
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ status = efi.get_time(&eft, &cap);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ return -1;
+ }
+
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
+ real_minutes += 30;
+ real_minutes %= 60;
+ eft.minute = real_minutes;
+ eft.second = real_seconds;
+
+ status = efi.set_time(&eft);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ERR "Oops: efitime: can't write time!\n");
+ return -1;
+ }
+ return 0;
+}
+
+unsigned long efi_get_time(void)
+{
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ status = efi.get_time(&eft, &cap);
+ if (status != EFI_SUCCESS)
+ printk(KERN_ERR "Oops: efitime: can't read time!\n");
+
+ return mktime(eft.year, eft.month, eft.day, eft.hour,
+ eft.minute, eft.second);
+}
+
+#if EFI_DEBUG
+static void __init print_efi_memmap(void)
+{
+ efi_memory_desc_t *md;
+ void *p;
+ int i;
+
+ for (p = memmap.map, i = 0;
+ p < memmap.map_end;
+ p += memmap.desc_size, i++) {
+ md = p;
+ printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
+ "range=[0x%016llx-0x%016llx) (%lluMB)\n",
+ i, md->type, md->attribute, md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+ }
+}
+#endif /* EFI_DEBUG */
+
+void __init efi_init(void)
+{
+ efi_config_table_t *config_tables;
+ efi_runtime_services_t *runtime;
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i = 0;
+ void *tmp;
+
+#ifdef CONFIG_X86_32
+ efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
+ memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
+#else
+ efi_phys.systab = (efi_system_table_t *)
+ (boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+ memmap.phys_map = (void *)
+ (boot_params.efi_info.efi_memmap |
+ ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
+#endif
+ memmap.nr_map = boot_params.efi_info.efi_memmap_size /
+ boot_params.efi_info.efi_memdesc_size;
+ memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+ memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
+
+ efi.systab = early_ioremap((unsigned long)efi_phys.systab,
+ sizeof(efi_system_table_t));
+ if (efi.systab == NULL)
+ printk(KERN_ERR "Couldn't map the EFI system table!\n");
+ memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
+ early_iounmap(efi.systab, sizeof(efi_system_table_t));
+ efi.systab = &efi_systab;
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ printk(KERN_ERR "EFI system table signature incorrect!\n");
+ if ((efi.systab->hdr.revision >> 16) == 0)
+ printk(KERN_ERR "Warning: EFI system table version "
+ "%d.%02d, expected 1.00 or greater!\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff);
+
+ /*
+ * Show what we know for posterity
+ */
+ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
+ if (c16) {
+ for (i = 0; i < sizeof(vendor) && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ } else
+ printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
+ early_iounmap(tmp, 2);
+
+ printk(KERN_INFO "EFI v%u.%.02u by %s \n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ /*
+ * Let's see what config tables the firmware passed to us.
+ */
+ config_tables = early_ioremap(
+ efi.systab->tables,
+ efi.systab->nr_tables * sizeof(efi_config_table_t));
+ if (config_tables == NULL)
+ printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+
+ printk(KERN_INFO);
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
+ efi.mps = config_tables[i].table;
+ printk(" MPS=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ ACPI_20_TABLE_GUID)) {
+ efi.acpi20 = config_tables[i].table;
+ printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ ACPI_TABLE_GUID)) {
+ efi.acpi = config_tables[i].table;
+ printk(" ACPI=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ SMBIOS_TABLE_GUID)) {
+ efi.smbios = config_tables[i].table;
+ printk(" SMBIOS=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ HCDP_TABLE_GUID)) {
+ efi.hcdp = config_tables[i].table;
+ printk(" HCDP=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ UGA_IO_PROTOCOL_GUID)) {
+ efi.uga = config_tables[i].table;
+ printk(" UGA=0x%lx ", config_tables[i].table);
+ }
+ }
+ printk("\n");
+ early_iounmap(config_tables,
+ efi.systab->nr_tables * sizeof(efi_config_table_t));
+
+ /*
+ * Check out the runtime services table. We need to map
+ * the runtime services table so that we can grab the physical
+ * address of several of the EFI runtime functions, needed to
+ * set the firmware into virtual mode.
+ */
+ runtime = early_ioremap((unsigned long)efi.systab->runtime,
+ sizeof(efi_runtime_services_t));
+ if (runtime != NULL) {
+ /*
+ * We will only need *early* access to the following
+ * two EFI runtime services before set_virtual_address_map
+ * is invoked.
+ */
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+ efi_phys.set_virtual_address_map =
+ (efi_set_virtual_address_map_t *)
+ runtime->set_virtual_address_map;
+ /*
+ * Make efi_get_time can be called before entering
+ * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
+ } else
+ printk(KERN_ERR "Could not map the EFI runtime service "
+ "table!\n");
+ early_iounmap(runtime, sizeof(efi_runtime_services_t));
+
+ /* Map the EFI memory map */
+ memmap.map = early_ioremap((unsigned long)memmap.phys_map,
+ memmap.nr_map * memmap.desc_size);
+ if (memmap.map == NULL)
+ printk(KERN_ERR "Could not map the EFI memory map!\n");
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+ if (memmap.desc_size != sizeof(efi_memory_desc_t))
+ printk(KERN_WARNING "Kernel-defined memdesc"
+ "doesn't match the one from EFI!\n");
+
+ /* Setup for EFI runtime service */
+ reboot_type = BOOT_EFI;
+
+#if EFI_DEBUG
+ print_efi_memmap();
+#endif
+}
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static void __init runtime_code_page_mkexec(void)
+{
+ efi_memory_desc_t *md;
+ unsigned long end;
+ void *p;
+
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return;
+
+ /* Make EFI runtime service code area executable */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+ if (md->type == EFI_RUNTIME_SERVICES_CODE &&
+ (end >> PAGE_SHIFT) <= max_pfn_mapped) {
+ set_memory_x(md->virt_addr, md->num_pages);
+ set_memory_uc(md->virt_addr, md->num_pages);
+ }
+ }
+ __flush_tlb_all();
+}
+#else
+static inline void __init runtime_code_page_mkexec(void) { }
+#endif
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor and update
+ * that memory descriptor with the virtual address obtained from ioremap().
+ * This enables the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+ efi_memory_desc_t *md;
+ efi_status_t status;
+ unsigned long end;
+ void *p;
+
+ efi.systab = NULL;
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+ if ((md->attribute & EFI_MEMORY_WB) &&
+ ((end >> PAGE_SHIFT) <= max_pfn_mapped))
+ md->virt_addr = (unsigned long)__va(md->phys_addr);
+ else
+ md->virt_addr = (unsigned long)
+ efi_ioremap(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT);
+ if (!md->virt_addr)
+ printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
+ (unsigned long long)md->phys_addr);
+ if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
+ ((unsigned long)efi_phys.systab < end))
+ efi.systab = (efi_system_table_t *)(unsigned long)
+ (md->virt_addr - md->phys_addr +
+ (unsigned long)efi_phys.systab);
+ }
+
+ BUG_ON(!efi.systab);
+
+ status = phys_efi_set_virtual_address_map(
+ memmap.desc_size * memmap.nr_map,
+ memmap.desc_size,
+ memmap.desc_version,
+ memmap.phys_map);
+
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ALERT "Unable to switch EFI into virtual mode "
+ "(status=%lx)!\n", status);
+ panic("EFI call to SetVirtualAddressMap() failed!");
+ }
+
+ /*
+ * Now that EFI is in virtual mode, update the function
+ * pointers in the runtime service table to the new virtual addresses.
+ *
+ * Call EFI services through wrapper functions.
+ */
+ efi.get_time = virt_efi_get_time;
+ efi.set_time = virt_efi_set_time;
+ efi.get_wakeup_time = virt_efi_get_wakeup_time;
+ efi.set_wakeup_time = virt_efi_set_wakeup_time;
+ efi.get_variable = virt_efi_get_variable;
+ efi.get_next_variable = virt_efi_get_next_variable;
+ efi.set_variable = virt_efi_set_variable;
+ efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
+ efi.reset_system = virt_efi_reset_system;
+ efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
+ runtime_code_page_mkexec();
+ early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+ memmap.map = NULL;
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+u32 efi_mem_type(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if ((md->phys_addr <= phys_addr) &&
+ (phys_addr < (md->phys_addr +
+ (md->num_pages << EFI_PAGE_SHIFT))))
+ return md->type;
+ }
+ return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if ((md->phys_addr <= phys_addr) &&
+ (phys_addr < (md->phys_addr +
+ (md->num_pages << EFI_PAGE_SHIFT))))
+ return md->attribute;
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index e2be78f4939..cb91f985b4a 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -20,40 +20,15 @@
*/
#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/spinlock.h>
-#include <linux/bootmem.h>
#include <linux/ioport.h>
-#include <linux/module.h>
#include <linux/efi.h>
-#include <linux/kexec.h>
-#include <asm/setup.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
#include <asm/tlbflush.h>
-#define EFI_DEBUG 0
-#define PFX "EFI: "
-
-extern efi_status_t asmlinkage efi_call_phys(void *, ...);
-
-struct efi efi;
-EXPORT_SYMBOL(efi);
-static struct efi efi_phys;
-struct efi_memory_map memmap;
-
-/*
- * We require an early boot_ioremap mapping mechanism initially
- */
-extern void * boot_ioremap(unsigned long, unsigned long);
-
/*
* To make EFI call EFI runtime service in physical addressing mode we need
* prelog/epilog before/after the invocation to disable interrupt, to
@@ -62,16 +37,14 @@ extern void * boot_ioremap(unsigned long, unsigned long);
*/
static unsigned long efi_rt_eflags;
-static DEFINE_SPINLOCK(efi_rt_lock);
static pgd_t efi_bak_pg_dir_pointer[2];
-static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
+void efi_call_phys_prelog(void)
{
unsigned long cr4;
unsigned long temp;
- struct Xgt_desc_struct gdt_descr;
+ struct desc_ptr gdt_descr;
- spin_lock(&efi_rt_lock);
local_irq_save(efi_rt_eflags);
/*
@@ -101,17 +74,17 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
/*
* After the lock is released, the original page table is restored.
*/
- local_flush_tlb();
+ __flush_tlb_all();
gdt_descr.address = __pa(get_cpu_gdt_table(0));
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
}
-static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
+void efi_call_phys_epilog(void)
{
unsigned long cr4;
- struct Xgt_desc_struct gdt_descr;
+ struct desc_ptr gdt_descr;
gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
gdt_descr.size = GDT_SIZE - 1;
@@ -132,586 +105,7 @@ static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
/*
* After the lock is released, the original page table is restored.
*/
- local_flush_tlb();
+ __flush_tlb_all();
local_irq_restore(efi_rt_eflags);
- spin_unlock(&efi_rt_lock);
-}
-
-static efi_status_t
-phys_efi_set_virtual_address_map(unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- efi_status_t status;
-
- efi_call_phys_prelog();
- status = efi_call_phys(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
- efi_call_phys_epilog();
- return status;
-}
-
-static efi_status_t
-phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
- efi_status_t status;
-
- efi_call_phys_prelog();
- status = efi_call_phys(efi_phys.get_time, tm, tc);
- efi_call_phys_epilog();
- return status;
-}
-
-inline int efi_set_rtc_mmss(unsigned long nowtime)
-{
- int real_seconds, real_minutes;
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- spin_lock(&efi_rt_lock);
- status = efi.get_time(&eft, &cap);
- spin_unlock(&efi_rt_lock);
- if (status != EFI_SUCCESS)
- panic("Ooops, efitime: can't read time!\n");
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
-
- if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
- real_minutes += 30;
- real_minutes %= 60;
-
- eft.minute = real_minutes;
- eft.second = real_seconds;
-
- if (status != EFI_SUCCESS) {
- printk("Ooops: efitime: can't read time!\n");
- return -1;
- }
- return 0;
-}
-/*
- * This is used during kernel init before runtime
- * services have been remapped and also during suspend, therefore,
- * we'll need to call both in physical and virtual modes.
- */
-inline unsigned long efi_get_time(void)
-{
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- if (efi.get_time) {
- /* if we are in virtual mode use remapped function */
- status = efi.get_time(&eft, &cap);
- } else {
- /* we are in physical mode */
- status = phys_efi_get_time(&eft, &cap);
- }
-
- if (status != EFI_SUCCESS)
- printk("Oops: efitime: can't read time status: 0x%lx\n",status);
-
- return mktime(eft.year, eft.month, eft.day, eft.hour,
- eft.minute, eft.second);
-}
-
-int is_available_memory(efi_memory_desc_t * md)
-{
- if (!(md->attribute & EFI_MEMORY_WB))
- return 0;
-
- switch (md->type) {
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
- case EFI_BOOT_SERVICES_CODE:
- case EFI_BOOT_SERVICES_DATA:
- case EFI_CONVENTIONAL_MEMORY:
- return 1;
- }
- return 0;
-}
-
-/*
- * We need to map the EFI memory map again after paging_init().
- */
-void __init efi_map_memmap(void)
-{
- memmap.map = NULL;
-
- memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
- (memmap.nr_map * memmap.desc_size));
- if (memmap.map == NULL)
- printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
-
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-}
-
-#if EFI_DEBUG
-static void __init print_efi_memmap(void)
-{
- efi_memory_desc_t *md;
- void *p;
- int i;
-
- for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
- md = p;
- printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
- "range=[0x%016llx-0x%016llx) (%lluMB)\n",
- i, md->type, md->attribute, md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
- }
-}
-#endif /* EFI_DEBUG */
-
-/*
- * Walks the EFI memory map and calls CALLBACK once for each EFI
- * memory descriptor that has memory that is available for kernel use.
- */
-void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
-{
- int prev_valid = 0;
- struct range {
- unsigned long start;
- unsigned long end;
- } uninitialized_var(prev), curr;
- efi_memory_desc_t *md;
- unsigned long start, end;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if ((md->num_pages == 0) || (!is_available_memory(md)))
- continue;
-
- curr.start = md->phys_addr;
- curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
- if (!prev_valid) {
- prev = curr;
- prev_valid = 1;
- } else {
- if (curr.start < prev.start)
- printk(KERN_INFO PFX "Unordered memory map\n");
- if (prev.end == curr.start)
- prev.end = curr.end;
- else {
- start =
- (unsigned long) (PAGE_ALIGN(prev.start));
- end = (unsigned long) (prev.end & PAGE_MASK);
- if ((end > start)
- && (*callback) (start, end, arg) < 0)
- return;
- prev = curr;
- }
- }
- }
- if (prev_valid) {
- start = (unsigned long) PAGE_ALIGN(prev.start);
- end = (unsigned long) (prev.end & PAGE_MASK);
- if (end > start)
- (*callback) (start, end, arg);
- }
-}
-
-void __init efi_init(void)
-{
- efi_config_table_t *config_tables;
- efi_runtime_services_t *runtime;
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- unsigned long num_config_tables;
- int i = 0;
-
- memset(&efi, 0, sizeof(efi) );
- memset(&efi_phys, 0, sizeof(efi_phys));
-
- efi_phys.systab =
- (efi_system_table_t *)boot_params.efi_info.efi_systab;
- memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
- memmap.nr_map = boot_params.efi_info.efi_memmap_size/
- boot_params.efi_info.efi_memdesc_size;
- memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
- memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
-
- efi.systab = (efi_system_table_t *)
- boot_ioremap((unsigned long) efi_phys.systab,
- sizeof(efi_system_table_t));
- /*
- * Verify the EFI Table
- */
- if (efi.systab == NULL)
- printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
- if ((efi.systab->hdr.revision >> 16) == 0)
- printk(KERN_ERR PFX "Warning: EFI system table version "
- "%d.%02d, expected 1.00 or greater\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff);
-
- /*
- * Grab some details from the system table
- */
- num_config_tables = efi.systab->nr_tables;
- config_tables = (efi_config_table_t *)efi.systab->tables;
- runtime = efi.systab->runtime;
-
- /*
- * Show what we know for posterity
- */
- c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
- if (c16) {
- for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i)
- vendor[i] = *c16++;
- vendor[i] = '\0';
- } else
- printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
-
- printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = (efi_config_table_t *)
- boot_ioremap((unsigned long) config_tables,
- num_config_tables * sizeof(efi_config_table_t));
-
- if (config_tables == NULL)
- printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
-
- efi.mps = EFI_INVALID_TABLE_ADDR;
- efi.acpi = EFI_INVALID_TABLE_ADDR;
- efi.acpi20 = EFI_INVALID_TABLE_ADDR;
- efi.smbios = EFI_INVALID_TABLE_ADDR;
- efi.sal_systab = EFI_INVALID_TABLE_ADDR;
- efi.boot_info = EFI_INVALID_TABLE_ADDR;
- efi.hcdp = EFI_INVALID_TABLE_ADDR;
- efi.uga = EFI_INVALID_TABLE_ADDR;
-
- for (i = 0; i < num_config_tables; i++) {
- if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = config_tables[i].table;
- printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = config_tables[i].table;
- printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = config_tables[i].table;
- printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = config_tables[i].table;
- printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = config_tables[i].table;
- printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
- efi.uga = config_tables[i].table;
- printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
- }
- }
- printk("\n");
-
- /*
- * Check out the runtime services table. We need to map
- * the runtime services table so that we can grab the physical
- * address of several of the EFI runtime functions, needed to
- * set the firmware into virtual mode.
- */
-
- runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
- runtime,
- sizeof(efi_runtime_services_t));
- if (runtime != NULL) {
- /*
- * We will only need *early* access to the following
- * two EFI runtime services before set_virtual_address_map
- * is invoked.
- */
- efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- runtime->set_virtual_address_map;
- } else
- printk(KERN_ERR PFX "Could not map the runtime service table!\n");
-
- /* Map the EFI memory map for use until paging_init() */
- memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap,
- boot_params.efi_info.efi_memmap_size);
- if (memmap.map == NULL)
- printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
-
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-
-#if EFI_DEBUG
- print_efi_memmap();
-#endif
-}
-
-static inline void __init check_range_for_systab(efi_memory_desc_t *md)
-{
- if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab < md->phys_addr +
- ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
- unsigned long addr;
-
- addr = md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab;
- efi.systab = (efi_system_table_t *)addr;
- }
-}
-
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-
-#define efi_call_virt(f, args...) \
- ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
-
-static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
- return efi_call_virt(get_time, tm, tc);
-}
-
-static efi_status_t virt_efi_set_time (efi_time_t *tm)
-{
- return efi_call_virt(set_time, tm);
-}
-
-static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled,
- efi_bool_t *pending,
- efi_time_t *tm)
-{
- return efi_call_virt(get_wakeup_time, enabled, pending, tm);
-}
-
-static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled,
- efi_time_t *tm)
-{
- return efi_call_virt(set_wakeup_time, enabled, tm);
-}
-
-static efi_status_t virt_efi_get_variable (efi_char16_t *name,
- efi_guid_t *vendor, u32 *attr,
- unsigned long *data_size, void *data)
-{
- return efi_call_virt(get_variable, name, vendor, attr, data_size, data);
-}
-
-static efi_status_t virt_efi_get_next_variable (unsigned long *name_size,
- efi_char16_t *name,
- efi_guid_t *vendor)
-{
- return efi_call_virt(get_next_variable, name_size, name, vendor);
-}
-
-static efi_status_t virt_efi_set_variable (efi_char16_t *name,
- efi_guid_t *vendor,
- unsigned long attr,
- unsigned long data_size, void *data)
-{
- return efi_call_virt(set_variable, name, vendor, attr, data_size, data);
-}
-
-static efi_status_t virt_efi_get_next_high_mono_count (u32 *count)
-{
- return efi_call_virt(get_next_high_mono_count, count);
-}
-
-static void virt_efi_reset_system (int reset_type, efi_status_t status,
- unsigned long data_size,
- efi_char16_t *data)
-{
- efi_call_virt(reset_system, reset_type, status, data_size, data);
-}
-
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- */
-
-void __init efi_enter_virtual_mode(void)
-{
- efi_memory_desc_t *md;
- efi_status_t status;
- void *p;
-
- efi.systab = NULL;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
-
- md->virt_addr = (unsigned long)ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!(unsigned long)md->virt_addr) {
- printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
- (unsigned long)md->phys_addr);
- }
- /* update the virtual address of the EFI system table */
- check_range_for_systab(md);
- }
-
- BUG_ON(!efi.systab);
-
- status = phys_efi_set_virtual_address_map(
- memmap.desc_size * memmap.nr_map,
- memmap.desc_size,
- memmap.desc_version,
- memmap.phys_map);
-
- if (status != EFI_SUCCESS) {
- printk (KERN_ALERT "You are screwed! "
- "Unable to switch EFI into virtual mode "
- "(status=%lx)\n", status);
- panic("EFI call to SetVirtualAddressMap() failed!");
- }
-
- /*
- * Now that EFI is in virtual mode, update the function
- * pointers in the runtime service table to the new virtual addresses.
- */
-
- efi.get_time = virt_efi_get_time;
- efi.set_time = virt_efi_set_time;
- efi.get_wakeup_time = virt_efi_get_wakeup_time;
- efi.set_wakeup_time = virt_efi_set_wakeup_time;
- efi.get_variable = virt_efi_get_variable;
- efi.get_next_variable = virt_efi_get_next_variable;
- efi.set_variable = virt_efi_set_variable;
- efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
- efi.reset_system = virt_efi_reset_system;
-}
-
-void __init
-efi_initialize_iomem_resources(struct resource *code_resource,
- struct resource *data_resource,
- struct resource *bss_resource)
-{
- struct resource *res;
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
- 0x100000000ULL)
- continue;
- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
- switch (md->type) {
- case EFI_RESERVED_TYPE:
- res->name = "Reserved Memory";
- break;
- case EFI_LOADER_CODE:
- res->name = "Loader Code";
- break;
- case EFI_LOADER_DATA:
- res->name = "Loader Data";
- break;
- case EFI_BOOT_SERVICES_DATA:
- res->name = "BootServices Data";
- break;
- case EFI_BOOT_SERVICES_CODE:
- res->name = "BootServices Code";
- break;
- case EFI_RUNTIME_SERVICES_CODE:
- res->name = "Runtime Service Code";
- break;
- case EFI_RUNTIME_SERVICES_DATA:
- res->name = "Runtime Service Data";
- break;
- case EFI_CONVENTIONAL_MEMORY:
- res->name = "Conventional Memory";
- break;
- case EFI_UNUSABLE_MEMORY:
- res->name = "Unusable Memory";
- break;
- case EFI_ACPI_RECLAIM_MEMORY:
- res->name = "ACPI Reclaim";
- break;
- case EFI_ACPI_MEMORY_NVS:
- res->name = "ACPI NVS";
- break;
- case EFI_MEMORY_MAPPED_IO:
- res->name = "Memory Mapped IO";
- break;
- case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
- res->name = "Memory Mapped IO Port Space";
- break;
- default:
- res->name = "Reserved";
- break;
- }
- res->start = md->phys_addr;
- res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- if (request_resource(&iomem_resource, res) < 0)
- printk(KERN_ERR PFX "Failed to allocate res %s : "
- "0x%llx-0x%llx\n", res->name,
- (unsigned long long)res->start,
- (unsigned long long)res->end);
- /*
- * We don't know which region contains kernel data so we try
- * it repeatedly and let the resource manager test it.
- */
- if (md->type == EFI_CONVENTIONAL_MEMORY) {
- request_resource(res, code_resource);
- request_resource(res, data_resource);
- request_resource(res, bss_resource);
-#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
-#endif
- }
- }
-}
-
-/*
- * Convenience functions to obtain memory types and attributes
- */
-
-u32 efi_mem_type(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) && (phys_addr <
- (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
- return md->type;
- }
- return 0;
-}
-
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) && (phys_addr <
- (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
- return md->attribute;
- }
- return 0;
}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
new file mode 100644
index 00000000000..4b73992c1e1
--- /dev/null
+++ b/arch/x86/kernel/efi_64.c
@@ -0,0 +1,134 @@
+/*
+ * x86_64 specific EFI support functions
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 2005-2008 Intel Co.
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Bibo Mao <bibo.mao@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * Code to convert EFI to E820 map has been implemented in elilo bootloader
+ * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table
+ * is setup appropriately for EFI runtime code.
+ * - mouli 06/14/2007.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/proto.h>
+#include <asm/efi.h>
+
+static pgd_t save_pgd __initdata;
+static unsigned long efi_flags __initdata;
+
+static void __init early_mapping_set_exec(unsigned long start,
+ unsigned long end,
+ int executable)
+{
+ pte_t *kpte;
+ int level;
+
+ while (start < end) {
+ kpte = lookup_address((unsigned long)__va(start), &level);
+ BUG_ON(!kpte);
+ if (executable)
+ set_pte(kpte, pte_mkexec(*kpte));
+ else
+ set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
+ __supported_pte_mask));
+ if (level == 4)
+ start = (start + PMD_SIZE) & PMD_MASK;
+ else
+ start = (start + PAGE_SIZE) & PAGE_MASK;
+ }
+}
+
+static void __init early_runtime_code_mapping_set_exec(int executable)
+{
+ efi_memory_desc_t *md;
+ void *p;
+
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return;
+
+ /* Make EFI runtime service code area executable */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ if (md->type == EFI_RUNTIME_SERVICES_CODE) {
+ unsigned long end;
+ end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+ early_mapping_set_exec(md->phys_addr, end, executable);
+ }
+ }
+}
+
+void __init efi_call_phys_prelog(void)
+{
+ unsigned long vaddress;
+
+ local_irq_save(efi_flags);
+ early_runtime_code_mapping_set_exec(1);
+ vaddress = (unsigned long)__va(0x0UL);
+ save_pgd = *pgd_offset_k(0x0UL);
+ set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+ __flush_tlb_all();
+}
+
+void __init efi_call_phys_epilog(void)
+{
+ /*
+ * After the lock is released, the original page table is restored.
+ */
+ set_pgd(pgd_offset_k(0x0UL), save_pgd);
+ early_runtime_code_mapping_set_exec(0);
+ __flush_tlb_all();
+ local_irq_restore(efi_flags);
+}
+
+void __init efi_reserve_bootmem(void)
+{
+ reserve_bootmem_generic((unsigned long)memmap.phys_map,
+ memmap.nr_map * memmap.desc_size);
+}
+
+void __iomem * __init efi_ioremap(unsigned long offset,
+ unsigned long size)
+{
+ static unsigned pages_mapped;
+ unsigned long last_addr;
+ unsigned i, pages;
+
+ last_addr = offset + size - 1;
+ offset &= PAGE_MASK;
+ pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+ if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+ return NULL;
+
+ for (i = 0; i < pages; i++) {
+ __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
+ offset, PAGE_KERNEL_EXEC_NOCACHE);
+ offset += PAGE_SIZE;
+ pages_mapped++;
+ }
+
+ return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
+ (pages_mapped - pages));
+}
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
new file mode 100644
index 00000000000..99b47d48c9f
--- /dev/null
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -0,0 +1,109 @@
+/*
+ * Function calling ABI conversion from Linux to EFI for x86_64
+ *
+ * Copyright (C) 2007 Intel Corp
+ * Bibo Mao <bibo.mao@intel.com>
+ * Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/linkage.h>
+
+#define SAVE_XMM \
+ mov %rsp, %rax; \
+ subq $0x70, %rsp; \
+ and $~0xf, %rsp; \
+ mov %rax, (%rsp); \
+ mov %cr0, %rax; \
+ clts; \
+ mov %rax, 0x8(%rsp); \
+ movaps %xmm0, 0x60(%rsp); \
+ movaps %xmm1, 0x50(%rsp); \
+ movaps %xmm2, 0x40(%rsp); \
+ movaps %xmm3, 0x30(%rsp); \
+ movaps %xmm4, 0x20(%rsp); \
+ movaps %xmm5, 0x10(%rsp)
+
+#define RESTORE_XMM \
+ movaps 0x60(%rsp), %xmm0; \
+ movaps 0x50(%rsp), %xmm1; \
+ movaps 0x40(%rsp), %xmm2; \
+ movaps 0x30(%rsp), %xmm3; \
+ movaps 0x20(%rsp), %xmm4; \
+ movaps 0x10(%rsp), %xmm5; \
+ mov 0x8(%rsp), %rsi; \
+ mov %rsi, %cr0; \
+ mov (%rsp), %rsp
+
+ENTRY(efi_call0)
+ SAVE_XMM
+ subq $32, %rsp
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call1)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call2)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call3)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call4)
+ SAVE_XMM
+ subq $32, %rsp
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $32, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call5)
+ SAVE_XMM
+ subq $48, %rsp
+ mov %r9, 32(%rsp)
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $48, %rsp
+ RESTORE_XMM
+ ret
+
+ENTRY(efi_call6)
+ SAVE_XMM
+ mov (%rsp), %rax
+ mov 8(%rax), %rax
+ subq $48, %rsp
+ mov %r9, 32(%rsp)
+ mov %rax, 40(%rsp)
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rsi, %rcx
+ call *%rdi
+ addq $48, %rsp
+ RESTORE_XMM
+ ret
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dc7f938e501..be5c31d0488 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
* for paravirtualization. The following will never clobber any registers:
* INTERRUPT_RETURN (aka. "iret")
* GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
*
* For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
* specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -283,12 +283,12 @@ END(resume_kernel)
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
# sysenter call handler stub
-ENTRY(sysenter_entry)
+ENTRY(ia32_sysenter_target)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 0
CFI_REGISTER esp, ebp
- movl TSS_sysenter_esp0(%esp),%esp
+ movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
/*
* No need to follow this irqs on/off section: the syscall
@@ -351,7 +351,7 @@ sysenter_past_esp:
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
- ENABLE_INTERRUPTS_SYSEXIT
+ ENABLE_INTERRUPTS_SYSCALL_RET
CFI_ENDPROC
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
@@ -360,7 +360,7 @@ sysenter_past_esp:
.align 4
.long 1b,2b
.popsection
-ENDPROC(sysenter_entry)
+ENDPROC(ia32_sysenter_target)
# system call handler stub
ENTRY(system_call)
@@ -583,7 +583,7 @@ END(syscall_badsys)
* Build the entry stubs and pointer table with
* some assembler magic.
*/
-.data
+.section .rodata,"a"
ENTRY(interrupt)
.text
@@ -743,7 +743,7 @@ END(device_not_available)
* that sets up the real kernel stack. Check here, since we can't
* allow the wrong stack to be used.
*
- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
* already pushed 3 words if it hits on the sysenter instruction:
* eflags, cs and eip.
*
@@ -755,7 +755,7 @@ END(device_not_available)
cmpw $__KERNEL_CS,4(%esp); \
jne ok; \
label: \
- movl TSS_sysenter_esp0+offset(%esp),%esp; \
+ movl TSS_sysenter_sp0+offset(%esp),%esp; \
CFI_DEF_CFA esp, 0; \
CFI_UNDEFINED eip; \
pushfl; \
@@ -768,7 +768,7 @@ label: \
KPROBE_ENTRY(debug)
RING0_INT_FRAME
- cmpl $sysenter_entry,(%esp)
+ cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
@@ -799,7 +799,7 @@ KPROBE_ENTRY(nmi)
popl %eax
CFI_ADJUST_CFA_OFFSET -4
je nmi_espfix_stack
- cmpl $sysenter_entry,(%esp)
+ cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
@@ -812,7 +812,7 @@ KPROBE_ENTRY(nmi)
popl %eax
CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
- cmpl $sysenter_entry,12(%esp)
+ cmpl $ia32_sysenter_target,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
/* We have a RING0_INT_FRAME here */
@@ -882,10 +882,10 @@ ENTRY(native_iret)
.previous
END(native_iret)
-ENTRY(native_irq_enable_sysexit)
+ENTRY(native_irq_enable_syscall_ret)
sti
sysexit
-END(native_irq_enable_sysexit)
+END(native_irq_enable_syscall_ret)
#endif
KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e70f3881d7e..bea8474744f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -50,6 +50,7 @@
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
+#include <asm/paravirt.h>
.code64
@@ -57,6 +58,13 @@
#define retint_kernel retint_restore_args
#endif
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_irq_enable_syscall_ret)
+ movq %gs:pda_oldrsp,%rsp
+ swapgs
+ sysretq
+#endif /* CONFIG_PARAVIRT */
+
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -216,14 +224,21 @@ ENTRY(system_call)
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
- swapgs
+ SWAPGS_UNSAFE_STACK
+ /*
+ * A hypervisor implementation might want to use a label
+ * after the swapgs, so that it can do the swapgs
+ * for the guest and jump here on syscall.
+ */
+ENTRY(system_call_after_swapgs)
+
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
*/
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
@@ -246,7 +261,7 @@ ret_from_sys_call:
sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
@@ -260,9 +275,7 @@ sysret_check:
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
- movq %gs:pda_oldrsp,%rsp
- swapgs
- sysretq
+ ENABLE_INTERRUPTS_SYSCALL_RET
CFI_RESTORE_STATE
/* Handle reschedules */
@@ -271,7 +284,7 @@ sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
@@ -282,7 +295,7 @@ sysret_careful:
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
@@ -295,7 +308,7 @@ sysret_signal:
1: movl $_TIF_NEED_RESCHED,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
@@ -327,7 +340,7 @@ tracesys:
*/
.globl int_ret_from_sys_call
int_ret_from_sys_call:
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
@@ -349,20 +362,20 @@ int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
@@ -385,7 +398,7 @@ int_signal:
1: movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
@@ -506,7 +519,7 @@ END(stub_rt_sigreturn)
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
- swapgs
+ SWAPGS
/* irqcount is used to check if a CPU is already on an interrupt
stack or not. While this is essentially redundant with preempt_count
it is a little cheaper to use a separate counter in the PDA
@@ -527,7 +540,7 @@ ENTRY(common_interrupt)
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
leaveq
@@ -556,13 +569,13 @@ retint_swapgs: /* return to user-space */
/*
* The iretq could re-enable interrupts:
*/
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
- swapgs
+ SWAPGS
jmp restore_args
retint_restore_args: /* return to kernel space */
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
/*
* The iretq could re-enable interrupts:
*/
@@ -570,10 +583,14 @@ retint_restore_args: /* return to kernel space */
restore_args:
RESTORE_ARGS 0,8,0
iret_label:
+#ifdef CONFIG_PARAVIRT
+ INTERRUPT_RETURN
+#endif
+ENTRY(native_iret)
iretq
.section __ex_table,"a"
- .quad iret_label,bad_iret
+ .quad native_iret, bad_iret
.previous
.section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
@@ -581,24 +598,24 @@ iret_label:
bad_iret:
movq $11,%rdi /* SIGSEGV */
TRACE_IRQS_ON
- sti
- jmp do_exit
- .previous
-
+ ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+ jmp do_exit
+ .previous
+
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
@@ -606,14 +623,14 @@ retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
@@ -731,7 +748,7 @@ END(spurious_interrupt)
rdmsr
testl %edx,%edx
js 1f
- swapgs
+ SWAPGS
xorl %ebx,%ebx
1:
.if \ist
@@ -747,7 +764,7 @@ END(spurious_interrupt)
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
TRACE_IRQS_OFF
.endif
@@ -776,10 +793,10 @@ paranoid_swapgs\trace:
.if \trace
TRACE_IRQS_IRETQ 0
.endif
- swapgs
+ SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
- iretq
+ INTERRUPT_RETURN
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
@@ -794,11 +811,11 @@ paranoid_userspace\trace:
.if \trace
TRACE_IRQS_ON
.endif
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
.if \trace
TRACE_IRQS_OFF
.endif
@@ -807,9 +824,9 @@ paranoid_schedule\trace:
.if \trace
TRACE_IRQS_ON
.endif
- sti
+ ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
.if \trace
TRACE_IRQS_OFF
.endif
@@ -862,7 +879,7 @@ KPROBE_ENTRY(error_entry)
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
- swapgs
+ SWAPGS
error_sti:
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
@@ -874,7 +891,7 @@ error_sti:
error_exit:
movl %ebx,%eax
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
@@ -911,12 +928,12 @@ ENTRY(load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
- cli
- swapgs
+ DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+ SWAPGS
gs_change:
movl %edi,%gs
2: mfence /* workaround */
- swapgs
+ SWAPGS
popf
CFI_ADJUST_CFA_OFFSET -8
ret
@@ -930,7 +947,7 @@ ENDPROC(load_gs_index)
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
- swapgs /* switch back to user gs */
+ SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index ce703e21c91..4ae7b644026 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -24,18 +24,11 @@
#include <acpi/acpi_bus.h>
#endif
-/*
- * which logical CPU number maps to which CPU (physical APIC ID)
- *
- * The following static array is used during kernel startup
- * and the x86_cpu_to_apicid_ptr contains the address of the
- * array during this time. Is it zeroed when the per_cpu
- * data area is removed.
- */
-u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_ptr;
-DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+void *x86_cpu_to_apicid_early_ptr;
+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
struct genapic __read_mostly *genapic = &apic_flat;
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index f12d8c5d980..9c7f7d39596 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -1,6 +1,7 @@
/*
* AMD Geode southbridge support code
* Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public License
@@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base);
/* === GPIO API === */
-void geode_gpio_set(unsigned int gpio, unsigned int reg)
+void geode_gpio_set(u32 gpio, unsigned int reg)
{
u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
if (!base)
return;
- if (gpio < 16)
- outl(1 << gpio, base + reg);
- else
- outl(1 << (gpio - 16), base + 0x80 + reg);
+ /* low bank register */
+ if (gpio & 0xFFFF)
+ outl(gpio & 0xFFFF, base + reg);
+ /* high bank register */
+ gpio >>= 16;
+ if (gpio)
+ outl(gpio, base + 0x80 + reg);
}
EXPORT_SYMBOL_GPL(geode_gpio_set);
-void geode_gpio_clear(unsigned int gpio, unsigned int reg)
+void geode_gpio_clear(u32 gpio, unsigned int reg)
{
u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
if (!base)
return;
- if (gpio < 16)
- outl(1 << (gpio + 16), base + reg);
- else
- outl(1 << gpio, base + 0x80 + reg);
+ /* low bank register */
+ if (gpio & 0xFFFF)
+ outl((gpio & 0xFFFF) << 16, base + reg);
+ /* high bank register */
+ gpio &= (0xFFFF << 16);
+ if (gpio)
+ outl(gpio, base + 0x80 + reg);
}
EXPORT_SYMBOL_GPL(geode_gpio_clear);
-int geode_gpio_isset(unsigned int gpio, unsigned int reg)
+int geode_gpio_isset(u32 gpio, unsigned int reg)
{
u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+ u32 val;
if (!base)
return 0;
- if (gpio < 16)
- return (inl(base + reg) & (1 << gpio)) ? 1 : 0;
- else
- return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0;
+ /* low bank register */
+ if (gpio & 0xFFFF) {
+ val = inl(base + reg) & (gpio & 0xFFFF);
+ if ((gpio & 0xFFFF) == val)
+ return 1;
+ }
+ /* high bank register */
+ gpio >>= 16;
+ if (gpio) {
+ val = inl(base + 0x80 + reg) & gpio;
+ if (gpio == val)
+ return 1;
+ }
+ return 0;
}
EXPORT_SYMBOL_GPL(geode_gpio_isset);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6b3469311e4..a317336cdea 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
+#include <linux/start_kernel.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -19,12 +20,14 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/kdebug.h>
+#include <asm/e820.h>
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
pgd_clear(pgd);
- __flush_tlb();
+ __flush_tlb_all();
}
/* Don't add a printk in there. printk relies on the PDA which is not initialized
@@ -46,6 +49,35 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
+#define EBDA_ADDR_POINTER 0x40E
+
+static __init void reserve_ebda(void)
+{
+ unsigned ebda_addr, ebda_size;
+
+ /*
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E
+ */
+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+ ebda_addr <<= 4;
+
+ if (!ebda_addr)
+ return;
+
+ ebda_size = *(unsigned short *)__va(ebda_addr);
+
+ /* Round EBDA up to pages */
+ if (ebda_size == 0)
+ ebda_size = 1;
+ ebda_size <<= 10;
+ ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+ if (ebda_size > 64*1024)
+ ebda_size = 64*1024;
+
+ reserve_early(ebda_addr, ebda_addr + ebda_size);
+}
+
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
@@ -56,8 +88,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
/* Make NULL pointers segfault */
zap_identity_mappings();
- for (i = 0; i < IDT_ENTRIES; i++)
+ for (i = 0; i < IDT_ENTRIES; i++) {
+#ifdef CONFIG_EARLY_PRINTK
+ set_intr_gate(i, &early_idt_handlers[i]);
+#else
set_intr_gate(i, early_idt_handler);
+#endif
+ }
load_idt((const struct desc_ptr *)&idt_descr);
early_printk("Kernel alive\n");
@@ -67,8 +104,24 @@ void __init x86_64_start_kernel(char * real_mode_data)
pda_init(0);
copy_bootdata(__va(real_mode_data));
-#ifdef CONFIG_SMP
- cpu_set(0, cpu_online_map);
-#endif
+
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&_end));
+
+ /* Reserve INITRD */
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ reserve_early(ramdisk_image, ramdisk_end);
+ }
+
+ reserve_ebda();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
start_kernel();
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fbad51fce67..5d8c5730686 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -9,6 +9,7 @@
.text
#include <linux/threads.h>
+#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
@@ -151,7 +152,9 @@ WEAK(xen_entry)
/* Unknown implementation; there's really
nothing we can do at this point. */
ud2a
-.data
+
+ __INITDATA
+
subarch_entries:
.long default_entry /* normal x86/PC */
.long lguest_entry /* lguest hypervisor */
@@ -199,7 +202,6 @@ default_entry:
addl $0x67, %eax /* 0x67 == _PAGE_TABLE */
movl %eax, 4092(%edx)
- xorl %ebx,%ebx /* This is the boot CPU (BSP) */
jmp 3f
/*
* Non-boot CPU entry point; entered from trampoline.S
@@ -222,6 +224,8 @@ ENTRY(startup_32_smp)
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
+#endif /* CONFIG_SMP */
+3:
/*
* New page tables may be in 4Mbyte page mode and may
@@ -268,12 +272,6 @@ ENTRY(startup_32_smp)
wrmsr
6:
- /* This is a secondary processor (AP) */
- xorl %ebx,%ebx
- incl %ebx
-
-#endif /* CONFIG_SMP */
-3:
/*
* Enable paging
@@ -297,7 +295,7 @@ ENTRY(startup_32_smp)
popfl
#ifdef CONFIG_SMP
- andl %ebx,%ebx
+ cmpb $0, ready
jz 1f /* Initial CPU cleans BSS */
jmp checkCPUtype
1:
@@ -502,6 +500,7 @@ early_fault:
call printk
#endif
#endif
+ call dump_stack
hlt_loop:
hlt
jmp hlt_loop
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b6167fe3330..1d5a7a36120 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,13 @@
#include <asm/msr.h>
#include <asm/cache.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/asm-offsets.h>
+#include <asm/paravirt.h>
+#else
+#define GET_CR2_INTO_RCX movq %cr2, %rcx
+#endif
+
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
* because we need identity-mapped pages.
*
@@ -260,14 +267,43 @@ init_rsp:
bad_address:
jmp bad_address
+#ifdef CONFIG_EARLY_PRINTK
+.macro early_idt_tramp first, last
+ .ifgt \last-\first
+ early_idt_tramp \first, \last-1
+ .endif
+ movl $\last,%esi
+ jmp early_idt_handler
+.endm
+
+ .globl early_idt_handlers
+early_idt_handlers:
+ early_idt_tramp 0, 63
+ early_idt_tramp 64, 127
+ early_idt_tramp 128, 191
+ early_idt_tramp 192, 255
+#endif
+
ENTRY(early_idt_handler)
+#ifdef CONFIG_EARLY_PRINTK
cmpl $2,early_recursion_flag(%rip)
jz 1f
incl early_recursion_flag(%rip)
+ GET_CR2_INTO_RCX
+ movq %rcx,%r9
+ xorl %r8d,%r8d # zero for error code
+ movl %esi,%ecx # get vector number
+ # Test %ecx against mask of vectors that push error code.
+ cmpl $31,%ecx
+ ja 0f
+ movl $1,%eax
+ salq %cl,%rax
+ testl $0x27d00,%eax
+ je 0f
+ popq %r8 # get error code
+0: movq 0(%rsp),%rcx # get ip
+ movq 8(%rsp),%rdx # get cs
xorl %eax,%eax
- movq 8(%rsp),%rsi # get rip
- movq (%rsp),%rdx
- movq %cr2,%rcx
leaq early_idt_msg(%rip),%rdi
call early_printk
cmpl $2,early_recursion_flag(%rip)
@@ -278,15 +314,19 @@ ENTRY(early_idt_handler)
movq 8(%rsp),%rsi # get rip again
call __print_symbol
#endif
+#endif /* EARLY_PRINTK */
1: hlt
jmp 1b
+
+#ifdef CONFIG_EARLY_PRINTK
early_recursion_flag:
.long 0
early_idt_msg:
- .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
+ .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
early_idt_ripmsg:
.asciz "RIP %s\n"
+#endif /* CONFIG_EARLY_PRINTK */
.balign PAGE_SIZE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 2f99ee206b9..429d084e014 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -6,7 +6,6 @@
#include <linux/init.h>
#include <linux/sysdev.h>
#include <linux/pm.h>
-#include <linux/delay.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
@@ -16,7 +15,8 @@
#define HPET_MASK CLOCKSOURCE_MASK(32)
#define HPET_SHIFT 22
-/* FSEC = 10^-15 NSEC = 10^-9 */
+/* FSEC = 10^-15
+ NSEC = 10^-9 */
#define FSEC_PER_NSEC 1000000
/*
@@ -107,6 +107,7 @@ int is_hpet_enabled(void)
{
return is_hpet_capable() && hpet_legacy_int_enabled;
}
+EXPORT_SYMBOL_GPL(is_hpet_enabled);
/*
* When the hpet driver (/dev/hpet) is enabled, we need to reserve
@@ -132,16 +133,13 @@ static void hpet_reserve_platform_timers(unsigned long id)
#ifdef CONFIG_HPET_EMULATE_RTC
hpet_reserve_timer(&hd, 1);
#endif
-
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
- for (i = 2; i < nrtimers; timer++, i++)
- hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
-
+ for (i = 2; i < nrtimers; timer++, i++)
+ hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
+ Tn_INT_ROUTE_CNF_SHIFT;
hpet_alloc(&hd);
-
}
#else
static void hpet_reserve_platform_timers(unsigned long id) { }
@@ -478,6 +476,7 @@ void hpet_disable(void)
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
+#include <asm/rtc.h>
#define DEFAULT_RTC_INT_FREQ 64
#define DEFAULT_RTC_SHIFT 6
@@ -492,6 +491,38 @@ static unsigned long hpet_default_delta;
static unsigned long hpet_pie_delta;
static unsigned long hpet_pie_limit;
+static rtc_irq_handler irq_handler;
+
+/*
+ * Registers a IRQ handler.
+ */
+int hpet_register_irq_handler(rtc_irq_handler handler)
+{
+ if (!is_hpet_enabled())
+ return -ENODEV;
+ if (irq_handler)
+ return -EBUSY;
+
+ irq_handler = handler;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hpet_register_irq_handler);
+
+/*
+ * Deregisters the IRQ handler registered with hpet_register_irq_handler()
+ * and does cleanup.
+ */
+void hpet_unregister_irq_handler(rtc_irq_handler handler)
+{
+ if (!is_hpet_enabled())
+ return;
+
+ irq_handler = NULL;
+ hpet_rtc_flags = 0;
+}
+EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
+
/*
* Timer 1 for RTC emulation. We use one shot mode, as periodic mode
* is not supported by all HPET implementations for timer 1.
@@ -533,6 +564,7 @@ int hpet_rtc_timer_init(void)
return 1;
}
+EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
/*
* The functions below are called from rtc driver.
@@ -547,6 +579,7 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
hpet_rtc_flags &= ~bit_mask;
return 1;
}
+EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit);
int hpet_set_rtc_irq_bit(unsigned long bit_mask)
{
@@ -562,6 +595,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
return 1;
}
+EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
unsigned char sec)
@@ -575,6 +609,7 @@ int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
return 1;
}
+EXPORT_SYMBOL_GPL(hpet_set_alarm_time);
int hpet_set_periodic_freq(unsigned long freq)
{
@@ -593,11 +628,13 @@ int hpet_set_periodic_freq(unsigned long freq)
}
return 1;
}
+EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
int hpet_rtc_dropped_irq(void)
{
return is_hpet_enabled();
}
+EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
static void hpet_rtc_timer_reinit(void)
{
@@ -641,9 +678,10 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
unsigned long rtc_int_flag = 0;
hpet_rtc_timer_reinit();
+ memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- rtc_get_rtc_time(&curr_time);
+ get_rtc_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
@@ -665,8 +703,10 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
if (rtc_int_flag) {
rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
- rtc_interrupt(rtc_int_flag, dev_id);
+ if (irq_handler)
+ irq_handler(rtc_int_flag, dev_id);
}
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_GPL(hpet_rtc_interrupt);
#endif
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 02112fcc0de..061627806a2 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -22,12 +22,5 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strstr);
-#ifdef CONFIG_SMP
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
new file mode 100644
index 00000000000..26719bd2c77
--- /dev/null
+++ b/arch/x86/kernel/i387.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/regset.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/math_emu.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_X86_64
+
+#include <asm/sigcontext32.h>
+#include <asm/user32.h>
+
+#else
+
+#define save_i387_ia32 save_i387
+#define restore_i387_ia32 restore_i387
+
+#define _fpstate_ia32 _fpstate
+#define user_i387_ia32_struct user_i387_struct
+#define user32_fxsr_struct user_fxsr_struct
+
+#endif
+
+#ifdef CONFIG_MATH_EMULATION
+#define HAVE_HWFP (boot_cpu_data.hard_math)
+#else
+#define HAVE_HWFP 1
+#endif
+
+unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+
+void mxcsr_feature_mask_init(void)
+{
+ unsigned long mask = 0;
+ clts();
+ if (cpu_has_fxsr) {
+ memset(&current->thread.i387.fxsave, 0,
+ sizeof(struct i387_fxsave_struct));
+ asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
+ mask = current->thread.i387.fxsave.mxcsr_mask;
+ if (mask == 0)
+ mask = 0x0000ffbf;
+ }
+ mxcsr_feature_mask &= mask;
+ stts();
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Called at bootup to set up the initial FPU state that is later cloned
+ * into all processes.
+ */
+void __cpuinit fpu_init(void)
+{
+ unsigned long oldcr0 = read_cr0();
+ extern void __bad_fxsave_alignment(void);
+
+ if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
+ __bad_fxsave_alignment();
+ set_in_cr4(X86_CR4_OSFXSR);
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+ write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
+
+ mxcsr_feature_mask_init();
+ /* clean state in init */
+ current_thread_info()->status = 0;
+ clear_used_math();
+}
+#endif /* CONFIG_X86_64 */
+
+/*
+ * The _current_ task is using the FPU for the first time
+ * so initialize it and set the mxcsr to its default
+ * value at reset if we support XMM instructions and then
+ * remeber the current task has used the FPU.
+ */
+void init_fpu(struct task_struct *tsk)
+{
+ if (tsk_used_math(tsk)) {
+ if (tsk == current)
+ unlazy_fpu(tsk);
+ return;
+ }
+
+ if (cpu_has_fxsr) {
+ memset(&tsk->thread.i387.fxsave, 0,
+ sizeof(struct i387_fxsave_struct));
+ tsk->thread.i387.fxsave.cwd = 0x37f;
+ if (cpu_has_xmm)
+ tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+ } else {
+ memset(&tsk->thread.i387.fsave, 0,
+ sizeof(struct i387_fsave_struct));
+ tsk->thread.i387.fsave.cwd = 0xffff037fu;
+ tsk->thread.i387.fsave.swd = 0xffff0000u;
+ tsk->thread.i387.fsave.twd = 0xffffffffu;
+ tsk->thread.i387.fsave.fos = 0xffff0000u;
+ }
+ /*
+ * Only the device not available exception or ptrace can call init_fpu.
+ */
+ set_stopped_child_used_math(tsk);
+}
+
+int fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ return tsk_used_math(target) ? regset->n : 0;
+}
+
+int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
+}
+
+int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (!cpu_has_fxsr)
+ return -ENODEV;
+
+ unlazy_fpu(target);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.i387.fxsave, 0, -1);
+}
+
+int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_fxsr)
+ return -ENODEV;
+
+ unlazy_fpu(target);
+ set_stopped_child_used_math(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.i387.fxsave, 0, -1);
+
+ /*
+ * mxcsr reserved bits must be masked to zero for security reasons.
+ */
+ target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+
+ return ret;
+}
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+ return tmp;
+}
+
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
+#define FP_EXP_TAG_VALID 0
+#define FP_EXP_TAG_ZERO 1
+#define FP_EXP_TAG_SPECIAL 2
+#define FP_EXP_TAG_EMPTY 3
+
+static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
+{
+ struct _fpxreg *st;
+ u32 tos = (fxsave->swd >> 11) & 7;
+ u32 twd = (unsigned long) fxsave->twd;
+ u32 tag;
+ u32 ret = 0xffff0000u;
+ int i;
+
+ for (i = 0; i < 8; i++, twd >>= 1) {
+ if (twd & 0x1) {
+ st = FPREG_ADDR(fxsave, (i - tos) & 7);
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ case 0x0000:
+ if (!st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3])
+ tag = FP_EXP_TAG_ZERO;
+ else
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ default:
+ if (st->significand[3] & 0x8000)
+ tag = FP_EXP_TAG_VALID;
+ else
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ }
+ } else {
+ tag = FP_EXP_TAG_EMPTY;
+ }
+ ret |= tag << (2 * i);
+ }
+ return ret;
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+static void convert_from_fxsr(struct user_i387_ia32_struct *env,
+ struct task_struct *tsk)
+{
+ struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+ struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
+ struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
+ int i;
+
+ env->cwd = fxsave->cwd | 0xffff0000u;
+ env->swd = fxsave->swd | 0xffff0000u;
+ env->twd = twd_fxsr_to_i387(fxsave);
+
+#ifdef CONFIG_X86_64
+ env->fip = fxsave->rip;
+ env->foo = fxsave->rdp;
+ if (tsk == current) {
+ /*
+ * should be actually ds/cs at fpu exception time, but
+ * that information is not available in 64bit mode.
+ */
+ asm("mov %%ds,%0" : "=r" (env->fos));
+ asm("mov %%cs,%0" : "=r" (env->fcs));
+ } else {
+ struct pt_regs *regs = task_pt_regs(tsk);
+ env->fos = 0xffff0000 | tsk->thread.ds;
+ env->fcs = regs->cs;
+ }
+#else
+ env->fip = fxsave->fip;
+ env->fcs = fxsave->fcs;
+ env->foo = fxsave->foo;
+ env->fos = fxsave->fos;
+#endif
+
+ for (i = 0; i < 8; ++i)
+ memcpy(&to[i], &from[i], sizeof(to[0]));
+}
+
+static void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env)
+
+{
+ struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+ struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
+ struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
+ int i;
+
+ fxsave->cwd = env->cwd;
+ fxsave->swd = env->swd;
+ fxsave->twd = twd_i387_to_fxsr(env->twd);
+ fxsave->fop = (u16) ((u32) env->fcs >> 16);
+#ifdef CONFIG_X86_64
+ fxsave->rip = env->fip;
+ fxsave->rdp = env->foo;
+ /* cs and ds ignored */
+#else
+ fxsave->fip = env->fip;
+ fxsave->fcs = (env->fcs & 0xffff);
+ fxsave->foo = env->foo;
+ fxsave->fos = env->fos;
+#endif
+
+ for (i = 0; i < 8; ++i)
+ memcpy(&to[i], &from[i], sizeof(from[0]));
+}
+
+int fpregs_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct user_i387_ia32_struct env;
+
+ if (!HAVE_HWFP)
+ return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+
+ unlazy_fpu(target);
+
+ if (!cpu_has_fxsr)
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.i387.fsave, 0, -1);
+
+ if (kbuf && pos == 0 && count == sizeof(env)) {
+ convert_from_fxsr(kbuf, target);
+ return 0;
+ }
+
+ convert_from_fxsr(&env, target);
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+}
+
+int fpregs_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct user_i387_ia32_struct env;
+ int ret;
+
+ if (!HAVE_HWFP)
+ return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+
+ unlazy_fpu(target);
+ set_stopped_child_used_math(target);
+
+ if (!cpu_has_fxsr)
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.i387.fsave, 0, -1);
+
+ if (pos > 0 || count < sizeof(env))
+ convert_from_fxsr(&env, target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+ if (!ret)
+ convert_to_fxsr(target, &env);
+
+ return ret;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
+{
+ struct task_struct *tsk = current;
+
+ unlazy_fpu(tsk);
+ tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+ if (__copy_to_user(buf, &tsk->thread.i387.fsave,
+ sizeof(struct i387_fsave_struct)))
+ return -1;
+ return 1;
+}
+
+static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
+{
+ struct task_struct *tsk = current;
+ struct user_i387_ia32_struct env;
+ int err = 0;
+
+ unlazy_fpu(tsk);
+
+ convert_from_fxsr(&env, tsk);
+ if (__copy_to_user(buf, &env, sizeof(env)))
+ return -1;
+
+ err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+ err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
+ if (err)
+ return -1;
+
+ if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+ sizeof(struct i387_fxsave_struct)))
+ return -1;
+ return 1;
+}
+
+int save_i387_ia32(struct _fpstate_ia32 __user *buf)
+{
+ if (!used_math())
+ return 0;
+
+ /* This will cause a "finit" to be triggered by the next
+ * attempted FPU operation by the 'current' process.
+ */
+ clear_used_math();
+
+ if (HAVE_HWFP) {
+ if (cpu_has_fxsr) {
+ return save_i387_fxsave(buf);
+ } else {
+ return save_i387_fsave(buf);
+ }
+ } else {
+ return fpregs_soft_get(current, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ NULL, buf) ? -1 : 1;
+ }
+}
+
+static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
+{
+ struct task_struct *tsk = current;
+ clear_fpu(tsk);
+ return __copy_from_user(&tsk->thread.i387.fsave, buf,
+ sizeof(struct i387_fsave_struct));
+}
+
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
+{
+ int err;
+ struct task_struct *tsk = current;
+ struct user_i387_ia32_struct env;
+ clear_fpu(tsk);
+ err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+ sizeof(struct i387_fxsave_struct));
+ /* mxcsr reserved bits must be masked to zero for security reasons */
+ tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+ if (err || __copy_from_user(&env, buf, sizeof(env)))
+ return 1;
+ convert_to_fxsr(tsk, &env);
+ return 0;
+}
+
+int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
+{
+ int err;
+
+ if (HAVE_HWFP) {
+ if (cpu_has_fxsr) {
+ err = restore_i387_fxsave(buf);
+ } else {
+ err = restore_i387_fsave(buf);
+ }
+ } else {
+ err = fpregs_soft_set(current, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ NULL, buf) != 0;
+ }
+ set_used_math();
+ return err;
+}
+
+/*
+ * FPU state for core dumps.
+ * This is only used for a.out dumps now.
+ * It is declared generically using elf_fpregset_t (which is
+ * struct user_i387_struct) but is in fact only used for 32-bit
+ * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
+ */
+int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
+{
+ int fpvalid;
+ struct task_struct *tsk = current;
+
+ fpvalid = !!used_math();
+ if (fpvalid)
+ fpvalid = !fpregs_get(tsk, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ fpu, NULL);
+
+ return fpvalid;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/i387_32.c b/arch/x86/kernel/i387_32.c
deleted file mode 100644
index 7d2e12f6c78..00000000000
--- a/arch/x86/kernel/i387_32.c
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/math_emu.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-#ifdef CONFIG_MATH_EMULATION
-#define HAVE_HWFP (boot_cpu_data.hard_math)
-#else
-#define HAVE_HWFP 1
-#endif
-
-static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff;
-
-void mxcsr_feature_mask_init(void)
-{
- unsigned long mask = 0;
- clts();
- if (cpu_has_fxsr) {
- memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
- mask = current->thread.i387.fxsave.mxcsr_mask;
- if (mask == 0) mask = 0x0000ffbf;
- }
- mxcsr_feature_mask &= mask;
- stts();
-}
-
-/*
- * The _current_ task is using the FPU for the first time
- * so initialize it and set the mxcsr to its default
- * value at reset if we support XMM instructions and then
- * remeber the current task has used the FPU.
- */
-void init_fpu(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- tsk->thread.i387.fxsave.cwd = 0x37f;
- if (cpu_has_xmm)
- tsk->thread.i387.fxsave.mxcsr = 0x1f80;
- } else {
- memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct));
- tsk->thread.i387.fsave.cwd = 0xffff037fu;
- tsk->thread.i387.fsave.swd = 0xffff0000u;
- tsk->thread.i387.fsave.twd = 0xffffffffu;
- tsk->thread.i387.fsave.fos = 0xffff0000u;
- }
- /* only the device not available exception or ptrace can call init_fpu */
- set_stopped_child_used_math(tsk);
-}
-
-/*
- * FPU lazy state save handling.
- */
-
-void kernel_fpu_begin(void)
-{
- struct thread_info *thread = current_thread_info();
-
- preempt_disable();
- if (thread->status & TS_USEDFPU) {
- __save_init_fpu(thread->task);
- return;
- }
- clts();
-}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
- return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
-{
- struct _fpxreg *st = NULL;
- unsigned long tos = (fxsave->swd >> 11) & 7;
- unsigned long twd = (unsigned long) fxsave->twd;
- unsigned long tag;
- unsigned long ret = 0xffff0000u;
- int i;
-
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
-
- for ( i = 0 ; i < 8 ; i++ ) {
- if ( twd & 0x1 ) {
- st = FPREG_ADDR( fxsave, (i - tos) & 7 );
-
- switch ( st->exponent & 0x7fff ) {
- case 0x7fff:
- tag = 2; /* Special */
- break;
- case 0x0000:
- if ( !st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3] ) {
- tag = 1; /* Zero */
- } else {
- tag = 2; /* Special */
- }
- break;
- default:
- if ( st->significand[3] & 0x8000 ) {
- tag = 0; /* Valid */
- } else {
- tag = 2; /* Special */
- }
- break;
- }
- } else {
- tag = 3; /* Empty */
- }
- ret |= (tag << (2 * i));
- twd = twd >> 1;
- }
- return ret;
-}
-
-/*
- * FPU state interaction.
- */
-
-unsigned short get_fpu_cwd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.cwd;
- }
-}
-
-unsigned short get_fpu_swd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.swd;
- }
-}
-
-#if 0
-unsigned short get_fpu_twd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.twd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.twd;
- }
-}
-#endif /* 0 */
-
-unsigned short get_fpu_mxcsr( struct task_struct *tsk )
-{
- if ( cpu_has_xmm ) {
- return tsk->thread.i387.fxsave.mxcsr;
- } else {
- return 0x1f80;
- }
-}
-
-#if 0
-
-void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.cwd = cwd;
- } else {
- tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
- }
-}
-
-void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.swd = swd;
- } else {
- tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
- }
-}
-
-void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
- } else {
- tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
- }
-}
-
-#endif /* 0 */
-
-/*
- * FXSR floating point environment conversions.
- */
-
-static int convert_fxsr_to_user( struct _fpstate __user *buf,
- struct i387_fxsave_struct *fxsave )
-{
- unsigned long env[7];
- struct _fpreg __user *to;
- struct _fpxreg *from;
- int i;
-
- env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
- env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
- env[2] = twd_fxsr_to_i387(fxsave);
- env[3] = fxsave->fip;
- env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
- env[5] = fxsave->foo;
- env[6] = fxsave->fos;
-
- if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
- return 1;
-
- to = &buf->_st[0];
- from = (struct _fpxreg *) &fxsave->st_space[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- unsigned long __user *t = (unsigned long __user *)to;
- unsigned long *f = (unsigned long *)from;
-
- if (__put_user(*f, t) ||
- __put_user(*(f + 1), t + 1) ||
- __put_user(from->exponent, &to->exponent))
- return 1;
- }
- return 0;
-}
-
-static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
- struct _fpstate __user *buf )
-{
- unsigned long env[7];
- struct _fpxreg *to;
- struct _fpreg __user *from;
- int i;
-
- if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
- return 1;
-
- fxsave->cwd = (unsigned short)(env[0] & 0xffff);
- fxsave->swd = (unsigned short)(env[1] & 0xffff);
- fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
- fxsave->fip = env[3];
- fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
- fxsave->fcs = (env[4] & 0xffff);
- fxsave->foo = env[5];
- fxsave->fos = env[6];
-
- to = (struct _fpxreg *) &fxsave->st_space[0];
- from = &buf->_st[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- unsigned long *t = (unsigned long *)to;
- unsigned long __user *f = (unsigned long __user *)from;
-
- if (__get_user(*t, f) ||
- __get_user(*(t + 1), f + 1) ||
- __get_user(to->exponent, &from->exponent))
- return 1;
- }
- return 0;
-}
-
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387_fsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
-
- unlazy_fpu( tsk );
- tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
- if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
- sizeof(struct i387_fsave_struct) ) )
- return -1;
- return 1;
-}
-
-static int save_i387_fxsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- unlazy_fpu( tsk );
-
- if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
- return -1;
-
- err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
- err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
- if ( err )
- return -1;
-
- if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct) ) )
- return -1;
- return 1;
-}
-
-int save_i387( struct _fpstate __user *buf )
-{
- if ( !used_math() )
- return 0;
-
- /* This will cause a "finit" to be triggered by the next
- * attempted FPU operation by the 'current' process.
- */
- clear_used_math();
-
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return save_i387_fxsave( buf );
- } else {
- return save_i387_fsave( buf );
- }
- } else {
- return save_i387_soft( &current->thread.i387.soft, buf );
- }
-}
-
-static inline int restore_i387_fsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
- clear_fpu( tsk );
- return __copy_from_user( &tsk->thread.i387.fsave, buf,
- sizeof(struct i387_fsave_struct) );
-}
-
-static int restore_i387_fxsave( struct _fpstate __user *buf )
-{
- int err;
- struct task_struct *tsk = current;
- clear_fpu( tsk );
- err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct) );
- /* mxcsr reserved bits must be masked to zero for security reasons */
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
-}
-
-int restore_i387( struct _fpstate __user *buf )
-{
- int err;
-
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- err = restore_i387_fxsave( buf );
- } else {
- err = restore_i387_fsave( buf );
- }
- } else {
- err = restore_i387_soft( &current->thread.i387.soft, buf );
- }
- set_used_math();
- return err;
-}
-
-/*
- * ptrace request handlers.
- */
-
-static inline int get_fpregs_fsave( struct user_i387_struct __user *buf,
- struct task_struct *tsk )
-{
- return __copy_to_user( buf, &tsk->thread.i387.fsave,
- sizeof(struct user_i387_struct) );
-}
-
-static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf,
- struct task_struct *tsk )
-{
- return convert_fxsr_to_user( (struct _fpstate __user *)buf,
- &tsk->thread.i387.fxsave );
-}
-
-int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk )
-{
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return get_fpregs_fxsave( buf, tsk );
- } else {
- return get_fpregs_fsave( buf, tsk );
- }
- } else {
- return save_i387_soft( &tsk->thread.i387.soft,
- (struct _fpstate __user *)buf );
- }
-}
-
-static inline int set_fpregs_fsave( struct task_struct *tsk,
- struct user_i387_struct __user *buf )
-{
- return __copy_from_user( &tsk->thread.i387.fsave, buf,
- sizeof(struct user_i387_struct) );
-}
-
-static inline int set_fpregs_fxsave( struct task_struct *tsk,
- struct user_i387_struct __user *buf )
-{
- return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
- (struct _fpstate __user *)buf );
-}
-
-int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf )
-{
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return set_fpregs_fxsave( tsk, buf );
- } else {
- return set_fpregs_fsave( tsk, buf );
- }
- } else {
- return restore_i387_soft( &tsk->thread.i387.soft,
- (struct _fpstate __user *)buf );
- }
-}
-
-int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- if (__copy_to_user( buf, &tsk->thread.i387.fxsave,
- sizeof(struct user_fxsr_struct) ))
- return -EFAULT;
- return 0;
- } else {
- return -EIO;
- }
-}
-
-int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
-{
- int ret = 0;
-
- if ( cpu_has_fxsr ) {
- if (__copy_from_user( &tsk->thread.i387.fxsave, buf,
- sizeof(struct user_fxsr_struct) ))
- ret = -EFAULT;
- /* mxcsr reserved bits must be masked to zero for security reasons */
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- } else {
- ret = -EIO;
- }
- return ret;
-}
-
-/*
- * FPU state for core dumps.
- */
-
-static inline void copy_fpu_fsave( struct task_struct *tsk,
- struct user_i387_struct *fpu )
-{
- memcpy( fpu, &tsk->thread.i387.fsave,
- sizeof(struct user_i387_struct) );
-}
-
-static inline void copy_fpu_fxsave( struct task_struct *tsk,
- struct user_i387_struct *fpu )
-{
- unsigned short *to;
- unsigned short *from;
- int i;
-
- memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
-
- to = (unsigned short *)&fpu->st_space[0];
- from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
- for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
- memcpy( to, from, 5 * sizeof(unsigned short) );
- }
-}
-
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
-{
- int fpvalid;
- struct task_struct *tsk = current;
-
- fpvalid = !!used_math();
- if ( fpvalid ) {
- unlazy_fpu( tsk );
- if ( cpu_has_fxsr ) {
- copy_fpu_fxsave( tsk, fpu );
- } else {
- copy_fpu_fsave( tsk, fpu );
- }
- }
-
- return fpvalid;
-}
-EXPORT_SYMBOL(dump_fpu);
-
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
- int fpvalid = !!tsk_used_math(tsk);
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- if (cpu_has_fxsr)
- copy_fpu_fxsave(tsk, fpu);
- else
- copy_fpu_fsave(tsk, fpu);
- }
- return fpvalid;
-}
-
-int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
-{
- int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
- }
- return fpvalid;
-}
diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c
deleted file mode 100644
index bfaff28fb13..00000000000
--- a/arch/x86/kernel/i387_64.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2002 Andi Kleen, SuSE Labs
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * x86-64 rework 2002 Andi Kleen.
- * Does direct fxsave in and out of user space now for signal handlers.
- * All the FSAVE<->FXSAVE conversion code has been moved to the 32bit emulation,
- * the 64bit user space sees a FXSAVE frame directly.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffff;
-
-void mxcsr_feature_mask_init(void)
-{
- unsigned int mask;
- clts();
- memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
- mask = current->thread.i387.fxsave.mxcsr_mask;
- if (mask == 0) mask = 0x0000ffbf;
- mxcsr_feature_mask &= mask;
- stts();
-}
-
-/*
- * Called at bootup to set up the initial FPU state that is later cloned
- * into all processes.
- */
-void __cpuinit fpu_init(void)
-{
- unsigned long oldcr0 = read_cr0();
- extern void __bad_fxsave_alignment(void);
-
- if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
- __bad_fxsave_alignment();
- set_in_cr4(X86_CR4_OSFXSR);
- set_in_cr4(X86_CR4_OSXMMEXCPT);
-
- write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
-
- mxcsr_feature_mask_init();
- /* clean state in init */
- current_thread_info()->status = 0;
- clear_used_math();
-}
-
-void init_fpu(struct task_struct *child)
-{
- if (tsk_used_math(child)) {
- if (child == current)
- unlazy_fpu(child);
- return;
- }
- memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- child->thread.i387.fxsave.cwd = 0x37f;
- child->thread.i387.fxsave.mxcsr = 0x1f80;
- /* only the device not available exception or ptrace can call init_fpu */
- set_stopped_child_used_math(child);
-}
-
-/*
- * Signal frame handlers.
- */
-
-int save_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
- sizeof(tsk->thread.i387.fxsave));
-
- if ((unsigned long)buf % 16)
- printk("save_i387: bad fpstate %p\n",buf);
-
- if (!used_math())
- return 0;
- clear_used_math(); /* trigger finit */
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
- if (err) return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
- } else {
- if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct)))
- return -1;
- }
- return 1;
-}
-
-/*
- * ptrace request handlers.
- */
-
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
-{
- init_fpu(tsk);
- return __copy_to_user(buf, &tsk->thread.i387.fxsave,
- sizeof(struct user_i387_struct)) ? -EFAULT : 0;
-}
-
-int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
-{
- if (__copy_from_user(&tsk->thread.i387.fxsave, buf,
- sizeof(struct user_i387_struct)))
- return -EFAULT;
- return 0;
-}
-
-/*
- * FPU state for core dumps.
- */
-
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
-{
- struct task_struct *tsk = current;
-
- if (!used_math())
- return 0;
-
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct));
- return 1;
-}
-
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
- int fpvalid = !!tsk_used_math(tsk);
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct));
-}
- return fpvalid;
-}
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index a42c8074532..ef62b07b2b4 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -13,10 +13,17 @@
#include <asm/delay.h>
#include <asm/i8253.h>
#include <asm/io.h>
+#include <asm/hpet.h>
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
+#ifdef CONFIG_X86_32
+static void pit_disable_clocksource(void);
+#else
+static inline void pit_disable_clocksource(void) { }
+#endif
+
/*
* HPET replaces the PIT, when enabled. So we need to know, which of
* the two timers is used
@@ -31,38 +38,38 @@ struct clock_event_device *global_clock_event;
static void init_pit_timer(enum clock_event_mode mode,
struct clock_event_device *evt)
{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
+ spin_lock(&i8253_lock);
switch(mode) {
case CLOCK_EVT_MODE_PERIODIC:
/* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ outb_pit(0x34, PIT_MODE);
+ outb_pit(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb_pit(LATCH >> 8 , PIT_CH0); /* MSB */
break;
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
evt->mode == CLOCK_EVT_MODE_ONESHOT) {
- outb_p(0x30, PIT_MODE);
- outb_p(0, PIT_CH0);
- outb_p(0, PIT_CH0);
+ outb_pit(0x30, PIT_MODE);
+ outb_pit(0, PIT_CH0);
+ outb_pit(0, PIT_CH0);
}
+ pit_disable_clocksource();
break;
case CLOCK_EVT_MODE_ONESHOT:
/* One shot setup */
- outb_p(0x38, PIT_MODE);
+ pit_disable_clocksource();
+ outb_pit(0x38, PIT_MODE);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
break;
}
- spin_unlock_irqrestore(&i8253_lock, flags);
+ spin_unlock(&i8253_lock);
}
/*
@@ -72,12 +79,10 @@ static void init_pit_timer(enum clock_event_mode mode,
*/
static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(delta & 0xff , PIT_CH0); /* LSB */
- outb(delta >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
+ spin_lock(&i8253_lock);
+ outb_pit(delta & 0xff , PIT_CH0); /* LSB */
+ outb_pit(delta >> 8 , PIT_CH0); /* MSB */
+ spin_unlock(&i8253_lock);
return 0;
}
@@ -148,15 +153,15 @@ static cycle_t pit_read(void)
* count), it cannot be newer.
*/
jifs = jiffies;
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb_p(PIT_CH0) << 8;
+ outb_pit(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_pit(PIT_CH0); /* read the latched count */
+ count |= inb_pit(PIT_CH0) << 8;
/* VIA686a test code... reset the latch if count > max + 1 */
if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
+ outb_pit(0x34, PIT_MODE);
+ outb_pit(LATCH & 0xff, PIT_CH0);
+ outb_pit(LATCH >> 8, PIT_CH0);
count = LATCH - 1;
}
@@ -195,9 +200,28 @@ static struct clocksource clocksource_pit = {
.shift = 20,
};
+static void pit_disable_clocksource(void)
+{
+ /*
+ * Use mult to check whether it is registered or not
+ */
+ if (clocksource_pit.mult) {
+ clocksource_unregister(&clocksource_pit);
+ clocksource_pit.mult = 0;
+ }
+}
+
static int __init init_pit_clocksource(void)
{
- if (num_possible_cpus() > 1) /* PIT does not scale! */
+ /*
+ * Several reasons not to register PIT as a clocksource:
+ *
+ * - On SMP PIT does not scale due to i8253_lock
+ * - when HPET is enabled
+ * - when local APIC timer is active (PIT is switched off)
+ */
+ if (num_possible_cpus() > 1 || is_hpet_enabled() ||
+ pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
return 0;
clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 5f3496d0198..2d25b77102f 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -21,8 +21,6 @@
#include <asm/arch_hooks.h>
#include <asm/i8259.h>
-#include <io_ports.h>
-
/*
* This is the 'legacy' 8259A Programmable Interrupt Controller,
* present in the majority of PC/AT boxes.
@@ -291,20 +289,20 @@ void init_8259A(int auto_eoi)
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
/*
- * outb_p - this has to work on a wide range of PC hardware.
+ * outb_pic - this has to work on a wide range of PC hardware.
*/
- outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
- outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
- outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
+ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
+ outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
if (auto_eoi) /* master does Auto EOI */
- outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+ outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
else /* master expects normal EOI */
- outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+ outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
- outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
- outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
- outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
- outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
+ outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
+ outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
+ outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
if (auto_eoi)
/*
* In AEOI mode we just have to mask the interrupt
@@ -341,7 +339,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
outb(0,0xF0);
if (ignore_fpu_irq || !boot_cpu_data.hard_math)
return IRQ_NONE;
- math_error((void __user *)get_irq_regs()->eip);
+ math_error((void __user *)get_irq_regs()->ip);
return IRQ_HANDLED;
}
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index ba6d57286f5..fa57a156850 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -21,6 +21,7 @@
#include <asm/delay.h>
#include <asm/desc.h>
#include <asm/apic.h>
+#include <asm/i8259.h>
/*
* Common place to define all x86 IRQ vectors
@@ -48,7 +49,7 @@
*/
/*
- * The IO-APIC gives us many more interrupt sources. Most of these
+ * The IO-APIC gives us many more interrupt sources. Most of these
* are unused but an SMP system is supposed to have enough memory ...
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
* across the spectrum, so we really want to be prepared to get all
@@ -76,7 +77,7 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
/* for the irq vectors */
-static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
+static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
@@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = {
/*
* This contains the irq mask for both 8259A irq controllers,
*/
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
+unsigned int cached_irq_mask = 0xffff;
/*
* Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
@@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq)
spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask |= mask;
if (irq & 8)
- outb(cached_A1,0xA1);
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
else
- outb(cached_21,0x21);
+ outb(cached_master_mask, PIC_MASTER_IMR);
spin_unlock_irqrestore(&i8259A_lock, flags);
}
@@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq)
spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask &= mask;
if (irq & 8)
- outb(cached_A1,0xA1);
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
else
- outb(cached_21,0x21);
+ outb(cached_master_mask, PIC_MASTER_IMR);
spin_unlock_irqrestore(&i8259A_lock, flags);
}
@@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq)
spin_lock_irqsave(&i8259A_lock, flags);
if (irq < 8)
- ret = inb(0x20) & mask;
+ ret = inb(PIC_MASTER_CMD) & mask;
else
- ret = inb(0xA0) & (mask >> 8);
+ ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
spin_unlock_irqrestore(&i8259A_lock, flags);
return ret;
@@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsigned int irq)
int irqmask = 1<<irq;
if (irq < 8) {
- outb(0x0B,0x20); /* ISR register */
- value = inb(0x20) & irqmask;
- outb(0x0A,0x20); /* back to the IRR register */
+ outb(0x0B,PIC_MASTER_CMD); /* ISR register */
+ value = inb(PIC_MASTER_CMD) & irqmask;
+ outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
return value;
}
- outb(0x0B,0xA0); /* ISR register */
- value = inb(0xA0) & (irqmask >> 8);
- outb(0x0A,0xA0); /* back to the IRR register */
+ outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
+ value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
+ outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
return value;
}
@@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned int irq)
handle_real_irq:
if (irq & 8) {
- inb(0xA1); /* DUMMY - (do we need this?) */
- outb(cached_A1,0xA1);
- outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
- outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
+ inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
+ /* 'Specific EOI' to slave */
+ outb(0x60+(irq&7),PIC_SLAVE_CMD);
+ /* 'Specific EOI' to master-IRQ2 */
+ outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
} else {
- inb(0x21); /* DUMMY - (do we need this?) */
- outb(cached_21,0x21);
- outb(0x60+irq,0x20); /* 'Specific EOI' to master */
+ inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
+ outb(cached_master_mask, PIC_MASTER_IMR);
+ /* 'Specific EOI' to master */
+ outb(0x60+irq,PIC_MASTER_CMD);
}
spin_unlock_irqrestore(&i8259A_lock, flags);
return;
@@ -270,7 +270,8 @@ spurious_8259A_irq:
* lets ACK and report it. [once per IRQ]
*/
if (!(spurious_irq_mask & irqmask)) {
- printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
+ printk(KERN_DEBUG
+ "spurious 8259A interrupt: IRQ%d.\n", irq);
spurious_irq_mask |= irqmask;
}
atomic_inc(&irq_err_count);
@@ -283,51 +284,6 @@ spurious_8259A_irq:
}
}
-void init_8259A(int auto_eoi)
-{
- unsigned long flags;
-
- i8259A_auto_eoi = auto_eoi;
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-2 */
-
- /*
- * outb_p - this has to work on a wide range of PC hardware.
- */
- outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
- outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
- outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
- if (auto_eoi)
- outb_p(0x03, 0x21); /* master does Auto EOI */
- else
- outb_p(0x01, 0x21); /* master expects normal EOI */
-
- outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
- outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
- outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
- outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
- is to be investigated) */
-
- if (auto_eoi)
- /*
- * in AEOI mode we just have to mask the interrupt
- * when acking.
- */
- i8259A_chip.mask_ack = disable_8259A_irq;
- else
- i8259A_chip.mask_ack = mask_and_ack_8259A;
-
- udelay(100); /* wait for 8259A to initialize */
-
- outb(cached_21, 0x21); /* restore master IRQ mask */
- outb(cached_A1, 0xA1); /* restore slave IRQ mask */
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
static char irq_trigger[2];
/**
* ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
@@ -364,8 +320,8 @@ static int i8259A_shutdown(struct sys_device *dev)
* the kernel initialization code can get it
* out of.
*/
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-1 */
+ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
return 0;
}
@@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void)
device_initcall(i8259A_init_sysfs);
+void init_8259A(int auto_eoi)
+{
+ unsigned long flags;
+
+ i8259A_auto_eoi = auto_eoi;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
+
+ /*
+ * outb_pic - this has to work on a wide range of PC hardware.
+ */
+ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
+ /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+ outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
+ /* 8259A-1 (the master) has a slave on IR2 */
+ outb_pic(0x04, PIC_MASTER_IMR);
+ if (auto_eoi) /* master does Auto EOI */
+ outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+ else /* master expects normal EOI */
+ outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+
+ outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
+ /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+ outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
+ /* 8259A-2 is a slave on master's IR2 */
+ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
+ /* (slave's support for AEOI in flat mode is to be investigated) */
+ outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
+
+ if (auto_eoi)
+ /*
+ * In AEOI mode we just have to mask the interrupt
+ * when acking.
+ */
+ i8259A_chip.mask_ack = disable_8259A_irq;
+ else
+ i8259A_chip.mask_ack = mask_and_ack_8259A;
+
+ udelay(100); /* wait for 8259A to initialize */
+
+ outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+
+
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
@@ -448,7 +456,9 @@ void __init init_ISA_irqs (void)
}
}
-void __init init_IRQ(void)
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
{
int i;
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 468c9c43784..5b3ce793436 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
/*
* Initial thread structure.
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index ab77f190546..4ca548632c8 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -35,6 +35,7 @@
#include <linux/htirq.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/jiffies.h> /* time_after() */
#include <asm/io.h>
#include <asm/smp.h>
@@ -48,8 +49,6 @@
#include <mach_apic.h>
#include <mach_apicdef.h>
-#include "io_ports.h"
-
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
@@ -351,7 +350,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# include <asm/processor.h> /* kernel_thread() */
# include <linux/kernel_stat.h> /* kstat */
# include <linux/slab.h> /* kmalloc() */
-# include <linux/timer.h> /* time_after() */
+# include <linux/timer.h>
#define IRQBALANCE_CHECK_ARCH -999
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
@@ -727,7 +726,7 @@ late_initcall(balanced_irq_init);
#endif /* CONFIG_SMP */
#ifndef CONFIG_SMP
-void fastcall send_IPI_self(int vector)
+void send_IPI_self(int vector)
{
unsigned int cfg;
@@ -1900,7 +1899,7 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ if (time_after(jiffies, t1 + 4))
return 1;
return 0;
@@ -2080,7 +2079,7 @@ static struct irq_chip lapic_chip __read_mostly = {
.eoi = ack_apic,
};
-static void setup_nmi (void)
+static void __init setup_nmi(void)
{
/*
* Dirty trick to enable the NMI watchdog ...
@@ -2093,7 +2092,7 @@ static void setup_nmi (void)
*/
apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
- on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+ enable_NMI_through_LVT0();
apic_printk(APIC_VERBOSE, " done.\n");
}
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 23a3ac06a23..1627c0d53e0 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -32,9 +32,11 @@
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/dmar.h>
+#include <linux/jiffies.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
+#include <linux/bootmem.h>
#include <asm/idle.h>
#include <asm/io.h>
@@ -1069,7 +1071,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
+ maxlvt = lapic_get_maxlvt();
v = apic_read(APIC_TASKPRI);
printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
@@ -1171,7 +1173,7 @@ void __apicdebuginit print_PIC(void)
#endif /* 0 */
-static void __init enable_IO_APIC(void)
+void __init enable_IO_APIC(void)
{
union IO_APIC_reg_01 reg_01;
int i8259_apic, i8259_pin;
@@ -1298,7 +1300,7 @@ static int __init timer_irq_works(void)
*/
/* jiffies wrap? */
- if (jiffies - t1 > 4)
+ if (time_after(jiffies, t1 + 4))
return 1;
return 0;
}
@@ -1411,7 +1413,7 @@ static void irq_complete_move(unsigned int irq)
if (likely(!cfg->move_in_progress))
return;
- vector = ~get_irq_regs()->orig_rax;
+ vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
@@ -1438,7 +1440,7 @@ static void ack_apic_level(unsigned int irq)
int do_unmask_irq = 0;
irq_complete_move(irq);
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
@@ -1565,7 +1567,7 @@ static struct hw_interrupt_type lapic_irq_type __read_mostly = {
.end = end_lapic_irq,
};
-static void setup_nmi (void)
+static void __init setup_nmi(void)
{
/*
* Dirty trick to enable the NMI watchdog ...
@@ -1578,7 +1580,7 @@ static void setup_nmi (void)
*/
printk(KERN_INFO "activating NMI Watchdog ...");
- enable_NMI_through_LVT0(NULL);
+ enable_NMI_through_LVT0();
printk(" done.\n");
}
@@ -1654,7 +1656,7 @@ static inline void unlock_ExtINT_logic(void)
*
* FIXME: really need to revamp this for modern platforms only.
*/
-static inline void check_timer(void)
+static inline void __init check_timer(void)
{
struct irq_cfg *cfg = irq_cfg + 0;
int apic1, pin1, apic2, pin2;
@@ -1788,7 +1790,10 @@ __setup("no_timer_check", notimercheck);
void __init setup_IO_APIC(void)
{
- enable_IO_APIC();
+
+ /*
+ * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+ */
if (acpi_ioapic)
io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
@@ -2288,3 +2293,92 @@ void __init setup_ioapic_dest(void)
}
#endif
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+ unsigned long n;
+ struct resource *res;
+ char *mem;
+ int i;
+
+ if (nr_ioapics <= 0)
+ return NULL;
+
+ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+ n *= nr_ioapics;
+
+ mem = alloc_bootmem(n);
+ res = (void *)mem;
+
+ if (mem != NULL) {
+ memset(mem, 0, n);
+ mem += sizeof(struct resource) * nr_ioapics;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ sprintf(mem, "IOAPIC %u", i);
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
+ }
+ }
+
+ ioapic_resources = res;
+
+ return res;
+}
+
+void __init ioapic_init_mappings(void)
+{
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+ struct resource *ioapic_res;
+ int i;
+
+ ioapic_res = ioapic_setup_resources();
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config) {
+ ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ } else {
+ ioapic_phys = (unsigned long)
+ alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = __pa(ioapic_phys);
+ }
+ set_fixmap_nocache(idx, ioapic_phys);
+ apic_printk(APIC_VERBOSE,
+ "mapped IOAPIC to %016lx (%016lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+
+ if (ioapic_res != NULL) {
+ ioapic_res->start = ioapic_phys;
+ ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res++;
+ }
+ }
+}
+
+static int __init ioapic_insert_resources(void)
+{
+ int i;
+ struct resource *r = ioapic_resources;
+
+ if (!r) {
+ printk(KERN_ERR
+ "IO APIC resources could be not be allocated.\n");
+ return -1;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ insert_resource(&iomem_resource, r);
+ r++;
+ }
+
+ return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 00000000000..bd49321034d
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,114 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
+
+static int __initdata io_delay_override;
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ */
+void native_io_delay(void)
+{
+ switch (io_delay_type) {
+ default:
+ case CONFIG_IO_DELAY_TYPE_0X80:
+ asm volatile ("outb %al, $0x80");
+ break;
+ case CONFIG_IO_DELAY_TYPE_0XED:
+ asm volatile ("outb %al, $0xed");
+ break;
+ case CONFIG_IO_DELAY_TYPE_UDELAY:
+ /*
+ * 2 usecs is an upper-bound for the outb delay but
+ * note that udelay doesn't have the bus-level
+ * side-effects that outb does, nor does udelay() have
+ * precise timings during very early bootup (the delays
+ * are shorter until calibrated):
+ */
+ udelay(2);
+ case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ }
+}
+EXPORT_SYMBOL(native_io_delay);
+
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
+{
+ if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
+ id->ident);
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ }
+
+ return 0;
+}
+
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "Compaq Presario V6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B7")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion tx1000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30BF")
+ }
+ },
+ { }
+};
+
+void __init io_delay_init(void)
+{
+ if (!io_delay_override)
+ dmi_check_system(io_delay_0xed_port_dmi_table);
+}
+
+static int __init io_delay_param(char *s)
+{
+ if (!strcmp(s, "0x80"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ else if (!strcmp(s, "0xed"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ else if (!strcmp(s, "udelay"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ else if (!strcmp(s, "none"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+ else
+ return -EINVAL;
+
+ io_delay_override = 1;
+ return 0;
+}
+
+early_param("io_delay", io_delay_param);
diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport.c
index 4ed48dc8df1..50e5e4a31c8 100644
--- a/arch/x86/kernel/ioport_32.c
+++ b/arch/x86/kernel/ioport.c
@@ -1,6 +1,6 @@
/*
* This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
+ * by Linus. 32/64 bits code unification by Miguel Botón.
*/
#include <linux/sched.h>
@@ -16,49 +16,27 @@
#include <linux/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+static void set_bitmap(unsigned long *bitmap, unsigned int base,
+ unsigned int extent, int new_value)
{
- unsigned long mask;
- unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
- unsigned int low_index = base & (BITS_PER_LONG-1);
- int length = low_index + extent;
-
- if (low_index != 0) {
- mask = (~0UL << low_index);
- if (length < BITS_PER_LONG)
- mask &= ~(~0UL << length);
- if (new_value)
- *bitmap_base++ |= mask;
- else
- *bitmap_base++ &= ~mask;
- length -= BITS_PER_LONG;
- }
-
- mask = (new_value ? ~0UL : 0UL);
- while (length >= BITS_PER_LONG) {
- *bitmap_base++ = mask;
- length -= BITS_PER_LONG;
- }
+ unsigned int i;
- if (length > 0) {
- mask = ~(~0UL << length);
+ for (i = base; i < base + extent; i++) {
if (new_value)
- *bitmap_base++ |= mask;
+ __set_bit(i, bitmap);
else
- *bitmap_base++ &= ~mask;
+ __clear_bit(i, bitmap);
}
}
-
/*
* this changes the io permissions bitmap in the current task.
*/
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
- unsigned long i, max_long, bytes, bytes_updated;
struct thread_struct * t = &current->thread;
struct tss_struct * tss;
- unsigned long *bitmap;
+ unsigned int i, max_long, bytes, bytes_updated;
if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
@@ -71,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* this is why we delay this operation until now:
*/
if (!t->io_bitmap_ptr) {
- bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+
if (!bitmap)
return -ENOMEM;
@@ -100,11 +79,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
if (t->io_bitmap_ptr[i] != ~0UL)
max_long = i;
- bytes = (max_long + 1) * sizeof(long);
+ bytes = (max_long + 1) * sizeof(unsigned long);
bytes_updated = max(bytes, t->io_bitmap_max);
t->io_bitmap_max = bytes;
+#ifdef CONFIG_X86_32
/*
* Sets the lazy trigger so that the next I/O operation will
* reload the correct bitmap.
@@ -113,6 +93,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
*/
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
tss->io_bitmap_owner = NULL;
+#else
+ /* Update the TSS: */
+ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+#endif
put_cpu();
@@ -124,18 +108,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* beyond the 0x3ff range: to get the full 65536 ports bitmapped
* you'd need 8kB of bitmaps/process, which is a bit excessive.
*
- * Here we just change the eflags value on the stack: we allow
+ * Here we just change the flags value on the stack: we allow
* only the super-user to do it. This depends on the stack-layout
* on system-call entry - see also fork() and the signal handling
* code.
*/
-
-asmlinkage long sys_iopl(unsigned long unused)
+static int do_iopl(unsigned int level, struct pt_regs *regs)
{
- volatile struct pt_regs * regs = (struct pt_regs *) &unused;
- unsigned int level = regs->ebx;
- unsigned int old = (regs->eflags >> 12) & 3;
- struct thread_struct *t = &current->thread;
+ unsigned int old = (regs->flags >> 12) & 3;
if (level > 3)
return -EINVAL;
@@ -144,8 +124,31 @@ asmlinkage long sys_iopl(unsigned long unused)
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
}
+ regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
+
+ return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage long sys_iopl(unsigned long regsp)
+{
+ struct pt_regs *regs = (struct pt_regs *)&regsp;
+ unsigned int level = regs->bx;
+ struct thread_struct *t = &current->thread;
+ int rc;
+
+ rc = do_iopl(level, regs);
+ if (rc < 0)
+ goto out;
+
t->iopl = level << 12;
- regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
set_iopl_mask(t->iopl);
- return 0;
+out:
+ return rc;
+}
+#else
+asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
+{
+ return do_iopl(level, regs);
}
+#endif
diff --git a/arch/x86/kernel/ioport_64.c b/arch/x86/kernel/ioport_64.c
deleted file mode 100644
index 5f62fad64da..00000000000
--- a/arch/x86/kernel/ioport_64.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
-#include <linux/syscalls.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
-{
- int i;
- if (new_value)
- for (i = base; i < base + extent; i++)
- __set_bit(i, bitmap);
- else
- for (i = base; i < base + extent; i++)
- clear_bit(i, bitmap);
-}
-
-/*
- * this changes the io permissions bitmap in the current task.
- */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
- unsigned int i, max_long, bytes, bytes_updated;
- struct thread_struct * t = &current->thread;
- struct tss_struct * tss;
- unsigned long *bitmap;
-
- if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
- return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /*
- * If it's the first ioperm() call in this thread's lifetime, set the
- * IO bitmap up. ioperm() is much less timing critical than clone(),
- * this is why we delay this operation until now:
- */
- if (!t->io_bitmap_ptr) {
- bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!bitmap)
- return -ENOMEM;
-
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
- set_thread_flag(TIF_IO_BITMAP);
- }
-
- /*
- * do it in the per-thread copy and in the TSS ...
- *
- * Disable preemption via get_cpu() - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
- */
- tss = &per_cpu(init_tss, get_cpu());
-
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
- /*
- * Search for a (possibly new) maximum. This is simple and stupid,
- * to keep it obviously correct:
- */
- max_long = 0;
- for (i = 0; i < IO_BITMAP_LONGS; i++)
- if (t->io_bitmap_ptr[i] != ~0UL)
- max_long = i;
-
- bytes = (max_long + 1) * sizeof(long);
- bytes_updated = max(bytes, t->io_bitmap_max);
-
- t->io_bitmap_max = bytes;
-
- /* Update the TSS: */
- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-
- put_cpu();
-
- return 0;
-}
-
-/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
- *
- * Here we just change the eflags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
- */
-
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
- unsigned int old = (regs->eflags >> 12) & 3;
-
- if (level > 3)
- return -EINVAL;
- /* Trying to gain more privileges? */
- if (level > old) {
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
- }
- regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
- return 0;
-}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index d3fde94f734..cef054b09d2 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -66,11 +66,11 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+unsigned int do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
- int irq = ~regs->orig_eax;
+ int irq = ~regs->orig_ax;
struct irq_desc *desc = irq_desc + irq;
#ifdef CONFIG_4KSTACKS
union irq_ctx *curctx, *irqctx;
@@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
- long esp;
+ long sp;
__asm__ __volatile__("andl %%esp,%0" :
- "=r" (esp) : "0" (THREAD_SIZE - 1));
- if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+ "=r" (sp) : "0" (THREAD_SIZE - 1));
+ if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
printk("do_IRQ: stack overflow: %ld\n",
- esp - sizeof(struct thread_info));
+ sp - sizeof(struct thread_info));
dump_stack();
}
}
@@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
* current stack (which is the irq stack already after all)
*/
if (curctx != irqctx) {
- int arg1, arg2, ebx;
+ int arg1, arg2, bx;
/* build the stack frame on the IRQ stack */
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
@@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call *%%edi \n"
- " movl %%ebx,%%esp \n"
- : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+ " xchgl %%ebx,%%esp \n"
+ " call *%%edi \n"
+ " movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=d" (arg2), "=b" (bx)
: "0" (irq), "1" (desc), "2" (isp),
"D" (desc->handle_irq)
: "memory", "cc"
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 6b5c730d67b..3aac15466a9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -20,6 +20,26 @@
atomic_t irq_err_count;
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ * But don't ack when the APIC is disabled. -AK
+ */
+ if (!disable_apic)
+ ack_APIC_irq();
+}
+
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
@@ -33,11 +53,11 @@ static inline void stack_overflow_check(struct pt_regs *regs)
u64 curbase = (u64)task_stack_page(current);
static unsigned long warned = -60*HZ;
- if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
- regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
+ if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
+ regs->sp < curbase + sizeof(struct thread_info) + 128 &&
time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
- current->comm, curbase, regs->rsp);
+ printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ current->comm, curbase, regs->sp);
show_stack(NULL,NULL);
warned = jiffies;
}
@@ -142,7 +162,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
/* high bit used in ret_from_ code */
- unsigned vector = ~regs->orig_rax;
+ unsigned vector = ~regs->orig_ax;
unsigned irq;
exit_idle();
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
new file mode 100644
index 00000000000..73354302fda
--- /dev/null
+++ b/arch/x86/kernel/kdebugfs.c
@@ -0,0 +1,65 @@
+/*
+ * Architecture specific debugfs files
+ *
+ * Copyright (C) 2007, Intel Corp.
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+
+#ifdef CONFIG_DEBUG_BOOT_PARAMS
+static struct debugfs_blob_wrapper boot_params_blob = {
+ .data = &boot_params,
+ .size = sizeof(boot_params),
+};
+
+static int __init boot_params_kdebugfs_init(void)
+{
+ int error;
+ struct dentry *dbp, *version, *data;
+
+ dbp = debugfs_create_dir("boot_params", NULL);
+ if (!dbp) {
+ error = -ENOMEM;
+ goto err_return;
+ }
+ version = debugfs_create_x16("version", S_IRUGO, dbp,
+ &boot_params.hdr.version);
+ if (!version) {
+ error = -ENOMEM;
+ goto err_dir;
+ }
+ data = debugfs_create_blob("data", S_IRUGO, dbp,
+ &boot_params_blob);
+ if (!data) {
+ error = -ENOMEM;
+ goto err_version;
+ }
+ return 0;
+err_version:
+ debugfs_remove(version);
+err_dir:
+ debugfs_remove(dbp);
+err_return:
+ return error;
+}
+#endif
+
+static int __init arch_kdebugfs_init(void)
+{
+ int error = 0;
+
+#ifdef CONFIG_DEBUG_BOOT_PARAMS
+ error = boot_params_kdebugfs_init();
+#endif
+
+ return error;
+}
+
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
new file mode 100644
index 00000000000..a99e764fd66
--- /dev/null
+++ b/arch/x86/kernel/kprobes.c
@@ -0,0 +1,1066 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes contributions from
+ * Rusty Russell).
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ * 2004-Oct Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ * <prasanna@in.ibm.com> adapted for x86_64 from i386.
+ * 2005-Mar Roland McGrath <roland@redhat.com>
+ * Fixed to handle %rip-relative addressing mode correctly.
+ * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
+ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ * <prasanna@in.ibm.com> added function-return probes.
+ * 2005-May Rusty Lynch <rusty.lynch@intel.com>
+ * Added function return probes functionality
+ * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
+ * kprobe-booster and kretprobe-booster for i386.
+ * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
+ * and kretprobe-booster for x86-64
+ * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
+ * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ * unified x86 kprobes code.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+
+void jprobe_return_end(void);
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+#ifdef CONFIG_X86_64
+#define stack_addr(regs) ((unsigned long *)regs->sp)
+#else
+/*
+ * "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
+ * don't save the ss and esp registers if the CPU is already in kernel
+ * mode when it traps. So for kprobes, regs->sp and regs->ss are not
+ * the [nonexistent] saved stack pointer and ss register, but rather
+ * the top 8 bytes of the pre-int3 stack. So &regs->sp happens to
+ * point to the top of the pre-int3 stack.
+ */
+#define stack_addr(regs) ((unsigned long *)&regs->sp)
+#endif
+
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 32))
+ /*
+ * Undefined/reserved opcodes, conditional jump, Opcode Extension
+ * Groups, and some special opcodes can not boost.
+ */
+static const u32 twobyte_is_boostable[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ---------------------------------------------- */
+ W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
+ W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
+ W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+ W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+ W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
+ W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+ W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+ W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
+ W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
+ W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
+ /* ----------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+static const u32 onebyte_has_modrm[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ----------------------------------------------- */
+ W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
+ W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+ W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
+ W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
+ W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+ W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
+ W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
+ W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
+ W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
+ W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
+ W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
+ /* ----------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+static const u32 twobyte_has_modrm[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ----------------------------------------------- */
+ W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
+ W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
+ W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
+ W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
+ W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
+ W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
+ W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
+ W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
+ W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
+ W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
+ W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
+ /* ----------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+#undef W
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+ {"__switch_to", }, /* This function switches only current task, but
+ doesn't switch kernel stack.*/
+ {NULL, NULL} /* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes set_jmp_op(void *from, void *to)
+{
+ struct __arch_jmp_op {
+ char op;
+ s32 raddr;
+ } __attribute__((packed)) * jop;
+ jop = (struct __arch_jmp_op *)from;
+ jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
+ jop->op = RELATIVEJUMP_INSTRUCTION;
+}
+
+/*
+ * Check for the REX prefix which can only exist on X86_64
+ * X86_32 always returns 0
+ */
+static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+{
+#ifdef CONFIG_X86_64
+ if ((*insn & 0xf0) == 0x40)
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * Returns non-zero if opcode is boostable.
+ * RIP relative instructions are adjusted at copying time in 64 bits mode
+ */
+static int __kprobes can_boost(kprobe_opcode_t *opcodes)
+{
+ kprobe_opcode_t opcode;
+ kprobe_opcode_t *orig_opcodes = opcodes;
+
+retry:
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ opcode = *(opcodes++);
+
+ /* 2nd-byte opcode */
+ if (opcode == 0x0f) {
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ return test_bit(*opcodes,
+ (unsigned long *)twobyte_is_boostable);
+ }
+
+ switch (opcode & 0xf0) {
+#ifdef CONFIG_X86_64
+ case 0x40:
+ goto retry; /* REX prefix is boostable */
+#endif
+ case 0x60:
+ if (0x63 < opcode && opcode < 0x67)
+ goto retry; /* prefixes */
+ /* can't boost Address-size override and bound */
+ return (opcode != 0x62 && opcode != 0x67);
+ case 0x70:
+ return 0; /* can't boost conditional jump */
+ case 0xc0:
+ /* can't boost software-interruptions */
+ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
+ case 0xd0:
+ /* can boost AA* and XLAT */
+ return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
+ case 0xe0:
+ /* can boost in/out and absolute jmps */
+ return ((opcode & 0x04) || opcode == 0xea);
+ case 0xf0:
+ if ((opcode & 0x0c) == 0 && opcode != 0xf1)
+ goto retry; /* lock/rep(ne) prefix */
+ /* clear and set flags are boostable */
+ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+ default:
+ /* segment override prefixes are boostable */
+ if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
+ goto retry; /* prefixes */
+ /* CS override prefix and call are not boostable */
+ return (opcode != 0x2e && opcode != 0x9a);
+ }
+}
+
+/*
+ * Returns non-zero if opcode modifies the interrupt flag.
+ */
+static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
+{
+ switch (*insn) {
+ case 0xfa: /* cli */
+ case 0xfb: /* sti */
+ case 0xcf: /* iret/iretd */
+ case 0x9d: /* popf/popfd */
+ return 1;
+ }
+
+ /*
+ * on X86_64, 0x40-0x4f are REX prefixes so we need to look
+ * at the next byte instead.. but of course not recurse infinitely
+ */
+ if (is_REX_prefix(insn))
+ return is_IF_modifier(++insn);
+
+ return 0;
+}
+
+/*
+ * Adjust the displacement if the instruction uses the %rip-relative
+ * addressing mode.
+ * If it does, Return the address of the 32-bit displacement word.
+ * If not, return null.
+ * Only applicable to 64-bit x86.
+ */
+static void __kprobes fix_riprel(struct kprobe *p)
+{
+#ifdef CONFIG_X86_64
+ u8 *insn = p->ainsn.insn;
+ s64 disp;
+ int need_modrm;
+
+ /* Skip legacy instruction prefixes. */
+ while (1) {
+ switch (*insn) {
+ case 0x66:
+ case 0x67:
+ case 0x2e:
+ case 0x3e:
+ case 0x26:
+ case 0x64:
+ case 0x65:
+ case 0x36:
+ case 0xf0:
+ case 0xf3:
+ case 0xf2:
+ ++insn;
+ continue;
+ }
+ break;
+ }
+
+ /* Skip REX instruction prefix. */
+ if (is_REX_prefix(insn))
+ ++insn;
+
+ if (*insn == 0x0f) {
+ /* Two-byte opcode. */
+ ++insn;
+ need_modrm = test_bit(*insn,
+ (unsigned long *)twobyte_has_modrm);
+ } else
+ /* One-byte opcode. */
+ need_modrm = test_bit(*insn,
+ (unsigned long *)onebyte_has_modrm);
+
+ if (need_modrm) {
+ u8 modrm = *++insn;
+ if ((modrm & 0xc7) == 0x05) {
+ /* %rip+disp32 addressing mode */
+ /* Displacement follows ModRM byte. */
+ ++insn;
+ /*
+ * The copied instruction uses the %rip-relative
+ * addressing mode. Adjust the displacement for the
+ * difference between the original location of this
+ * instruction and the location of the copy that will
+ * actually be run. The tricky bit here is making sure
+ * that the sign extension happens correctly in this
+ * calculation, since we need a signed 32-bit result to
+ * be sign-extended to 64 bits when it's added to the
+ * %rip value and yield the same 64-bit result that the
+ * sign-extension of the original signed 32-bit
+ * displacement would have given.
+ */
+ disp = (u8 *) p->addr + *((s32 *) insn) -
+ (u8 *) p->ainsn.insn;
+ BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
+ *(s32 *)insn = (s32) disp;
+ }
+ }
+#endif
+}
+
+static void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+ fix_riprel(p);
+
+ if (can_boost(p->addr))
+ p->ainsn.boostable = 0;
+ else
+ p->ainsn.boostable = -1;
+
+ p->opcode = *p->addr;
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ /* insn: must be on special executable page on x86. */
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+ arch_copy_kprobe(p);
+ return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ text_poke(p->addr, &p->opcode, 1);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ mutex_lock(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+ mutex_unlock(&kprobe_mutex);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
+ kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
+ kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_flags = kcb->kprobe_old_flags
+ = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
+ if (is_IF_modifier(p->ainsn.insn))
+ kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
+}
+
+static void __kprobes clear_btf(void)
+{
+ if (test_thread_flag(TIF_DEBUGCTLMSR))
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+}
+
+static void __kprobes restore_btf(void)
+{
+ if (test_thread_flag(TIF_DEBUGCTLMSR))
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ clear_btf();
+ regs->flags |= X86_EFLAGS_TF;
+ regs->flags &= ~X86_EFLAGS_IF;
+ /* single step inline if the instruction is an int3 */
+ if (p->opcode == BREAKPOINT_INSTRUCTION)
+ regs->ip = (unsigned long)p->addr;
+ else
+ regs->ip = (unsigned long)p->ainsn.insn;
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ unsigned long *sara = stack_addr(regs);
+
+ ri->ret_addr = (kprobe_opcode_t *) *sara;
+
+ /* Replace the return addr with trampoline addr */
+ *sara = (unsigned long) &kretprobe_trampoline;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+ if (p->ainsn.boostable == 1 && !p->post_handler) {
+ /* Boost up -- we can execute copied instructions directly */
+ reset_current_kprobe();
+ regs->ip = (unsigned long)p->ainsn.insn;
+ preempt_enable_no_resched();
+ return;
+ }
+#endif
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+}
+
+/*
+ * We have reentered the kprobe_handler(), since another probe was hit while
+ * within the handler. We save the original kprobes variables and just single
+ * step on the instruction of the new probe without calling any user handlers.
+ */
+static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+#ifdef CONFIG_X86_64
+ /* TODO: Provide re-entrancy from post_kprobes_handler() and
+ * avoid exception stack corruption while single-stepping on
+ * the instruction of the new probe.
+ */
+ arch_disarm_kprobe(p);
+ regs->ip = (unsigned long)p->addr;
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ break;
+#endif
+ case KPROBE_HIT_ACTIVE:
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ break;
+ case KPROBE_HIT_SS:
+ if (p == kprobe_running()) {
+ regs->flags &= ~TF_MASK;
+ regs->flags |= kcb->kprobe_saved_flags;
+ return 0;
+ } else {
+ /* A probe has been hit in the codepath leading up
+ * to, or just after, single-stepping of a probed
+ * instruction. This entire codepath should strictly
+ * reside in .kprobes.text section. Raise a warning
+ * to highlight this peculiar case.
+ */
+ }
+ default:
+ /* impossible cases */
+ WARN_ON(1);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate and they
+ * remain disabled thorough out this function.
+ */
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ kprobe_opcode_t *addr;
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+
+ addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Back up over the (now missing) int3 and run
+ * the original instruction.
+ */
+ regs->ip = (unsigned long)addr;
+ return 1;
+ }
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing. We conditionally
+ * re-enable preemption at the end of this function,
+ * and also in reenter_kprobe() and setup_singlestep().
+ */
+ preempt_disable();
+
+ kcb = get_kprobe_ctlblk();
+ p = get_kprobe(addr);
+
+ if (p) {
+ if (kprobe_running()) {
+ if (reenter_kprobe(p, regs, kcb))
+ return 1;
+ } else {
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for calling the break_handler below on re-entry
+ * for jprobe processing, so get out doing nothing
+ * more here.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ setup_singlestep(p, regs, kcb);
+ return 1;
+ }
+ } else if (kprobe_running()) {
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ setup_singlestep(p, regs, kcb);
+ return 1;
+ }
+ } /* else: not a kprobe fault; let the kernel handle it */
+
+ preempt_enable_no_resched();
+ return 0;
+}
+
+/*
+ * When a retprobed function returns, this code saves registers and
+ * calls trampoline_handler() runs, which calls the kretprobe's handler.
+ */
+void __kprobes kretprobe_trampoline_holder(void)
+{
+ asm volatile (
+ ".global kretprobe_trampoline\n"
+ "kretprobe_trampoline: \n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ /*
+ * Skip cs, ip, orig_ax.
+ * trampoline_handler() will plug in these values
+ */
+ " subq $24, %rsp\n"
+ " pushq %rdi\n"
+ " pushq %rsi\n"
+ " pushq %rdx\n"
+ " pushq %rcx\n"
+ " pushq %rax\n"
+ " pushq %r8\n"
+ " pushq %r9\n"
+ " pushq %r10\n"
+ " pushq %r11\n"
+ " pushq %rbx\n"
+ " pushq %rbp\n"
+ " pushq %r12\n"
+ " pushq %r13\n"
+ " pushq %r14\n"
+ " pushq %r15\n"
+ " movq %rsp, %rdi\n"
+ " call trampoline_handler\n"
+ /* Replace saved sp with true return address. */
+ " movq %rax, 152(%rsp)\n"
+ " popq %r15\n"
+ " popq %r14\n"
+ " popq %r13\n"
+ " popq %r12\n"
+ " popq %rbp\n"
+ " popq %rbx\n"
+ " popq %r11\n"
+ " popq %r10\n"
+ " popq %r9\n"
+ " popq %r8\n"
+ " popq %rax\n"
+ " popq %rcx\n"
+ " popq %rdx\n"
+ " popq %rsi\n"
+ " popq %rdi\n"
+ /* Skip orig_ax, ip, cs */
+ " addq $24, %rsp\n"
+ " popfq\n"
+#else
+ " pushf\n"
+ /*
+ * Skip cs, ip, orig_ax.
+ * trampoline_handler() will plug in these values
+ */
+ " subl $12, %esp\n"
+ " pushl %fs\n"
+ " pushl %ds\n"
+ " pushl %es\n"
+ " pushl %eax\n"
+ " pushl %ebp\n"
+ " pushl %edi\n"
+ " pushl %esi\n"
+ " pushl %edx\n"
+ " pushl %ecx\n"
+ " pushl %ebx\n"
+ " movl %esp, %eax\n"
+ " call trampoline_handler\n"
+ /* Move flags to cs */
+ " movl 52(%esp), %edx\n"
+ " movl %edx, 48(%esp)\n"
+ /* Replace saved flags with true return address. */
+ " movl %eax, 52(%esp)\n"
+ " popl %ebx\n"
+ " popl %ecx\n"
+ " popl %edx\n"
+ " popl %esi\n"
+ " popl %edi\n"
+ " popl %ebp\n"
+ " popl %eax\n"
+ /* Skip ip, orig_ax, es, ds, fs */
+ " addl $20, %esp\n"
+ " popf\n"
+#endif
+ " ret\n");
+}
+
+/*
+ * Called from kretprobe_trampoline
+ */
+void * __kprobes trampoline_handler(struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+ /* fixup registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+#endif
+ regs->ip = trampoline_address;
+ regs->orig_ax = ~0UL;
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * return probes installed on them, and/or more then one
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always pushed into the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the (chronologically) first instance's ret_addr
+ * will be the real return address, and all the rest will
+ * point to kretprobe_trampoline.
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler) {
+ __get_cpu_var(current_kprobe) = &ri->rp->kp;
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->rp->handler(ri, regs);
+ __get_cpu_var(current_kprobe) = NULL;
+ }
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ return (void *)orig_ret_address;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "int 3"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ *
+ * This function prepares to return from the post-single-step
+ * interrupt. We have to fix up the stack as follows:
+ *
+ * 0) Except in the case of absolute or indirect jump or call instructions,
+ * the new ip is relative to the copied instruction. We need to make
+ * it relative to the original instruction.
+ *
+ * 1) If the single-stepped instruction was pushfl, then the TF and IF
+ * flags are set in the just-pushed flags, and may need to be cleared.
+ *
+ * 2) If the single-stepped instruction was a call, the return address
+ * that is atop the stack is the address following the copied instruction.
+ * We need to make it the address following the original instruction.
+ *
+ * If this is the first time we've single-stepped the instruction at
+ * this probepoint, and the instruction is boostable, boost it: add a
+ * jump instruction after the copied instruction, that jumps to the next
+ * instruction after the probepoint.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+ struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+{
+ unsigned long *tos = stack_addr(regs);
+ unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+ unsigned long orig_ip = (unsigned long)p->addr;
+ kprobe_opcode_t *insn = p->ainsn.insn;
+
+ /*skip the REX prefix*/
+ if (is_REX_prefix(insn))
+ insn++;
+
+ regs->flags &= ~X86_EFLAGS_TF;
+ switch (*insn) {
+ case 0x9c: /* pushfl */
+ *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
+ *tos |= kcb->kprobe_old_flags;
+ break;
+ case 0xc2: /* iret/ret/lret */
+ case 0xc3:
+ case 0xca:
+ case 0xcb:
+ case 0xcf:
+ case 0xea: /* jmp absolute -- ip is correct */
+ /* ip is already adjusted, no more changes required */
+ p->ainsn.boostable = 1;
+ goto no_change;
+ case 0xe8: /* call relative - Fix return addr */
+ *tos = orig_ip + (*tos - copy_ip);
+ break;
+#ifdef CONFIG_X86_32
+ case 0x9a: /* call absolute -- same as call absolute, indirect */
+ *tos = orig_ip + (*tos - copy_ip);
+ goto no_change;
+#endif
+ case 0xff:
+ if ((insn[1] & 0x30) == 0x10) {
+ /*
+ * call absolute, indirect
+ * Fix return addr; ip is correct.
+ * But this is not boostable
+ */
+ *tos = orig_ip + (*tos - copy_ip);
+ goto no_change;
+ } else if (((insn[1] & 0x31) == 0x20) ||
+ ((insn[1] & 0x31) == 0x21)) {
+ /*
+ * jmp near and far, absolute indirect
+ * ip is correct. And this is boostable
+ */
+ p->ainsn.boostable = 1;
+ goto no_change;
+ }
+ default:
+ break;
+ }
+
+ if (p->ainsn.boostable == 0) {
+ if ((regs->ip > copy_ip) &&
+ (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
+ /*
+ * These instructions can be executed directly if it
+ * jumps back to correct address.
+ */
+ set_jmp_op((void *)regs->ip,
+ (void *)orig_ip + (regs->ip - copy_ip));
+ p->ainsn.boostable = 1;
+ } else {
+ p->ainsn.boostable = -1;
+ }
+ }
+
+ regs->ip += orig_ip - copy_ip;
+
+no_change:
+ restore_btf();
+}
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled thoroughout this function.
+ */
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs, kcb);
+ regs->flags |= kcb->kprobe_saved_flags;
+ trace_hardirqs_fixup_flags(regs->flags);
+
+ /* Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, flags
+ * will have TF set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (regs->flags & X86_EFLAGS_TF)
+ return 0;
+
+ return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->ip = (unsigned long)cur->addr;
+ regs->flags |= kcb->kprobe_old_flags;
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accounting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+
+ /*
+ * fixup routine could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ int ret = NOTIFY_DONE;
+
+ if (args->regs && user_mode_vm(args->regs))
+ return ret;
+
+ switch (val) {
+ case DIE_INT3:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_DEBUG:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_GPF:
+ /*
+ * To be potentially processing a kprobe fault and to
+ * trust the result from kprobe_running(), we have
+ * be non-preemptible.
+ */
+ if (!preemptible() && kprobe_running() &&
+ kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ unsigned long addr;
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ kcb->jprobe_saved_regs = *regs;
+ kcb->jprobe_saved_sp = stack_addr(regs);
+ addr = (unsigned long)(kcb->jprobe_saved_sp);
+
+ /*
+ * As Linus pointed out, gcc assumes that the callee
+ * owns the argument space and could overwrite it, e.g.
+ * tailcall optimization. So, to be absolutely safe
+ * we also save and restore enough stack bytes to cover
+ * the argument area.
+ */
+ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
+ MIN_STACK_SIZE(addr));
+ regs->flags &= ~X86_EFLAGS_IF;
+ trace_hardirqs_off();
+ regs->ip = (unsigned long)(jp->entry);
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ asm volatile (
+#ifdef CONFIG_X86_64
+ " xchg %%rbx,%%rsp \n"
+#else
+ " xchgl %%ebx,%%esp \n"
+#endif
+ " int3 \n"
+ " .globl jprobe_return_end\n"
+ " jprobe_return_end: \n"
+ " nop \n"::"b"
+ (kcb->jprobe_saved_sp):"memory");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ u8 *addr = (u8 *) (regs->ip - 1);
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+ if ((addr > (u8 *) jprobe_return) &&
+ (addr < (u8 *) jprobe_return_end)) {
+ if (stack_addr(regs) != kcb->jprobe_saved_sp) {
+ struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
+ printk(KERN_ERR
+ "current sp %p does not match saved sp %p\n",
+ stack_addr(regs), kcb->jprobe_saved_sp);
+ printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
+ show_registers(saved_regs);
+ printk(KERN_ERR "Current registers\n");
+ show_registers(regs);
+ BUG();
+ }
+ *regs = kcb->jprobe_saved_regs;
+ memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
+ kcb->jprobes_stack,
+ MIN_STACK_SIZE(kcb->jprobe_saved_sp));
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+ return 0;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
deleted file mode 100644
index 3a020f79f82..00000000000
--- a/arch/x86/kernel/kprobes_32.c
+++ /dev/null
@@ -1,756 +0,0 @@
-/*
- * Kernel Probes (KProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- * Probes initial implementation ( includes contributions from
- * Rusty Russell).
- * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- * interface to access function arguments.
- * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
- * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
- * <prasanna@in.ibm.com> added function-return probes.
- */
-
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/preempt.h>
-#include <linux/kdebug.h>
-#include <asm/cacheflush.h>
-#include <asm/desc.h>
-#include <asm/uaccess.h>
-#include <asm/alternative.h>
-
-void jprobe_return_end(void);
-
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
-DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-
-struct kretprobe_blackpoint kretprobe_blacklist[] = {
- {"__switch_to", }, /* This function switches only current task, but
- doesn't switch kernel stack.*/
- {NULL, NULL} /* Terminator */
-};
-const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-
-/* insert a jmp code */
-static __always_inline void set_jmp_op(void *from, void *to)
-{
- struct __arch_jmp_op {
- char op;
- long raddr;
- } __attribute__((packed)) *jop;
- jop = (struct __arch_jmp_op *)from;
- jop->raddr = (long)(to) - ((long)(from) + 5);
- jop->op = RELATIVEJUMP_INSTRUCTION;
-}
-
-/*
- * returns non-zero if opcodes can be boosted.
- */
-static __always_inline int can_boost(kprobe_opcode_t *opcodes)
-{
-#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
- (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
- (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
- (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
- (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
- << (row % 32))
- /*
- * Undefined/reserved opcodes, conditional jump, Opcode Extension
- * Groups, and some special opcodes can not be boost.
- */
- static const unsigned long twobyte_is_boostable[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ------------------------------- */
- W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
- W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
- W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
- W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
- W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
- W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
- W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
- W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
- W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
- W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
- W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
- W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
- W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
- W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
- W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
- /* ------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- };
-#undef W
- kprobe_opcode_t opcode;
- kprobe_opcode_t *orig_opcodes = opcodes;
-retry:
- if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
- return 0;
- opcode = *(opcodes++);
-
- /* 2nd-byte opcode */
- if (opcode == 0x0f) {
- if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
- return 0;
- return test_bit(*opcodes, twobyte_is_boostable);
- }
-
- switch (opcode & 0xf0) {
- case 0x60:
- if (0x63 < opcode && opcode < 0x67)
- goto retry; /* prefixes */
- /* can't boost Address-size override and bound */
- return (opcode != 0x62 && opcode != 0x67);
- case 0x70:
- return 0; /* can't boost conditional jump */
- case 0xc0:
- /* can't boost software-interruptions */
- return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
- case 0xd0:
- /* can boost AA* and XLAT */
- return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
- case 0xe0:
- /* can boost in/out and absolute jmps */
- return ((opcode & 0x04) || opcode == 0xea);
- case 0xf0:
- if ((opcode & 0x0c) == 0 && opcode != 0xf1)
- goto retry; /* lock/rep(ne) prefix */
- /* clear and set flags can be boost */
- return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
- default:
- if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
- goto retry; /* prefixes */
- /* can't boost CS override and call */
- return (opcode != 0x2e && opcode != 0x9a);
- }
-}
-
-/*
- * returns non-zero if opcode modifies the interrupt flag.
- */
-static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
-{
- switch (opcode) {
- case 0xfa: /* cli */
- case 0xfb: /* sti */
- case 0xcf: /* iret/iretd */
- case 0x9d: /* popf/popfd */
- return 1;
- }
- return 0;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- /* insn: must be on special executable page on i386. */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn)
- return -ENOMEM;
-
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- p->opcode = *p->addr;
- if (can_boost(p->addr)) {
- p->ainsn.boostable = 0;
- } else {
- p->ainsn.boostable = -1;
- }
- return 0;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, &p->opcode, 1);
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
- mutex_unlock(&kprobe_mutex);
-}
-
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
- kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
- kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
-}
-
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
- kcb->kprobe_status = kcb->prev_kprobe.status;
- kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
- kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
-}
-
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = p;
- kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
- = (regs->eflags & (TF_MASK | IF_MASK));
- if (is_IF_modifier(p->opcode))
- kcb->kprobe_saved_eflags &= ~IF_MASK;
-}
-
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- regs->eflags |= TF_MASK;
- regs->eflags &= ~IF_MASK;
- /*single step inline if the instruction is an int3*/
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->eip = (unsigned long)p->addr;
- else
- regs->eip = (unsigned long)p->ainsn.insn;
-}
-
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
-{
- unsigned long *sara = (unsigned long *)&regs->esp;
-
- ri->ret_addr = (kprobe_opcode_t *) *sara;
-
- /* Replace the return addr with trampoline addr */
- *sara = (unsigned long) &kretprobe_trampoline;
-}
-
-/*
- * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
- */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *p;
- int ret = 0;
- kprobe_opcode_t *addr;
- struct kprobe_ctlblk *kcb;
-
- addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
-
- /*
- * We don't want to be preempted for the entire
- * duration of kprobe processing
- */
- preempt_disable();
- kcb = get_kprobe_ctlblk();
-
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
- regs->eflags &= ~TF_MASK;
- regs->eflags |= kcb->kprobe_saved_eflags;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
- return 1;
- } else {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- regs->eip -= sizeof(kprobe_opcode_t);
- ret = 1;
- goto no_kprobe;
- }
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
- }
- goto no_kprobe;
- }
-
- p = get_kprobe(addr);
- if (!p) {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->eip -= sizeof(kprobe_opcode_t);
- ret = 1;
- }
- /* Not one of ours: let kernel handle it */
- goto no_kprobe;
- }
-
- set_current_kprobe(p, regs, kcb);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
- return 1;
-
-ss_probe:
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
- if (p->ainsn.boostable == 1 && !p->post_handler){
- /* Boost up -- we can execute copied instructions directly */
- reset_current_kprobe();
- regs->eip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
- return 1;
- }
-#endif
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
- return 1;
-
-no_kprobe:
- preempt_enable_no_resched();
- return ret;
-}
-
-/*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
- */
- void __kprobes kretprobe_trampoline_holder(void)
- {
- asm volatile ( ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
- " pushf\n"
- /* skip cs, eip, orig_eax */
- " subl $12, %esp\n"
- " pushl %fs\n"
- " pushl %ds\n"
- " pushl %es\n"
- " pushl %eax\n"
- " pushl %ebp\n"
- " pushl %edi\n"
- " pushl %esi\n"
- " pushl %edx\n"
- " pushl %ecx\n"
- " pushl %ebx\n"
- " movl %esp, %eax\n"
- " call trampoline_handler\n"
- /* move eflags to cs */
- " movl 52(%esp), %edx\n"
- " movl %edx, 48(%esp)\n"
- /* save true return address on eflags */
- " movl %eax, 52(%esp)\n"
- " popl %ebx\n"
- " popl %ecx\n"
- " popl %edx\n"
- " popl %esi\n"
- " popl %edi\n"
- " popl %ebp\n"
- " popl %eax\n"
- /* skip eip, orig_eax, es, ds, fs */
- " addl $20, %esp\n"
- " popf\n"
- " ret\n");
-}
-
-/*
- * Called from kretprobe_trampoline
- */
-fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
-{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
- /* fixup registers */
- regs->xcs = __KERNEL_CS | get_kernel_rpl();
- regs->eip = trampoline_address;
- regs->orig_eax = 0xffffffff;
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler){
- __get_cpu_var(current_kprobe) = &ri->rp->kp;
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
- ri->rp->handler(ri, regs);
- __get_cpu_var(current_kprobe) = NULL;
- }
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
-
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- return (void*)orig_ret_address;
-}
-
-/*
- * Called after single-stepping. p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction. To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction. The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt. We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new eip is relative to the copied instruction. We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- *
- * This function also checks instruction size for preparing direct execution.
- */
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
-{
- unsigned long *tos = (unsigned long *)&regs->esp;
- unsigned long copy_eip = (unsigned long)p->ainsn.insn;
- unsigned long orig_eip = (unsigned long)p->addr;
-
- regs->eflags &= ~TF_MASK;
- switch (p->ainsn.insn[0]) {
- case 0x9c: /* pushfl */
- *tos &= ~(TF_MASK | IF_MASK);
- *tos |= kcb->kprobe_old_eflags;
- break;
- case 0xc2: /* iret/ret/lret */
- case 0xc3:
- case 0xca:
- case 0xcb:
- case 0xcf:
- case 0xea: /* jmp absolute -- eip is correct */
- /* eip is already adjusted, no more changes required */
- p->ainsn.boostable = 1;
- goto no_change;
- case 0xe8: /* call relative - Fix return addr */
- *tos = orig_eip + (*tos - copy_eip);
- break;
- case 0x9a: /* call absolute -- same as call absolute, indirect */
- *tos = orig_eip + (*tos - copy_eip);
- goto no_change;
- case 0xff:
- if ((p->ainsn.insn[1] & 0x30) == 0x10) {
- /*
- * call absolute, indirect
- * Fix return addr; eip is correct.
- * But this is not boostable
- */
- *tos = orig_eip + (*tos - copy_eip);
- goto no_change;
- } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
- ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
- /* eip is correct. And this is boostable */
- p->ainsn.boostable = 1;
- goto no_change;
- }
- default:
- break;
- }
-
- if (p->ainsn.boostable == 0) {
- if ((regs->eip > copy_eip) &&
- (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
- /*
- * These instructions can be executed directly if it
- * jumps back to correct address.
- */
- set_jmp_op((void *)regs->eip,
- (void *)orig_eip + (regs->eip - copy_eip));
- p->ainsn.boostable = 1;
- } else {
- p->ainsn.boostable = -1;
- }
- }
-
- regs->eip = orig_eip + (regs->eip - copy_eip);
-
-no_change:
- return;
-}
-
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
- */
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- if (!cur)
- return 0;
-
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
- }
-
- resume_execution(cur, regs, kcb);
- regs->eflags |= kcb->kprobe_saved_eflags;
- trace_hardirqs_fixup_flags(regs->eflags);
-
- /*Restore back the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
- preempt_enable_no_resched();
-
- /*
- * if somebody else is singlestepping across a probe point, eflags
- * will have TF set, in which case, continue the remaining processing
- * of do_debug, as if this is not a probe hit.
- */
- if (regs->eflags & TF_MASK)
- return 0;
-
- return 1;
-}
-
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- switch(kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
- /*
- * We are here because the instruction being single
- * stepped caused a page fault. We reset the current
- * kprobe and the eip points back to the probe address
- * and allow the page fault handler to continue as a
- * normal page fault.
- */
- regs->eip = (unsigned long)cur->addr;
- regs->eflags |= kcb->kprobe_old_eflags;
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
- * In case the user-specified fault handler returned
- * zero, try to fix up.
- */
- if (fixup_exception(regs))
- return 1;
-
- /*
- * fixup_exception() could not handle it,
- * Let do_page_fault() fix it.
- */
- break;
- default:
- break;
- }
- return 0;
-}
-
-/*
- * Wrapper routine to for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode_vm(args->regs))
- return ret;
-
- switch (val) {
- case DIE_INT3:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_GPF:
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- ret = NOTIFY_STOP;
- preempt_enable();
- break;
- default:
- break;
- }
- return ret;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_esp = &regs->esp;
- addr = (unsigned long)(kcb->jprobe_saved_esp);
-
- /*
- * TBD: As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- */
- memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
- MIN_STACK_SIZE(addr));
- regs->eflags &= ~IF_MASK;
- trace_hardirqs_off();
- regs->eip = (unsigned long)(jp->entry);
- return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- asm volatile (" xchgl %%ebx,%%esp \n"
- " int3 \n"
- " .globl jprobe_return_end \n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_esp):"memory");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->eip - 1);
- unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
-
- if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
- if (&regs->esp != kcb->jprobe_saved_esp) {
- struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
- printk("current esp %p does not match saved esp %p\n",
- &regs->esp, kcb->jprobe_saved_esp);
- printk("Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
- printk("Current registers\n");
- show_registers(regs);
- BUG();
- }
- *regs = kcb->jprobe_saved_regs;
- memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
- MIN_STACK_SIZE(stack_addr));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
-{
- return 0;
-}
-
-int __init arch_init_kprobes(void)
-{
- return 0;
-}
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
deleted file mode 100644
index 5df19a9f923..00000000000
--- a/arch/x86/kernel/kprobes_64.c
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Kernel Probes (KProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- * Probes initial implementation ( includes contributions from
- * Rusty Russell).
- * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- * interface to access function arguments.
- * 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
- * <prasanna@in.ibm.com> adapted for x86_64
- * 2005-Mar Roland McGrath <roland@redhat.com>
- * Fixed to handle %rip-relative addressing mode correctly.
- * 2005-May Rusty Lynch <rusty.lynch@intel.com>
- * Added function return probes functionality
- */
-
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/preempt.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/alternative.h>
-
-void jprobe_return_end(void);
-static void __kprobes arch_copy_kprobe(struct kprobe *p);
-
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
-DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-
-struct kretprobe_blackpoint kretprobe_blacklist[] = {
- {"__switch_to", }, /* This function switches only current task, but
- doesn't switch kernel stack.*/
- {NULL, NULL} /* Terminator */
-};
-const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-
-/*
- * returns non-zero if opcode modifies the interrupt flag.
- */
-static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
-{
- switch (*insn) {
- case 0xfa: /* cli */
- case 0xfb: /* sti */
- case 0xcf: /* iret/iretd */
- case 0x9d: /* popf/popfd */
- return 1;
- }
-
- if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf)
- return 1;
- return 0;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- /* insn: must be on special executable page on x86_64. */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn) {
- return -ENOMEM;
- }
- arch_copy_kprobe(p);
- return 0;
-}
-
-/*
- * Determine if the instruction uses the %rip-relative addressing mode.
- * If it does, return the address of the 32-bit displacement word.
- * If not, return null.
- */
-static s32 __kprobes *is_riprel(u8 *insn)
-{
-#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
- (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
- (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
- (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
- (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
- << (row % 64))
- static const u64 onebyte_has_modrm[256 / 64] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ------------------------------- */
- W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
- W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
- W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
- W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
- W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
- W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
- W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
- W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
- W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
- W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
- W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
- W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
- W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
- W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */
- /* ------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- };
- static const u64 twobyte_has_modrm[256 / 64] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ------------------------------- */
- W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
- W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
- W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
- W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
- W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
- W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
- W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
- W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
- W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
- W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
- W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
- W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
- W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
- W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
- W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */
- /* ------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- };
-#undef W
- int need_modrm;
-
- /* Skip legacy instruction prefixes. */
- while (1) {
- switch (*insn) {
- case 0x66:
- case 0x67:
- case 0x2e:
- case 0x3e:
- case 0x26:
- case 0x64:
- case 0x65:
- case 0x36:
- case 0xf0:
- case 0xf3:
- case 0xf2:
- ++insn;
- continue;
- }
- break;
- }
-
- /* Skip REX instruction prefix. */
- if ((*insn & 0xf0) == 0x40)
- ++insn;
-
- if (*insn == 0x0f) { /* Two-byte opcode. */
- ++insn;
- need_modrm = test_bit(*insn, twobyte_has_modrm);
- } else { /* One-byte opcode. */
- need_modrm = test_bit(*insn, onebyte_has_modrm);
- }
-
- if (need_modrm) {
- u8 modrm = *++insn;
- if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
- /* Displacement follows ModRM byte. */
- return (s32 *) ++insn;
- }
- }
-
- /* No %rip-relative addressing mode here. */
- return NULL;
-}
-
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
- s32 *ripdisp;
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
- ripdisp = is_riprel(p->ainsn.insn);
- if (ripdisp) {
- /*
- * The copied instruction uses the %rip-relative
- * addressing mode. Adjust the displacement for the
- * difference between the original location of this
- * instruction and the location of the copy that will
- * actually be run. The tricky bit here is making sure
- * that the sign extension happens correctly in this
- * calculation, since we need a signed 32-bit result to
- * be sign-extended to 64 bits when it's added to the
- * %rip value and yield the same 64-bit result that the
- * sign-extension of the original signed 32-bit
- * displacement would have given.
- */
- s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
- BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
- *ripdisp = disp;
- }
- p->opcode = *p->addr;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, &p->opcode, 1);
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
- mutex_unlock(&kprobe_mutex);
-}
-
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
- kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
- kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
-}
-
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
- kcb->kprobe_status = kcb->prev_kprobe.status;
- kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
- kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
-}
-
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = p;
- kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
- = (regs->eflags & (TF_MASK | IF_MASK));
- if (is_IF_modifier(p->ainsn.insn))
- kcb->kprobe_saved_rflags &= ~IF_MASK;
-}
-
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- regs->eflags |= TF_MASK;
- regs->eflags &= ~IF_MASK;
- /*single step inline if the instruction is an int3*/
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->rip = (unsigned long)p->addr;
- else
- regs->rip = (unsigned long)p->ainsn.insn;
-}
-
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
-{
- unsigned long *sara = (unsigned long *)regs->rsp;
-
- ri->ret_addr = (kprobe_opcode_t *) *sara;
- /* Replace the return addr with trampoline addr */
- *sara = (unsigned long) &kretprobe_trampoline;
-}
-
-int __kprobes kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *p;
- int ret = 0;
- kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
- struct kprobe_ctlblk *kcb;
-
- /*
- * We don't want to be preempted for the entire
- * duration of kprobe processing
- */
- preempt_disable();
- kcb = get_kprobe_ctlblk();
-
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
- regs->eflags &= ~TF_MASK;
- regs->eflags |= kcb->kprobe_saved_rflags;
- goto no_kprobe;
- } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
- /* TODO: Provide re-entrancy from
- * post_kprobes_handler() and avoid exception
- * stack corruption while single-stepping on
- * the instruction of the new probe.
- */
- arch_disarm_kprobe(p);
- regs->rip = (unsigned long)p->addr;
- reset_current_kprobe();
- ret = 1;
- } else {
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the
- * handler. We here save the original kprobe
- * variables and just single step on instruction
- * of the new probe without calling any user
- * handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
- return 1;
- }
- } else {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- regs->rip = (unsigned long)addr;
- ret = 1;
- goto no_kprobe;
- }
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
- }
- goto no_kprobe;
- }
-
- p = get_kprobe(addr);
- if (!p) {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->rip = (unsigned long)addr;
- ret = 1;
- }
- /* Not one of ours: let kernel handle it */
- goto no_kprobe;
- }
-
- set_current_kprobe(p, regs, kcb);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
- return 1;
-
-ss_probe:
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
- return 1;
-
-no_kprobe:
- preempt_enable_no_resched();
- return ret;
-}
-
-/*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
- */
- void kretprobe_trampoline_holder(void)
- {
- asm volatile ( ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
- "nop\n");
- }
-
-/*
- * Called when we hit the probe point at kretprobe_trampoline
- */
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->rip = orig_ret_address;
-
- reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
- preempt_enable_no_resched();
-
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
-}
-
-/*
- * Called after single-stepping. p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction. To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction. The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt. We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new rip is relative to the copied instruction. We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- */
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
-{
- unsigned long *tos = (unsigned long *)regs->rsp;
- unsigned long copy_rip = (unsigned long)p->ainsn.insn;
- unsigned long orig_rip = (unsigned long)p->addr;
- kprobe_opcode_t *insn = p->ainsn.insn;
-
- /*skip the REX prefix*/
- if (*insn >= 0x40 && *insn <= 0x4f)
- insn++;
-
- regs->eflags &= ~TF_MASK;
- switch (*insn) {
- case 0x9c: /* pushfl */
- *tos &= ~(TF_MASK | IF_MASK);
- *tos |= kcb->kprobe_old_rflags;
- break;
- case 0xc2: /* iret/ret/lret */
- case 0xc3:
- case 0xca:
- case 0xcb:
- case 0xcf:
- case 0xea: /* jmp absolute -- ip is correct */
- /* ip is already adjusted, no more changes required */
- goto no_change;
- case 0xe8: /* call relative - Fix return addr */
- *tos = orig_rip + (*tos - copy_rip);
- break;
- case 0xff:
- if ((insn[1] & 0x30) == 0x10) {
- /* call absolute, indirect */
- /* Fix return addr; ip is correct. */
- *tos = orig_rip + (*tos - copy_rip);
- goto no_change;
- } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
- ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
- /* ip is correct. */
- goto no_change;
- }
- default:
- break;
- }
-
- regs->rip = orig_rip + (regs->rip - copy_rip);
-no_change:
-
- return;
-}
-
-int __kprobes post_kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- if (!cur)
- return 0;
-
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
- }
-
- resume_execution(cur, regs, kcb);
- regs->eflags |= kcb->kprobe_saved_rflags;
- trace_hardirqs_fixup_flags(regs->eflags);
-
- /* Restore the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
- preempt_enable_no_resched();
-
- /*
- * if somebody else is singlestepping across a probe point, eflags
- * will have TF set, in which case, continue the remaining processing
- * of do_debug, as if this is not a probe hit.
- */
- if (regs->eflags & TF_MASK)
- return 0;
-
- return 1;
-}
-
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- const struct exception_table_entry *fixup;
-
- switch(kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
- /*
- * We are here because the instruction being single
- * stepped caused a page fault. We reset the current
- * kprobe and the rip points back to the probe address
- * and allow the page fault handler to continue as a
- * normal page fault.
- */
- regs->rip = (unsigned long)cur->addr;
- regs->eflags |= kcb->kprobe_old_rflags;
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
- * In case the user-specified fault handler returned
- * zero, try to fix up.
- */
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return 1;
- }
-
- /*
- * fixup() could not handle it,
- * Let do_page_fault() fix it.
- */
- break;
- default:
- break;
- }
- return 0;
-}
-
-/*
- * Wrapper routine for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- switch (val) {
- case DIE_INT3:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_GPF:
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- ret = NOTIFY_STOP;
- preempt_enable();
- break;
- default:
- break;
- }
- return ret;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_rsp = (long *) regs->rsp;
- addr = (unsigned long)(kcb->jprobe_saved_rsp);
- /*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- */
- memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
- MIN_STACK_SIZE(addr));
- regs->eflags &= ~IF_MASK;
- trace_hardirqs_off();
- regs->rip = (unsigned long)(jp->entry);
- return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- asm volatile (" xchg %%rbx,%%rsp \n"
- " int3 \n"
- " .globl jprobe_return_end \n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_rsp):"memory");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->rip - 1);
- unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
-
- if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
- if ((unsigned long *)regs->rsp != kcb->jprobe_saved_rsp) {
- struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
- printk("current rsp %p does not match saved rsp %p\n",
- (long *)regs->rsp, kcb->jprobe_saved_rsp);
- printk("Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
- printk("Current registers\n");
- show_registers(regs);
- BUG();
- }
- *regs = kcb->jprobe_saved_regs;
- memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
- MIN_STACK_SIZE(stack_addr));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
-{
- return register_kprobe(&trampoline_p);
-}
-
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
-{
- if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
- return 1;
-
- return 0;
-}
diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt.c
index 9ff90a27c45..8a7660c8394 100644
--- a/arch/x86/kernel/ldt_32.c
+++ b/arch/x86/kernel/ldt.c
@@ -1,6 +1,9 @@
/*
* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002 Andi Kleen
+ *
+ * This handles calls from both 32bit and 64bit mode.
*/
#include <linux/errno.h>
@@ -9,7 +12,6 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/vmalloc.h>
-#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -17,7 +19,7 @@
#include <asm/desc.h>
#include <asm/mmu_context.h>
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+#ifdef CONFIG_SMP
static void flush_ldt(void *null)
{
if (current->active_mm)
@@ -27,26 +29,31 @@ static void flush_ldt(void *null)
static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
{
- void *oldldt;
- void *newldt;
+ void *oldldt, *newldt;
int oldsize;
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+ mincount = (mincount + 511) & (~511);
+ if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
+ newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+ newldt = (void *)__get_free_page(GFP_KERNEL);
if (!newldt)
return -ENOMEM;
if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+ memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+ memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
+ (mincount - oldsize) * LDT_ENTRY_SIZE);
+
+#ifdef CONFIG_X86_64
+ /* CHECKME: Do we really need this ? */
+ wmb();
+#endif
pc->ldt = newldt;
wmb();
pc->size = mincount;
@@ -55,6 +62,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
cpumask_t mask;
+
preempt_disable();
load_LDT(pc);
mask = cpumask_of_cpu(smp_processor_id());
@@ -66,10 +74,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#endif
}
if (oldsize) {
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+ if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
- kfree(oldldt);
+ put_page(virt_to_page(oldldt));
}
return 0;
}
@@ -77,9 +85,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
+
if (err < 0)
return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
return 0;
}
@@ -89,7 +98,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
- struct mm_struct * old_mm;
+ struct mm_struct *old_mm;
int retval = 0;
mutex_init(&mm->context.lock);
@@ -105,33 +114,38 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
/*
* No need to lock the MM as we are the last user
+ *
+ * 64bit: Don't touch the LDT register - we're already in the next thread.
*/
void destroy_context(struct mm_struct *mm)
{
if (mm->context.size) {
+#ifdef CONFIG_X86_32
+ /* CHECKME: Can this ever happen ? */
if (mm == current->active_mm)
clear_LDT();
- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+#endif
+ if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
- kfree(mm->context.ldt);
+ put_page(virt_to_page(mm->context.ldt));
mm->context.size = 0;
}
}
-static int read_ldt(void __user * ptr, unsigned long bytecount)
+static int read_ldt(void __user *ptr, unsigned long bytecount)
{
int err;
unsigned long size;
- struct mm_struct * mm = current->mm;
+ struct mm_struct *mm = current->mm;
if (!mm->context.size)
return 0;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+ if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
+ bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
mutex_lock(&mm->context.lock);
- size = mm->context.size*LDT_ENTRY_SIZE;
+ size = mm->context.size * LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
@@ -143,7 +157,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
goto error_return;
if (size != bytecount) {
/* zero-fill the rest */
- if (clear_user(ptr+size, bytecount-size) != 0) {
+ if (clear_user(ptr + size, bytecount - size) != 0) {
err = -EFAULT;
goto error_return;
}
@@ -153,34 +167,32 @@ error_return:
return err;
}
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
- int err;
- unsigned long size;
-
- err = 0;
- size = 5*sizeof(struct desc_struct);
- if (size > bytecount)
- size = bytecount;
-
- err = size;
- if (clear_user(ptr, size))
- err = -EFAULT;
-
- return err;
+ /* CHECKME: Can we use _one_ random number ? */
+#ifdef CONFIG_X86_32
+ unsigned long size = 5 * sizeof(struct desc_struct);
+#else
+ unsigned long size = 128;
+#endif
+ if (bytecount > size)
+ bytecount = size;
+ if (clear_user(ptr, bytecount))
+ return -EFAULT;
+ return bytecount;
}
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
- struct mm_struct * mm = current->mm;
- __u32 entry_1, entry_2;
+ struct mm_struct *mm = current->mm;
+ struct desc_struct ldt;
int error;
struct user_desc ldt_info;
error = -EINVAL;
if (bytecount != sizeof(ldt_info))
goto out;
- error = -EFAULT;
+ error = -EFAULT;
if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
goto out;
@@ -196,28 +208,27 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= mm->context.size) {
- error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+ error = alloc_ldt(&current->mm->context,
+ ldt_info.entry_number + 1, 1);
if (error < 0)
goto out_unlock;
}
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
- entry_1 = 0;
- entry_2 = 0;
+ memset(&ldt, 0, sizeof(ldt));
goto install;
}
}
- entry_1 = LDT_entry_a(&ldt_info);
- entry_2 = LDT_entry_b(&ldt_info);
+ fill_ldt(&ldt, &ldt_info);
if (oldmode)
- entry_2 &= ~(1 << 20);
+ ldt.avl = 0;
/* Install the new entry ... */
install:
- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
+ write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
error = 0;
out_unlock:
@@ -226,7 +237,8 @@ out:
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+asmlinkage int sys_modify_ldt(int func, void __user *ptr,
+ unsigned long bytecount)
{
int ret = -ENOSYS;
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c
deleted file mode 100644
index 60e57abb8e9..00000000000
--- a/arch/x86/kernel/ldt_64.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2002 Andi Kleen
- *
- * This handles calls from both 32bit and 64bit mode.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
-static void flush_ldt(void *null)
-{
- if (current->active_mm)
- load_LDT(&current->active_mm->context);
-}
-#endif
-
-static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
-{
- void *oldldt;
- void *newldt;
- unsigned oldsize;
-
- if (mincount <= (unsigned)pc->size)
- return 0;
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
- else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-
- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
- wmb();
- pc->ldt = newldt;
- wmb();
- pc->size = mincount;
- wmb();
- if (reload) {
-#ifdef CONFIG_SMP
- cpumask_t mask;
-
- preempt_disable();
- mask = cpumask_of_cpu(smp_processor_id());
- load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, NULL, 1, 1);
- preempt_enable();
-#else
- load_LDT(pc);
-#endif
- }
- if (oldsize) {
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- kfree(oldldt);
- }
- return 0;
-}
-
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-{
- int err = alloc_ldt(new, old->size, 0);
- if (err < 0)
- return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
- return 0;
-}
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- struct mm_struct * old_mm;
- int retval = 0;
-
- mutex_init(&mm->context.lock);
- mm->context.size = 0;
- old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- mutex_lock(&old_mm->context.lock);
- retval = copy_ldt(&mm->context, &old_mm->context);
- mutex_unlock(&old_mm->context.lock);
- }
- return retval;
-}
-
-/*
- *
- * Don't touch the LDT register - we're already in the next thread.
- */
-void destroy_context(struct mm_struct *mm)
-{
- if (mm->context.size) {
- if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- kfree(mm->context.ldt);
- mm->context.size = 0;
- }
-}
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
- unsigned long size;
- struct mm_struct * mm = current->mm;
-
- if (!mm->context.size)
- return 0;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-
- mutex_lock(&mm->context.lock);
- size = mm->context.size*LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
-
- err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
- mutex_unlock(&mm->context.lock);
- if (err < 0)
- goto error_return;
- if (size != bytecount) {
- /* zero-fill the rest */
- if (clear_user(ptr+size, bytecount-size) != 0) {
- err = -EFAULT;
- goto error_return;
- }
- }
- return bytecount;
-error_return:
- return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
- /* Arbitrary number */
- /* x86-64 default LDT is all zeros */
- if (bytecount > 128)
- bytecount = 128;
- if (clear_user(ptr, bytecount))
- return -EFAULT;
- return bytecount;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
-{
- struct task_struct *me = current;
- struct mm_struct * mm = me->mm;
- __u32 entry_1, entry_2, *lp;
- int error;
- struct user_desc ldt_info;
-
- error = -EINVAL;
-
- if (bytecount != sizeof(ldt_info))
- goto out;
- error = -EFAULT;
- if (copy_from_user(&ldt_info, ptr, bytecount))
- goto out;
-
- error = -EINVAL;
- if (ldt_info.entry_number >= LDT_ENTRIES)
- goto out;
- if (ldt_info.contents == 3) {
- if (oldmode)
- goto out;
- if (ldt_info.seg_not_present == 0)
- goto out;
- }
-
- mutex_lock(&mm->context.lock);
- if (ldt_info.entry_number >= (unsigned)mm->context.size) {
- error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
- if (error < 0)
- goto out_unlock;
- }
-
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
-
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode || LDT_empty(&ldt_info)) {
- entry_1 = 0;
- entry_2 = 0;
- goto install;
- }
- }
-
- entry_1 = LDT_entry_a(&ldt_info);
- entry_2 = LDT_entry_b(&ldt_info);
- if (oldmode)
- entry_2 &= ~(1 << 20);
-
- /* Install the new entry ... */
-install:
- *lp = entry_1;
- *(lp+1) = entry_2;
- error = 0;
-
-out_unlock:
- mutex_unlock(&mm->context.lock);
-out:
- return error;
-}
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
- int ret = -ENOSYS;
-
- switch (func) {
- case 0:
- ret = read_ldt(ptr, bytecount);
- break;
- case 1:
- ret = write_ldt(ptr, bytecount, 1);
- break;
- case 2:
- ret = read_default_ldt(ptr, bytecount);
- break;
- case 0x11:
- ret = write_ldt(ptr, bytecount, 0);
- break;
- }
- return ret;
-}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 11b935f4f88..c1cfd60639d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -32,7 +32,7 @@ static u32 kexec_pte1[1024] PAGE_ALIGNED;
static void set_idt(void *newidt, __u16 limit)
{
- struct Xgt_desc_struct curidt;
+ struct desc_ptr curidt;
/* ia32 supports unaliged loads & stores */
curidt.size = limit;
@@ -44,7 +44,7 @@ static void set_idt(void *newidt, __u16 limit)
static void set_gdt(void *newgdt, __u16 limit)
{
- struct Xgt_desc_struct curgdt;
+ struct desc_ptr curgdt;
/* ia32 supports unaligned loads & stores */
curgdt.size = limit;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index aa3d2c8f773..a1fef42f8cd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_SYMBOL(init_level4_pgt);
-
-#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3960ab7e149..219f86eb612 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -63,6 +63,21 @@ static int __init mfgpt_disable(char *s)
}
__setup("nomfgpt", mfgpt_disable);
+/* Reset the MFGPT timers. This is required by some broken BIOSes which already
+ * do the same and leave the system in an unstable state. TinyBIOS 0.98 is
+ * affected at least (0.99 is OK with MFGPT workaround left to off).
+ */
+static int __init mfgpt_fix(char *s)
+{
+ u32 val, dummy;
+
+ /* The following udocumented bit resets the MFGPT timers */
+ val = 0xFF; dummy = 0;
+ wrmsr(0x5140002B, val, dummy);
+ return 1;
+}
+__setup("mfgptfix", mfgpt_fix);
+
/*
* Check whether any MFGPTs are available for the kernel to use. In most
* cases, firmware that uses AMD's VSA code will claim all timers during
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 40cfd548871..6ff447f9fda 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -244,8 +244,8 @@ static int microcode_sanity_check(void *mc)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
- ext_sig = (struct extended_signature *)((void *)ext_header
- + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i);
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+ EXT_SIGNATURE_SIZE * i;
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
@@ -279,11 +279,9 @@ static int get_maching_microcode(void *mc, int cpu)
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;
- ext_header = (struct extended_sigtable *)(mc +
- get_datasize(mc_header) + MC_HEADER_SIZE);
+ ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
ext_sigcount = ext_header->count;
- ext_sig = (struct extended_signature *)((void *)ext_header
- + EXT_HEADER_SIZE);
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
for (i = 0; i < ext_sigcount; i++) {
if (microcode_update_match(cpu, mc_header,
ext_sig->sig, ext_sig->pf))
@@ -539,7 +537,7 @@ static int cpu_request_microcode(int cpu)
pr_debug("ucode data file %s load failed\n", name);
return error;
}
- buf = (void *)firmware->data;
+ buf = firmware->data;
size = firmware->size;
while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
> 0) {
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 7a05a7f6099..67009cdd5ec 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -68,7 +68,7 @@ unsigned int def_to_bigsmp = 0;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
-unsigned int __cpuinitdata num_processors;
+unsigned int num_processors;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
@@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
if (!(m->mpc_flags & MPC_APIC_USABLE))
return;
- printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
@@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
mps_oem_check(mpc, oem, str);
- printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+ printk("APIC at: 0x%X\n", mpc->mpc_lapic);
- /*
+ /*
* Save the local APIC address (it might be non-default) -- but only
* if we're not using ACPI.
*/
@@ -721,7 +721,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
unsigned long *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
if (sizeof(*mpf) != 16)
printk("Error: MPF size\n");
@@ -734,8 +734,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
|| (mpf->mpf_specification == 4)) ) {
smp_found_config = 1;
- printk(KERN_INFO "found SMP MP-table at %08lx\n",
- virt_to_phys(mpf));
+ printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+ mpf, virt_to_phys(mpf));
reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
if (mpf->mpf_physptr) {
/*
@@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
*/
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
- mp_ioapic_routing[idx].gsi_end = gsi_base +
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_base,
- mp_ioapic_routing[idx].gsi_end);
+ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_base,
+ mp_ioapic_routing[idx].gsi_end);
}
void __init
@@ -1041,15 +1041,16 @@ void __init mp_config_acpi_legacy_irqs (void)
}
#define MAX_GSI_NUM 4096
+#define IRQ_COMPRESSION_START 64
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
- static int pci_irq = 16;
+ static int pci_irq = IRQ_COMPRESSION_START;
/*
- * Mapping between Global System Interrups, which
+ * Mapping between Global System Interrupts, which
* represent all possible interrupts, and IRQs
* assigned to actual devices.
*/
@@ -1086,12 +1087,16 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi_to_irq[gsi];
+ return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
}
mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
- if (triggering == ACPI_LEVEL_SENSITIVE) {
+ /*
+ * For GSI >= 64, use IRQ compression
+ */
+ if ((gsi >= IRQ_COMPRESSION_START)
+ && (triggering == ACPI_LEVEL_SENSITIVE)) {
/*
* For PCI devices assign IRQs in order, avoiding gaps
* due to unused I/O APIC pins.
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index ef4aab12358..72ab1403fed 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -60,14 +60,18 @@ unsigned int boot_cpu_id = -1U;
EXPORT_SYMBOL(boot_cpu_id);
/* Internal processor count */
-unsigned int num_processors __cpuinitdata = 0;
+unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
+ = { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_bios_cpu_apicid_early_ptr;
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
/*
@@ -118,24 +122,22 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
physid_set(m->mpc_apicid, phys_cpu_present_map);
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
/*
- * bios_cpu_apicid is required to have processors listed
+ * x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
*/
cpu = 0;
}
- bios_cpu_apicid[cpu] = m->mpc_apicid;
- /*
- * We get called early in the the start_kernel initialization
- * process when the per_cpu data area is not yet setup, so we
- * use a static array that is removed after the per_cpu data
- * area is created.
- */
- if (x86_cpu_to_apicid_ptr) {
- u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
- x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+ /* are we being called early in kernel startup? */
+ if (x86_cpu_to_apicid_early_ptr) {
+ u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+ u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+ cpu_to_apicid[cpu] = m->mpc_apicid;
+ bios_cpu_apicid[cpu] = m->mpc_apicid;
} else {
per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+ per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
}
cpu_set(cpu, cpu_possible_map);
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 4f4bfd3a88b..edd413650b3 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
static int endflag __initdata = 0;
+#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
* CPUs during the test make them busy.
*/
static __init void nmi_cpu_busy(void *data)
{
-#ifdef CONFIG_SMP
local_irq_enable_in_hardirq();
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
@@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data)
care if they get somewhat less cycles. */
while (endflag == 0)
mb();
-#endif
}
+#endif
static int __init check_nmi_watchdog(void)
{
@@ -87,11 +87,13 @@ static int __init check_nmi_watchdog(void)
printk(KERN_INFO "Testing NMI watchdog ... ");
+#ifdef CONFIG_SMP
if (nmi_watchdog == NMI_LOCAL_APIC)
smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+#endif
for_each_possible_cpu(cpu)
- prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
+ prev_nmi_count[cpu] = nmi_count(cpu);
local_irq_enable();
mdelay((20*1000)/nmi_hz); // wait 20 ticks
@@ -237,10 +239,10 @@ void acpi_nmi_disable(void)
on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}
-void setup_apic_nmi_watchdog (void *unused)
+void setup_apic_nmi_watchdog(void *unused)
{
if (__get_cpu_var(wd_enabled))
- return;
+ return;
/* cheap hack to support suspend/resume */
/* if cpu0 is not active neither should the other cpus */
@@ -329,7 +331,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
unsigned int sum;
int touched = 0;
int cpu = smp_processor_id();
- int rc=0;
+ int rc = 0;
/* check for other users first */
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index c3d1476b6a1..fb99484d21c 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE;
* 0: the lapic NMI watchdog is disabled, but can be enabled
*/
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-int panic_on_timeout;
+static int panic_on_timeout;
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
@@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-int __init check_nmi_watchdog (void)
+int __init check_nmi_watchdog(void)
{
- int *counts;
+ int *prev_nmi_count;
int cpu;
- if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
+ if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
return 0;
if (!atomic_read(&nmi_active))
return 0;
- counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
- if (!counts)
+ prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+ if (!prev_nmi_count)
return -1;
- printk(KERN_INFO "testing NMI watchdog ... ");
+ printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -101,30 +101,29 @@ int __init check_nmi_watchdog (void)
#endif
for (cpu = 0; cpu < NR_CPUS; cpu++)
- counts[cpu] = cpu_pda(cpu)->__nmi_count;
+ prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
local_irq_enable();
mdelay((20*1000)/nmi_hz); // wait 20 ticks
for_each_online_cpu(cpu) {
if (!per_cpu(wd_enabled, cpu))
continue;
- if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
+ if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
printk(KERN_WARNING "WARNING: CPU#%d: NMI "
"appears to be stuck (%d->%d)!\n",
- cpu,
- counts[cpu],
- cpu_pda(cpu)->__nmi_count);
+ cpu,
+ prev_nmi_count[cpu],
+ cpu_pda(cpu)->__nmi_count);
per_cpu(wd_enabled, cpu) = 0;
atomic_dec(&nmi_active);
}
}
+ endflag = 1;
if (!atomic_read(&nmi_active)) {
- kfree(counts);
+ kfree(prev_nmi_count);
atomic_set(&nmi_active, -1);
- endflag = 1;
return -1;
}
- endflag = 1;
printk("OK.\n");
/* now that we know it works we can reduce NMI frequency to
@@ -132,11 +131,11 @@ int __init check_nmi_watchdog (void)
if (nmi_watchdog == NMI_LOCAL_APIC)
nmi_hz = lapic_adjust_nmi_hz(1);
- kfree(counts);
+ kfree(prev_nmi_count);
return 0;
}
-int __init setup_nmi_watchdog(char *str)
+static int __init setup_nmi_watchdog(char *str)
{
int nmi;
@@ -159,34 +158,6 @@ int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
-}
-
-static void __acpi_nmi_enable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
-}
#ifdef CONFIG_PM
static int nmi_pm_active; /* nmi_active before suspend */
@@ -217,7 +188,7 @@ static struct sysdev_class nmi_sysclass = {
};
static struct sys_device device_lapic_nmi = {
- .id = 0,
+ .id = 0,
.cls = &nmi_sysclass,
};
@@ -231,7 +202,7 @@ static int __init init_lapic_nmi_sysfs(void)
if (nmi_watchdog != NMI_LOCAL_APIC)
return 0;
- if ( atomic_read(&nmi_active) < 0 )
+ if (atomic_read(&nmi_active) < 0)
return 0;
error = sysdev_class_register(&nmi_sysclass);
@@ -244,9 +215,37 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
+static void __acpi_nmi_enable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+}
+
+static void __acpi_nmi_disable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+}
+
void setup_apic_nmi_watchdog(void *unused)
{
- if (__get_cpu_var(wd_enabled) == 1)
+ if (__get_cpu_var(wd_enabled))
return;
/* cheap hack to support suspend/resume */
@@ -311,8 +310,9 @@ void touch_nmi_watchdog(void)
}
}
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog();
}
+EXPORT_SYMBOL(touch_nmi_watchdog);
int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
@@ -479,4 +479,3 @@ void __trigger_all_cpu_backtrace(void)
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 9000d82c6dc..e65281b1634 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -82,7 +82,7 @@ static int __init numaq_tsc_disable(void)
{
if (num_online_nodes() > 1) {
printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
}
return 0;
}
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt.c
index f5000799f8e..075962cc75a 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt.c
@@ -14,7 +14,10 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/
+
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/efi.h>
@@ -55,59 +58,9 @@ char *memory_setup(void)
extern const char start_##ops##_##name[], end_##ops##_##name[]; \
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
-DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");
-DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
-
/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
-static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
- unsigned long addr, unsigned len)
-{
- const unsigned char *start, *end;
- unsigned ret;
-
- switch(type) {
-#define SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- start = start_##ops##_##x; \
- end = end_##ops##_##x; \
- goto patch_site
-
- SITE(pv_irq_ops, irq_disable);
- SITE(pv_irq_ops, irq_enable);
- SITE(pv_irq_ops, restore_fl);
- SITE(pv_irq_ops, save_fl);
- SITE(pv_cpu_ops, iret);
- SITE(pv_cpu_ops, irq_enable_sysexit);
- SITE(pv_mmu_ops, read_cr2);
- SITE(pv_mmu_ops, read_cr3);
- SITE(pv_mmu_ops, write_cr3);
- SITE(pv_cpu_ops, clts);
- SITE(pv_cpu_ops, read_tsc);
-#undef SITE
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
-
- default:
- ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
- break;
- }
-
- return ret;
-}
-
unsigned paravirt_patch_nop(void)
{
return 0;
@@ -186,7 +139,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
+ type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
@@ -237,7 +190,7 @@ static void native_flush_tlb_single(unsigned long addr)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
+extern void native_irq_enable_syscall_ret(void);
static int __init print_banner(void)
{
@@ -285,18 +238,18 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+ BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
BUG_ON(preemptible());
- x86_write_percpu(paravirt_lazy_mode, mode);
+ __get_cpu_var(paravirt_lazy_mode) = mode;
}
void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
+ BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
BUG_ON(preemptible());
- x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+ __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
}
void paravirt_enter_lazy_mmu(void)
@@ -321,7 +274,7 @@ void paravirt_leave_lazy_cpu(void)
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
- return x86_read_percpu(paravirt_lazy_mode);
+ return __get_cpu_var(paravirt_lazy_mode);
}
struct pv_info pv_info = {
@@ -366,11 +319,16 @@ struct pv_cpu_ops pv_cpu_ops = {
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
+#ifdef CONFIG_X86_64
+ .read_cr8 = native_read_cr8,
+ .write_cr8 = native_write_cr8,
+#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
+ .read_tscp = native_read_tscp,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
@@ -379,13 +337,14 @@ struct pv_cpu_ops pv_cpu_ops = {
.store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
- .write_ldt_entry = write_dt_entry,
- .write_gdt_entry = write_dt_entry,
- .write_idt_entry = write_dt_entry,
- .load_esp0 = native_load_esp0,
+ .write_ldt_entry = native_write_ldt_entry,
+ .write_gdt_entry = native_write_gdt_entry,
+ .write_idt_entry = native_write_idt_entry,
+ .load_sp0 = native_load_sp0,
- .irq_enable_sysexit = native_irq_enable_sysexit,
+ .irq_enable_syscall_ret = native_irq_enable_syscall_ret,
.iret = native_iret,
+ .swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
@@ -408,8 +367,10 @@ struct pv_apic_ops pv_apic_ops = {
};
struct pv_mmu_ops pv_mmu_ops = {
+#ifndef CONFIG_X86_64
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
+#endif
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
@@ -437,16 +398,23 @@ struct pv_mmu_ops pv_mmu_ops = {
.kmap_atomic_pte = kmap_atomic,
#endif
+#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic,
.set_pte_present = native_set_pte_present,
- .set_pud = native_set_pud,
.pte_clear = native_pte_clear,
.pmd_clear = native_pmd_clear,
-
+#endif
+ .set_pud = native_set_pud,
.pmd_val = native_pmd_val,
.make_pmd = native_make_pmd,
+
+#if PAGETABLE_LEVELS == 4
+ .pud_val = native_pud_val,
+ .make_pud = native_make_pud,
+ .set_pgd = native_set_pgd,
#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
.pte_val = native_pte_val,
.pgd_val = native_pgd_val,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
new file mode 100644
index 00000000000..82fc5fcab4f
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -0,0 +1,49 @@
+#include <asm/paravirt.h>
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
+DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len)
+{
+ const unsigned char *start, *end;
+ unsigned ret;
+
+#define PATCH_SITE(ops, x) \
+ case PARAVIRT_PATCH(ops.x): \
+ start = start_##ops##_##x; \
+ end = end_##ops##_##x; \
+ goto patch_site
+ switch(type) {
+ PATCH_SITE(pv_irq_ops, irq_disable);
+ PATCH_SITE(pv_irq_ops, irq_enable);
+ PATCH_SITE(pv_irq_ops, restore_fl);
+ PATCH_SITE(pv_irq_ops, save_fl);
+ PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+ PATCH_SITE(pv_mmu_ops, read_cr2);
+ PATCH_SITE(pv_mmu_ops, read_cr3);
+ PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(pv_cpu_ops, clts);
+ PATCH_SITE(pv_cpu_ops, read_tsc);
+
+ patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
+
+ default:
+ ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ break;
+ }
+#undef PATCH_SITE
+ return ret;
+}
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
new file mode 100644
index 00000000000..7d904e138d7
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -0,0 +1,57 @@
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+#include <linux/stringify.h>
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
+DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+
+/* the three commands give us more control to how to return from a syscall */
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
+
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len)
+{
+ const unsigned char *start, *end;
+ unsigned ret;
+
+#define PATCH_SITE(ops, x) \
+ case PARAVIRT_PATCH(ops.x): \
+ start = start_##ops##_##x; \
+ end = end_##ops##_##x; \
+ goto patch_site
+ switch(type) {
+ PATCH_SITE(pv_irq_ops, restore_fl);
+ PATCH_SITE(pv_irq_ops, save_fl);
+ PATCH_SITE(pv_irq_ops, irq_enable);
+ PATCH_SITE(pv_irq_ops, irq_disable);
+ PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+ PATCH_SITE(pv_cpu_ops, swapgs);
+ PATCH_SITE(pv_mmu_ops, read_cr2);
+ PATCH_SITE(pv_mmu_ops, read_cr3);
+ PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(pv_cpu_ops, clts);
+ PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+ PATCH_SITE(pv_cpu_ops, wbinvd);
+
+ patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
+
+ default:
+ ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ break;
+ }
+#undef PATCH_SITE
+ return ret;
+}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6bf1f716909..21f34db2c03 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -30,7 +30,6 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
-#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -183,7 +182,7 @@ static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
/* enable this to stress test the chip's TCE cache */
#ifdef CONFIG_IOMMU_DEBUG
-int debugging __read_mostly = 1;
+static int debugging = 1;
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
@@ -202,7 +201,7 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap,
return ~0UL;
}
#else /* debugging is disabled */
-int debugging __read_mostly = 0;
+static int debugging;
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index 5552d23d23c..a82473d192a 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -13,7 +13,6 @@
#include <asm/calgary.h>
int iommu_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_merge);
dma_addr_t bad_dma_address __read_mostly;
EXPORT_SYMBOL(bad_dma_address);
@@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask);
* See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
* documentation.
*/
-__init int iommu_setup(char *p)
+static __init int iommu_setup(char *p)
{
iommu_merge = 1;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 06bcba53604..4d5cc718198 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -1,12 +1,12 @@
/*
* Dynamic DMA mapping support for AMD Hammer.
- *
+ *
* Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
* This allows to use PCI devices that only support 32bit addresses on systems
- * with more than 4GB.
+ * with more than 4GB.
*
* See Documentation/DMA-mapping.txt for the interface specification.
- *
+ *
* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU General Public License v2 only.
*/
@@ -37,23 +37,26 @@
#include <asm/k8.h>
static unsigned long iommu_bus_base; /* GART remapping area (physical) */
-static unsigned long iommu_size; /* size of remapping area bytes */
+static unsigned long iommu_size; /* size of remapping area bytes */
static unsigned long iommu_pages; /* .. and in pages */
-static u32 *iommu_gatt_base; /* Remapping table */
+static u32 *iommu_gatt_base; /* Remapping table */
-/* If this is disabled the IOMMU will use an optimized flushing strategy
- of only flushing when an mapping is reused. With it true the GART is flushed
- for every mapping. Problem is that doing the lazy flush seems to trigger
- bugs with some popular PCI cards, in particular 3ware (but has been also
- also seen with Qlogic at least). */
+/*
+ * If this is disabled the IOMMU will use an optimized flushing strategy
+ * of only flushing when an mapping is reused. With it true the GART is
+ * flushed for every mapping. Problem is that doing the lazy flush seems
+ * to trigger bugs with some popular PCI cards, in particular 3ware (but
+ * has been also also seen with Qlogic at least).
+ */
int iommu_fullflush = 1;
-/* Allocation bitmap for the remapping area */
+/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
-static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
+/* Guarded by iommu_bitmap_lock: */
+static unsigned long *iommu_gart_bitmap;
-static u32 gart_unmapped_entry;
+static u32 gart_unmapped_entry;
#define GPTE_VALID 1
#define GPTE_COHERENT 2
@@ -61,10 +64,10 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define to_pages(addr,size) \
+#define to_pages(addr, size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-#define EMERGENCY_PAGES 32 /* = 128KB */
+#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
#define AGPEXTERN extern
@@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
-static int need_flush; /* global flush state. set for each gart wrap */
+static int need_flush; /* global flush state. set for each gart wrap */
-static unsigned long alloc_iommu(int size)
-{
+static unsigned long alloc_iommu(int size)
+{
unsigned long offset, flags;
- spin_lock_irqsave(&iommu_bitmap_lock, flags);
- offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
+ spin_lock_irqsave(&iommu_bitmap_lock, flags);
+ offset = find_next_zero_string(iommu_gart_bitmap, next_bit,
+ iommu_pages, size);
if (offset == -1) {
need_flush = 1;
- offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
+ offset = find_next_zero_string(iommu_gart_bitmap, 0,
+ iommu_pages, size);
}
- if (offset != -1) {
- set_bit_string(iommu_gart_bitmap, offset, size);
- next_bit = offset+size;
- if (next_bit >= iommu_pages) {
+ if (offset != -1) {
+ set_bit_string(iommu_gart_bitmap, offset, size);
+ next_bit = offset+size;
+ if (next_bit >= iommu_pages) {
next_bit = 0;
need_flush = 1;
- }
- }
+ }
+ }
if (iommu_fullflush)
need_flush = 1;
- spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+ spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+
return offset;
-}
+}
static void free_iommu(unsigned long offset, int size)
-{
+{
unsigned long flags;
+
spin_lock_irqsave(&iommu_bitmap_lock, flags);
__clear_bit_string(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
+}
-/*
+/*
* Use global flush state to avoid races with multiple flushers.
*/
static void flush_gart(void)
-{
+{
unsigned long flags;
+
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
k8_flush_garts();
need_flush = 0;
- }
+ }
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
+}
#ifdef CONFIG_IOMMU_LEAK
-#define SET_LEAK(x) if (iommu_leak_tab) \
- iommu_leak_tab[x] = __builtin_return_address(0);
-#define CLEAR_LEAK(x) if (iommu_leak_tab) \
- iommu_leak_tab[x] = NULL;
+#define SET_LEAK(x) \
+ do { \
+ if (iommu_leak_tab) \
+ iommu_leak_tab[x] = __builtin_return_address(0);\
+ } while (0)
+
+#define CLEAR_LEAK(x) \
+ do { \
+ if (iommu_leak_tab) \
+ iommu_leak_tab[x] = NULL; \
+ } while (0)
/* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
+static void **iommu_leak_tab;
static int leak_trace;
static int iommu_leak_pages = 20;
+
static void dump_leak(void)
{
int i;
- static int dump;
- if (dump || !iommu_leak_tab) return;
+ static int dump;
+
+ if (dump || !iommu_leak_tab)
+ return;
dump = 1;
- show_stack(NULL,NULL);
- /* Very crude. dump some from the end of the table too */
- printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
- for (i = 0; i < iommu_leak_pages; i+=2) {
- printk("%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
- printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
- }
- printk("\n");
+ show_stack(NULL, NULL);
+
+ /* Very crude. dump some from the end of the table too */
+ printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
+ iommu_leak_pages);
+ for (i = 0; i < iommu_leak_pages; i += 2) {
+ printk(KERN_DEBUG "%lu: ", iommu_pages-i);
+ printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+ printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
+ }
+ printk(KERN_DEBUG "\n");
}
#else
-#define SET_LEAK(x)
-#define CLEAR_LEAK(x)
+# define SET_LEAK(x)
+# define CLEAR_LEAK(x)
#endif
static void iommu_full(struct device *dev, size_t size, int dir)
{
- /*
+ /*
* Ran out of IOMMU space for this operation. This is very bad.
* Unfortunately the drivers cannot handle this operation properly.
- * Return some non mapped prereserved space in the aperture and
+ * Return some non mapped prereserved space in the aperture and
* let the Northbridge deal with it. This will result in garbage
* in the IO operation. When the size exceeds the prereserved space
- * memory corruption will occur or random memory will be DMAed
+ * memory corruption will occur or random memory will be DMAed
* out. Hopefully no network devices use single mappings that big.
- */
-
- printk(KERN_ERR
- "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
- size, dev->bus_id);
+ */
+
+ printk(KERN_ERR
+ "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
+ size, dev->bus_id);
if (size > PAGE_SIZE*EMERGENCY_PAGES) {
if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
panic("PCI-DMA: Memory would be corrupted\n");
- if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
- }
-
+ if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+ panic(KERN_ERR
+ "PCI-DMA: Random memory would be DMAed\n");
+ }
#ifdef CONFIG_IOMMU_LEAK
- dump_leak();
+ dump_leak();
#endif
-}
+}
-static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
-{
+static inline int
+need_iommu(struct device *dev, unsigned long addr, size_t size)
+{
u64 mask = *dev->dma_mask;
int high = addr + size > mask;
int mmu = high;
- if (force_iommu)
- mmu = 1;
- return mmu;
+
+ if (force_iommu)
+ mmu = 1;
+
+ return mmu;
}
-static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
-{
+static inline int
+nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
+{
u64 mask = *dev->dma_mask;
int high = addr + size > mask;
int mmu = high;
- return mmu;
+
+ return mmu;
}
/* Map a single continuous physical area into the IOMMU.
@@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
*/
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
-{
+{
unsigned long npages = to_pages(phys_mem, size);
unsigned long iommu_page = alloc_iommu(npages);
int i;
+
if (iommu_page == -1) {
if (!nonforced_iommu(dev, phys_mem, size))
- return phys_mem;
+ return phys_mem;
if (panic_on_overflow)
panic("dma_map_area overflow %lu bytes\n", size);
iommu_full(dev, size, dir);
@@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
-static dma_addr_t gart_map_simple(struct device *dev, char *buf,
- size_t size, int dir)
+static dma_addr_t
+gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
{
dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+
flush_gart();
+
return map;
}
/* Map a single area into the IOMMU */
-static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
+static dma_addr_t
+gart_map_single(struct device *dev, void *addr, size_t size, int dir)
{
unsigned long phys_mem, bus;
if (!dev)
dev = &fallback_dev;
- phys_mem = virt_to_phys(addr);
+ phys_mem = virt_to_phys(addr);
if (!need_iommu(dev, phys_mem, size))
- return phys_mem;
+ return phys_mem;
bus = gart_map_simple(dev, addr, size, dir);
- return bus;
+
+ return bus;
}
/*
* Free a DMA mapping.
*/
static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction)
+ size_t size, int direction)
{
unsigned long iommu_page;
int npages;
@@ -266,6 +296,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
dma_addr >= iommu_bus_base + iommu_size)
return;
+
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
npages = to_pages(dma_addr, size);
for (i = 0; i < npages; i++) {
@@ -278,7 +309,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
/*
* Wrapper for pci_unmap_single working with scatterlists.
*/
-static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+static void
+gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
struct scatterlist *s;
int i;
@@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
for_each_sg(sg, s, nents, i) {
unsigned long addr = sg_phys(s);
- if (nonforced_iommu(dev, addr, s->length)) {
+
+ if (nonforced_iommu(dev, addr, s->length)) {
addr = dma_map_area(dev, addr, s->length, dir);
- if (addr == bad_dma_address) {
- if (i > 0)
+ if (addr == bad_dma_address) {
+ if (i > 0)
gart_unmap_sg(dev, sg, i, dir);
- nents = 0;
+ nents = 0;
sg[0].dma_length = 0;
break;
}
@@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
s->dma_length = s->length;
}
flush_gart();
+
return nents;
}
/* Map multiple scatterlist entries continuous into the first. */
static int __dma_map_cont(struct scatterlist *start, int nelems,
- struct scatterlist *sout, unsigned long pages)
+ struct scatterlist *sout, unsigned long pages)
{
unsigned long iommu_start = alloc_iommu(pages);
- unsigned long iommu_page = iommu_start;
+ unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
@@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatterlist *start, int nelems,
for_each_sg(start, s, nelems, i) {
unsigned long pages, addr;
unsigned long phys_addr = s->dma_address;
-
+
BUG_ON(s != start && s->offset);
if (s == start) {
sout->dma_address = iommu_bus_base;
sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
sout->dma_length = s->length;
- } else {
- sout->dma_length += s->length;
+ } else {
+ sout->dma_length += s->length;
}
addr = phys_addr;
- pages = to_pages(s->offset, s->length);
- while (pages--) {
- iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
+ pages = to_pages(s->offset, s->length);
+ while (pages--) {
+ iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
- }
- BUG_ON(iommu_page - iommu_start != pages);
+ }
+ BUG_ON(iommu_page - iommu_start != pages);
+
return 0;
}
-static inline int dma_map_cont(struct scatterlist *start, int nelems,
- struct scatterlist *sout,
- unsigned long pages, int need)
+static inline int
+dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
+ unsigned long pages, int need)
{
if (!need) {
BUG_ON(nelems != 1);
@@ -370,22 +405,19 @@ static inline int dma_map_cont(struct scatterlist *start, int nelems,
}
return __dma_map_cont(start, nelems, sout, pages);
}
-
+
/*
* DMA map all entries in a scatterlist.
- * Merge chunks that have page aligned sizes into a continuous mapping.
+ * Merge chunks that have page aligned sizes into a continuous mapping.
*/
-static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- int dir)
+static int
+gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
- int i;
- int out;
- int start;
- unsigned long pages = 0;
- int need = 0, nextneed;
struct scatterlist *s, *ps, *start_sg, *sgmap;
+ int need = 0, nextneed, i, out, start;
+ unsigned long pages = 0;
- if (nents == 0)
+ if (nents == 0)
return 0;
if (!dev)
@@ -397,15 +429,19 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
ps = NULL; /* shut up gcc */
for_each_sg(sg, s, nents, i) {
dma_addr_t addr = sg_phys(s);
+
s->dma_address = addr;
- BUG_ON(s->length == 0);
+ BUG_ON(s->length == 0);
- nextneed = need_iommu(dev, addr, s->length);
+ nextneed = need_iommu(dev, addr, s->length);
/* Handle the previous not yet processed entries */
if (i > start) {
- /* Can only merge when the last chunk ends on a page
- boundary and the new one doesn't have an offset. */
+ /*
+ * Can only merge when the last chunk ends on a
+ * page boundary and the new one doesn't have an
+ * offset.
+ */
if (!iommu_merge || !nextneed || !need || s->offset ||
(ps->offset + ps->length) % PAGE_SIZE) {
if (dma_map_cont(start_sg, i - start, sgmap,
@@ -436,6 +472,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
error:
flush_gart();
gart_unmap_sg(dev, sg, out, dir);
+
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
out = dma_map_sg_nonforce(dev, sg, nents, dir);
@@ -444,64 +481,68 @@ error:
}
if (panic_on_overflow)
panic("dma_map_sg: overflow on %lu pages\n", pages);
+
iommu_full(dev, pages << PAGE_SHIFT, dir);
for_each_sg(sg, s, nents, i)
s->dma_address = bad_dma_address;
return 0;
-}
+}
static int no_agp;
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
-{
- unsigned long a;
- if (!iommu_size) {
- iommu_size = aper_size;
- if (!no_agp)
- iommu_size /= 2;
- }
-
- a = aper + iommu_size;
+{
+ unsigned long a;
+
+ if (!iommu_size) {
+ iommu_size = aper_size;
+ if (!no_agp)
+ iommu_size /= 2;
+ }
+
+ a = aper + iommu_size;
iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
- if (iommu_size < 64*1024*1024)
+ if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
- "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);
-
+ "PCI-DMA: Warning: Small IOMMU %luMB."
+ " Consider increasing the AGP aperture in BIOS\n",
+ iommu_size >> 20);
+ }
+
return iommu_size;
-}
+}
-static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
-{
- unsigned aper_size = 0, aper_base_32;
+static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
+{
+ unsigned aper_size = 0, aper_base_32, aper_order;
u64 aper_base;
- unsigned aper_order;
- pci_read_config_dword(dev, 0x94, &aper_base_32);
+ pci_read_config_dword(dev, 0x94, &aper_base_32);
pci_read_config_dword(dev, 0x90, &aper_order);
- aper_order = (aper_order >> 1) & 7;
+ aper_order = (aper_order >> 1) & 7;
- aper_base = aper_base_32 & 0x7fff;
+ aper_base = aper_base_32 & 0x7fff;
aper_base <<= 25;
- aper_size = (32 * 1024 * 1024) << aper_order;
- if (aper_base + aper_size > 0x100000000UL || !aper_size)
+ aper_size = (32 * 1024 * 1024) << aper_order;
+ if (aper_base + aper_size > 0x100000000UL || !aper_size)
aper_base = 0;
*size = aper_size;
return aper_base;
-}
+}
-/*
+/*
* Private Northbridge GATT initialization in case we cannot use the
- * AGP driver for some reason.
+ * AGP driver for some reason.
*/
static __init int init_k8_gatt(struct agp_kern_info *info)
-{
+{
+ unsigned aper_size, gatt_size, new_aper_size;
+ unsigned aper_base, new_aper_base;
struct pci_dev *dev;
void *gatt;
- unsigned aper_base, new_aper_base;
- unsigned aper_size, gatt_size, new_aper_size;
int i;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
@@ -509,75 +550,75 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
dev = NULL;
for (i = 0; i < num_k8_northbridges; i++) {
dev = k8_northbridges[i];
- new_aper_base = read_aperture(dev, &new_aper_size);
- if (!new_aper_base)
- goto nommu;
-
- if (!aper_base) {
+ new_aper_base = read_aperture(dev, &new_aper_size);
+ if (!new_aper_base)
+ goto nommu;
+
+ if (!aper_base) {
aper_size = new_aper_size;
aper_base = new_aper_base;
- }
- if (aper_size != new_aper_size || aper_base != new_aper_base)
+ }
+ if (aper_size != new_aper_size || aper_base != new_aper_base)
goto nommu;
}
if (!aper_base)
- goto nommu;
+ goto nommu;
info->aper_base = aper_base;
- info->aper_size = aper_size>>20;
+ info->aper_size = aper_size >> 20;
- gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
- if (!gatt)
+ gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
+ gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+ if (!gatt)
panic("Cannot allocate GATT table");
- if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE))
+ if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
panic("Could not set GART PTEs to uncacheable pages");
- global_flush_tlb();
- memset(gatt, 0, gatt_size);
+ memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
for (i = 0; i < num_k8_northbridges; i++) {
- u32 ctl;
- u32 gatt_reg;
+ u32 gatt_reg;
+ u32 ctl;
dev = k8_northbridges[i];
- gatt_reg = __pa(gatt) >> 12;
- gatt_reg <<= 4;
+ gatt_reg = __pa(gatt) >> 12;
+ gatt_reg <<= 4;
pci_write_config_dword(dev, 0x98, gatt_reg);
- pci_read_config_dword(dev, 0x90, &ctl);
+ pci_read_config_dword(dev, 0x90, &ctl);
ctl |= 1;
ctl &= ~((1<<4) | (1<<5));
- pci_write_config_dword(dev, 0x90, ctl);
+ pci_write_config_dword(dev, 0x90, ctl);
}
flush_gart();
-
- printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
+
+ printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
+ aper_base, aper_size>>10);
return 0;
nommu:
- /* Should not happen anymore */
+ /* Should not happen anymore */
printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
- return -1;
-}
+ return -1;
+}
extern int agp_amd64_init(void);
static const struct dma_mapping_ops gart_dma_ops = {
- .mapping_error = NULL,
- .map_single = gart_map_single,
- .map_simple = gart_map_simple,
- .unmap_single = gart_unmap_single,
- .sync_single_for_cpu = NULL,
- .sync_single_for_device = NULL,
- .sync_single_range_for_cpu = NULL,
- .sync_single_range_for_device = NULL,
- .sync_sg_for_cpu = NULL,
- .sync_sg_for_device = NULL,
- .map_sg = gart_map_sg,
- .unmap_sg = gart_unmap_sg,
+ .mapping_error = NULL,
+ .map_single = gart_map_single,
+ .map_simple = gart_map_simple,
+ .unmap_single = gart_unmap_single,
+ .sync_single_for_cpu = NULL,
+ .sync_single_for_device = NULL,
+ .sync_single_range_for_cpu = NULL,
+ .sync_single_range_for_device = NULL,
+ .sync_sg_for_cpu = NULL,
+ .sync_sg_for_device = NULL,
+ .map_sg = gart_map_sg,
+ .unmap_sg = gart_unmap_sg,
};
void gart_iommu_shutdown(void)
@@ -588,23 +629,23 @@ void gart_iommu_shutdown(void)
if (no_agp && (dma_ops != &gart_dma_ops))
return;
- for (i = 0; i < num_k8_northbridges; i++) {
- u32 ctl;
+ for (i = 0; i < num_k8_northbridges; i++) {
+ u32 ctl;
- dev = k8_northbridges[i];
- pci_read_config_dword(dev, 0x90, &ctl);
+ dev = k8_northbridges[i];
+ pci_read_config_dword(dev, 0x90, &ctl);
- ctl &= ~1;
+ ctl &= ~1;
- pci_write_config_dword(dev, 0x90, ctl);
- }
+ pci_write_config_dword(dev, 0x90, ctl);
+ }
}
void __init gart_iommu_init(void)
-{
+{
struct agp_kern_info info;
- unsigned long aper_size;
unsigned long iommu_start;
+ unsigned long aper_size;
unsigned long scratch;
long i;
@@ -614,14 +655,14 @@ void __init gart_iommu_init(void)
}
#ifndef CONFIG_AGP_AMD64
- no_agp = 1;
+ no_agp = 1;
#else
/* Makefile puts PCI initialization via subsys_initcall first. */
/* Add other K8 AGP bridge drivers here */
- no_agp = no_agp ||
- (agp_amd64_init() < 0) ||
+ no_agp = no_agp ||
+ (agp_amd64_init() < 0) ||
(agp_copy_info(agp_bridge, &info) < 0);
-#endif
+#endif
if (swiotlb)
return;
@@ -643,77 +684,78 @@ void __init gart_iommu_init(void)
}
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
- aper_size = info.aper_size * 1024 * 1024;
- iommu_size = check_iommu_size(info.aper_base, aper_size);
- iommu_pages = iommu_size >> PAGE_SHIFT;
-
- iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
- get_order(iommu_pages/8));
- if (!iommu_gart_bitmap)
- panic("Cannot allocate iommu bitmap\n");
+ aper_size = info.aper_size * 1024 * 1024;
+ iommu_size = check_iommu_size(info.aper_base, aper_size);
+ iommu_pages = iommu_size >> PAGE_SHIFT;
+
+ iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+ get_order(iommu_pages/8));
+ if (!iommu_gart_bitmap)
+ panic("Cannot allocate iommu bitmap\n");
memset(iommu_gart_bitmap, 0, iommu_pages/8);
#ifdef CONFIG_IOMMU_LEAK
- if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+ if (leak_trace) {
+ iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
+ if (iommu_leak_tab)
+ memset(iommu_leak_tab, 0, iommu_pages * 8);
else
- printk("PCI-DMA: Cannot allocate leak trace area\n");
- }
+ printk(KERN_DEBUG
+ "PCI-DMA: Cannot allocate leak trace area\n");
+ }
#endif
- /*
+ /*
* Out of IOMMU space handling.
- * Reserve some invalid pages at the beginning of the GART.
- */
- set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+ * Reserve some invalid pages at the beginning of the GART.
+ */
+ set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
- agp_memory_reserved = iommu_size;
+ agp_memory_reserved = iommu_size;
printk(KERN_INFO
"PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
- iommu_size>>20);
+ iommu_size >> 20);
- iommu_start = aper_size - iommu_size;
- iommu_bus_base = info.aper_base + iommu_start;
+ iommu_start = aper_size - iommu_size;
+ iommu_bus_base = info.aper_base + iommu_start;
bad_dma_address = iommu_bus_base;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
- /*
+ /*
* Unmap the IOMMU part of the GART. The alias of the page is
* always mapped with cache enabled and there is no full cache
* coherency across the GART remapping. The unmapping avoids
* automatic prefetches from the CPU allocating cache lines in
* there. All CPU accesses are done via the direct mapping to
* the backing memory. The GART address is only used by PCI
- * devices.
+ * devices.
*/
clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
- /*
- * Try to workaround a bug (thanks to BenH)
- * Set unmapped entries to a scratch page instead of 0.
+ /*
+ * Try to workaround a bug (thanks to BenH)
+ * Set unmapped entries to a scratch page instead of 0.
* Any prefetches that hit unmapped entries won't get an bus abort
* then.
*/
- scratch = get_zeroed_page(GFP_KERNEL);
- if (!scratch)
+ scratch = get_zeroed_page(GFP_KERNEL);
+ if (!scratch)
panic("Cannot allocate iommu scratch page");
gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
- for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
+ for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
iommu_gatt_base[i] = gart_unmapped_entry;
flush_gart();
dma_ops = &gart_dma_ops;
-}
+}
void __init gart_parse_options(char *p)
{
int arg;
#ifdef CONFIG_IOMMU_LEAK
- if (!strncmp(p,"leak",4)) {
+ if (!strncmp(p, "leak", 4)) {
leak_trace = 1;
p += 4;
if (*p == '=') ++p;
@@ -723,18 +765,18 @@ void __init gart_parse_options(char *p)
#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
- if (!strncmp(p, "fullflush",8))
+ if (!strncmp(p, "fullflush", 8))
iommu_fullflush = 1;
- if (!strncmp(p, "nofullflush",11))
+ if (!strncmp(p, "nofullflush", 11))
iommu_fullflush = 0;
- if (!strncmp(p,"noagp",5))
+ if (!strncmp(p, "noagp", 5))
no_agp = 1;
- if (!strncmp(p, "noaperture",10))
+ if (!strncmp(p, "noaperture", 10))
fix_aperture = 0;
/* duplicated from pci-dma.c */
- if (!strncmp(p,"force",5))
+ if (!strncmp(p, "force", 5))
gart_iommu_aperture_allowed = 1;
- if (!strncmp(p,"allowed",7))
+ if (!strncmp(p, "allowed", 7))
gart_iommu_aperture_allowed = 1;
if (!strncmp(p, "memaper", 7)) {
fallback_aper_force = 1;
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 102866d729a..82a0a674a00 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -10,7 +10,6 @@
#include <asm/dma.h>
int swiotlb __read_mostly;
-EXPORT_SYMBOL(swiotlb);
const struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c
index ae8f91214f1..b112406f199 100644
--- a/arch/x86/kernel/pmtimer_64.c
+++ b/arch/x86/kernel/pmtimer_64.c
@@ -19,13 +19,13 @@
#include <linux/time.h>
#include <linux/init.h>
#include <linux/cpumask.h>
+#include <linux/acpi_pmtmr.h>
+
#include <asm/io.h>
#include <asm/proto.h>
#include <asm/msr.h>
#include <asm/vsyscall.h>
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
static inline u32 cyc2us(u32 cycles)
{
/* The Power Management Timer ticks at 3.579545 ticks per microsecond.
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 46d391d49de..968371ab223 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,7 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
+#include <asm/kdebug.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -74,7 +75,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
*/
unsigned long thread_saved_pc(struct task_struct *tsk)
{
- return ((unsigned long *)tsk->thread.esp)[3];
+ return ((unsigned long *)tsk->thread.sp)[3];
}
/*
@@ -113,10 +114,19 @@ void default_idle(void)
smp_mb();
local_irq_disable();
- if (!need_resched())
+ if (!need_resched()) {
+ ktime_t t0, t1;
+ u64 t0n, t1n;
+
+ t0 = ktime_get();
+ t0n = ktime_to_ns(t0);
safe_halt(); /* enables interrupts racelessly */
- else
- local_irq_enable();
+ local_irq_disable();
+ t1 = ktime_get();
+ t1n = ktime_to_ns(t1);
+ sched_clock_idle_wakeup_event(t1n - t0n);
+ }
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
} else {
/* loop is done by the caller */
@@ -132,7 +142,7 @@ EXPORT_SYMBOL(default_idle);
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+static void poll_idle(void)
{
cpu_relax();
}
@@ -188,6 +198,9 @@ void cpu_idle(void)
rmb();
idle = pm_idle;
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, 0);
+
if (!idle)
idle = default_idle;
@@ -255,13 +268,13 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
- __mwait(eax, ecx);
+ __mwait(ax, cx);
}
}
@@ -272,19 +285,37 @@ static void mwait_idle(void)
mwait_idle_with_hints(0, 0);
}
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+ /* Any C1 states supported? */
+ return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
- if (cpu_has(c, X86_FEATURE_MWAIT)) {
- printk("monitor/mwait feature present.\n");
+ static int selected;
+
+ if (selected)
+ return;
+#ifdef CONFIG_X86_SMP
+ if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ " performance may degrade.\n");
+ }
+#endif
+ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
/*
* Skip, if setup has overridden idle.
* One CPU supports mwait => All CPUs supports mwait
*/
if (!pm_idle) {
- printk("using mwait in idle threads.\n");
+ printk(KERN_INFO "using mwait in idle threads.\n");
pm_idle = mwait_idle;
}
}
+ selected = 1;
}
static int __init idle_setup(char *str)
@@ -292,10 +323,6 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
-#ifdef CONFIG_X86_SMP
- if (smp_num_siblings > 1)
- printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
-#endif
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
else
@@ -310,15 +337,15 @@ void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned long esp;
+ unsigned long sp;
unsigned short ss, gs;
if (user_mode_vm(regs)) {
- esp = regs->esp;
- ss = regs->xss & 0xffff;
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
savesegment(gs, gs);
} else {
- esp = (unsigned long) (&regs->esp);
+ sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
savesegment(gs, gs);
}
@@ -331,17 +358,17 @@ void __show_registers(struct pt_regs *regs, int all)
init_utsname()->version);
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
- 0xffff & regs->xcs, regs->eip, regs->eflags,
+ 0xffff & regs->cs, regs->ip, regs->flags,
smp_processor_id());
- print_symbol("EIP is at %s\n", regs->eip);
+ print_symbol("EIP is at %s\n", regs->ip);
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
+ regs->ax, regs->bx, regs->cx, regs->dx);
printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
+ regs->si, regs->di, regs->bp, sp);
printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff,
- regs->xfs & 0xffff, gs, ss);
+ regs->ds & 0xffff, regs->es & 0xffff,
+ regs->fs & 0xffff, gs, ss);
if (!all)
return;
@@ -369,12 +396,12 @@ void __show_registers(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
__show_registers(regs, 1);
- show_trace(NULL, regs, &regs->esp);
+ show_trace(NULL, regs, &regs->sp, regs->bp);
}
/*
- * This gets run with %ebx containing the
- * function to call, and %edx containing
+ * This gets run with %bx containing the
+ * function to call, and %dx containing
* the "args".
*/
extern void kernel_thread_helper(void);
@@ -388,16 +415,16 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
memset(&regs, 0, sizeof(regs));
- regs.ebx = (unsigned long) fn;
- regs.edx = (unsigned long) arg;
+ regs.bx = (unsigned long) fn;
+ regs.dx = (unsigned long) arg;
- regs.xds = __USER_DS;
- regs.xes = __USER_DS;
- regs.xfs = __KERNEL_PERCPU;
- regs.orig_eax = -1;
- regs.eip = (unsigned long) kernel_thread_helper;
- regs.xcs = __KERNEL_CS | get_kernel_rpl();
- regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+ regs.ds = __USER_DS;
+ regs.es = __USER_DS;
+ regs.fs = __KERNEL_PERCPU;
+ regs.orig_ax = -1;
+ regs.ip = (unsigned long) kernel_thread_helper;
+ regs.cs = __KERNEL_CS | get_kernel_rpl();
+ regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
@@ -435,7 +462,12 @@ void flush_thread(void)
{
struct task_struct *tsk = current;
- memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+ tsk->thread.debugreg0 = 0;
+ tsk->thread.debugreg1 = 0;
+ tsk->thread.debugreg2 = 0;
+ tsk->thread.debugreg3 = 0;
+ tsk->thread.debugreg6 = 0;
+ tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
clear_tsk_thread_flag(tsk, TIF_DEBUG);
/*
@@ -460,7 +492,7 @@ void prepare_to_copy(struct task_struct *tsk)
unlazy_fpu(tsk);
}
-int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
struct task_struct * p, struct pt_regs * regs)
{
@@ -470,15 +502,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
childregs = task_pt_regs(p);
*childregs = *regs;
- childregs->eax = 0;
- childregs->esp = esp;
+ childregs->ax = 0;
+ childregs->sp = sp;
- p->thread.esp = (unsigned long) childregs;
- p->thread.esp0 = (unsigned long) (childregs+1);
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
- p->thread.eip = (unsigned long) ret_from_fork;
+ p->thread.ip = (unsigned long) ret_from_fork;
- savesegment(gs,p->thread.gs);
+ savesegment(gs, p->thread.gs);
tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -491,32 +523,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
set_tsk_thread_flag(p, TIF_IO_BITMAP);
}
+ err = 0;
+
/*
* Set a new TLS for the child thread?
*/
- if (clone_flags & CLONE_SETTLS) {
- struct desc_struct *desc;
- struct user_desc info;
- int idx;
-
- err = -EFAULT;
- if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
- goto out;
- err = -EINVAL;
- if (LDT_empty(&info))
- goto out;
-
- idx = info.entry_number;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- goto out;
-
- desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
+ if (clone_flags & CLONE_SETTLS)
+ err = do_set_thread_area(p, -1,
+ (struct user_desc __user *)childregs->si, 0);
- err = 0;
- out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
@@ -529,62 +544,52 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
*/
void dump_thread(struct pt_regs * regs, struct user * dump)
{
- int i;
+ u16 gs;
/* changed the size calculations - should hopefully work better. lbt */
dump->magic = CMAGIC;
dump->start_code = 0;
- dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+ dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
- for (i = 0; i < 8; i++)
- dump->u_debugreg[i] = current->thread.debugreg[i];
+ dump->u_debugreg[0] = current->thread.debugreg0;
+ dump->u_debugreg[1] = current->thread.debugreg1;
+ dump->u_debugreg[2] = current->thread.debugreg2;
+ dump->u_debugreg[3] = current->thread.debugreg3;
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+ dump->u_debugreg[7] = current->thread.debugreg7;
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
- dump->regs.ebx = regs->ebx;
- dump->regs.ecx = regs->ecx;
- dump->regs.edx = regs->edx;
- dump->regs.esi = regs->esi;
- dump->regs.edi = regs->edi;
- dump->regs.ebp = regs->ebp;
- dump->regs.eax = regs->eax;
- dump->regs.ds = regs->xds;
- dump->regs.es = regs->xes;
- dump->regs.fs = regs->xfs;
- savesegment(gs,dump->regs.gs);
- dump->regs.orig_eax = regs->orig_eax;
- dump->regs.eip = regs->eip;
- dump->regs.cs = regs->xcs;
- dump->regs.eflags = regs->eflags;
- dump->regs.esp = regs->esp;
- dump->regs.ss = regs->xss;
+ dump->regs.bx = regs->bx;
+ dump->regs.cx = regs->cx;
+ dump->regs.dx = regs->dx;
+ dump->regs.si = regs->si;
+ dump->regs.di = regs->di;
+ dump->regs.bp = regs->bp;
+ dump->regs.ax = regs->ax;
+ dump->regs.ds = (u16)regs->ds;
+ dump->regs.es = (u16)regs->es;
+ dump->regs.fs = (u16)regs->fs;
+ savesegment(gs,gs);
+ dump->regs.orig_ax = regs->orig_ax;
+ dump->regs.ip = regs->ip;
+ dump->regs.cs = (u16)regs->cs;
+ dump->regs.flags = regs->flags;
+ dump->regs.sp = regs->sp;
+ dump->regs.ss = (u16)regs->ss;
dump->u_fpvalid = dump_fpu (regs, &dump->i387);
}
EXPORT_SYMBOL(dump_thread);
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs ptregs = *task_pt_regs(tsk);
- ptregs.xcs &= 0xffff;
- ptregs.xds &= 0xffff;
- ptregs.xes &= 0xffff;
- ptregs.xss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- return 1;
-}
-
#ifdef CONFIG_SECCOMP
-void hard_disable_TSC(void)
+static void hard_disable_TSC(void)
{
write_cr4(read_cr4() | X86_CR4_TSD);
}
@@ -599,7 +604,7 @@ void disable_TSC(void)
hard_disable_TSC();
preempt_enable();
}
-void hard_enable_TSC(void)
+static void hard_enable_TSC(void)
{
write_cr4(read_cr4() & ~X86_CR4_TSD);
}
@@ -609,18 +614,32 @@ static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
- struct thread_struct *next;
+ struct thread_struct *prev, *next;
+ unsigned long debugctl;
+ prev = &prev_p->thread;
next = &next_p->thread;
+ debugctl = prev->debugctlmsr;
+ if (next->ds_area_msr != prev->ds_area_msr) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+ wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
+ }
+
+ if (next->debugctlmsr != debugctl)
+ wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
+
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg[0], 0);
- set_debugreg(next->debugreg[1], 1);
- set_debugreg(next->debugreg[2], 2);
- set_debugreg(next->debugreg[3], 3);
+ set_debugreg(next->debugreg0, 0);
+ set_debugreg(next->debugreg1, 1);
+ set_debugreg(next->debugreg2, 2);
+ set_debugreg(next->debugreg3, 3);
/* no 4 and 5 */
- set_debugreg(next->debugreg[6], 6);
- set_debugreg(next->debugreg[7], 7);
+ set_debugreg(next->debugreg6, 6);
+ set_debugreg(next->debugreg7, 7);
}
#ifdef CONFIG_SECCOMP
@@ -634,6 +653,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
#endif
+ if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+ ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+
+ if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
+ ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
+
+
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
/*
* Disable the bitmap via an invalid offset. We still cache
@@ -687,11 +713,11 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
* More important, however, is the fact that this allows us much
* more flexibility.
*
- * The return value (in %eax) will be the "prev" task after
+ * The return value (in %ax) will be the "prev" task after
* the task-switch, and shows up in ret_from_fork in entry.S,
* for example.
*/
-struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
@@ -710,7 +736,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
/*
* Reload esp0.
*/
- load_esp0(tss, next);
+ load_sp0(tss, next);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -774,7 +800,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
asmlinkage int sys_fork(struct pt_regs regs)
{
- return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+ return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}
asmlinkage int sys_clone(struct pt_regs regs)
@@ -783,12 +809,12 @@ asmlinkage int sys_clone(struct pt_regs regs)
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;
- clone_flags = regs.ebx;
- newsp = regs.ecx;
- parent_tidptr = (int __user *)regs.edx;
- child_tidptr = (int __user *)regs.edi;
+ clone_flags = regs.bx;
+ newsp = regs.cx;
+ parent_tidptr = (int __user *)regs.dx;
+ child_tidptr = (int __user *)regs.di;
if (!newsp)
- newsp = regs.esp;
+ newsp = regs.sp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}
@@ -804,7 +830,7 @@ asmlinkage int sys_clone(struct pt_regs regs)
*/
asmlinkage int sys_vfork(struct pt_regs regs)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}
/*
@@ -815,18 +841,15 @@ asmlinkage int sys_execve(struct pt_regs regs)
int error;
char * filename;
- filename = getname((char __user *) regs.ebx);
+ filename = getname((char __user *) regs.bx);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename,
- (char __user * __user *) regs.ecx,
- (char __user * __user *) regs.edx,
+ (char __user * __user *) regs.cx,
+ (char __user * __user *) regs.dx,
&regs);
if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);
}
@@ -840,145 +863,37 @@ out:
unsigned long get_wchan(struct task_struct *p)
{
- unsigned long ebp, esp, eip;
+ unsigned long bp, sp, ip;
unsigned long stack_page;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
stack_page = (unsigned long)task_stack_page(p);
- esp = p->thread.esp;
- if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+ sp = p->thread.sp;
+ if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
return 0;
- /* include/asm-i386/system.h:switch_to() pushes ebp last. */
- ebp = *(unsigned long *) esp;
+ /* include/asm-i386/system.h:switch_to() pushes bp last. */
+ bp = *(unsigned long *) sp;
do {
- if (ebp < stack_page || ebp > top_ebp+stack_page)
+ if (bp < stack_page || bp > top_ebp+stack_page)
return 0;
- eip = *(unsigned long *) (ebp+4);
- if (!in_sched_functions(eip))
- return eip;
- ebp = *(unsigned long *) ebp;
+ ip = *(unsigned long *) (bp+4);
+ if (!in_sched_functions(ip))
+ return ip;
+ bp = *(unsigned long *) bp;
} while (count++ < 16);
return 0;
}
-/*
- * sys_alloc_thread_area: get a yet unused TLS descriptor index.
- */
-static int get_free_idx(void)
-{
- struct thread_struct *t = &current->thread;
- int idx;
-
- for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
- if (desc_empty(t->tls_array + idx))
- return idx + GDT_ENTRY_TLS_MIN;
- return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- */
-asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
-{
- struct thread_struct *t = &current->thread;
- struct user_desc info;
- struct desc_struct *desc;
- int cpu, idx;
-
- if (copy_from_user(&info, u_info, sizeof(info)))
- return -EFAULT;
- idx = info.entry_number;
-
- /*
- * index -1 means the kernel should try to find and
- * allocate an empty descriptor:
- */
- if (idx == -1) {
- idx = get_free_idx();
- if (idx < 0)
- return idx;
- if (put_user(idx, &u_info->entry_number))
- return -EFAULT;
- }
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- /*
- * We must not get preempted while modifying the TLS.
- */
- cpu = get_cpu();
-
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
- load_TLS(t, cpu);
-
- put_cpu();
-
- return 0;
-}
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-
-asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct desc_struct *desc;
- int idx;
-
- if (get_user(idx, &u_info->entry_number))
- return -EFAULT;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- memset(&info, 0, sizeof(info));
-
- desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
-
- if (copy_to_user(u_info, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ab79e1dfa02..137a86171c3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -3,7 +3,7 @@
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
- *
+ *
* X86-64 port
* Andi Kleen.
*
@@ -19,19 +19,19 @@
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/fs.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
+#include <linux/utsname.h>
#include <linux/delay.h>
+#include <linux/module.h>
#include <linux/ptrace.h>
-#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
@@ -72,13 +72,6 @@ void idle_notifier_register(struct notifier_block *n)
{
atomic_notifier_chain_register(&idle_notifier, n);
}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
void enter_idle(void)
{
@@ -106,7 +99,7 @@ void exit_idle(void)
* We use this if we don't have any better
* idle routine..
*/
-static void default_idle(void)
+void default_idle(void)
{
current_thread_info()->status &= ~TS_POLLING;
/*
@@ -116,11 +109,18 @@ static void default_idle(void)
smp_mb();
local_irq_disable();
if (!need_resched()) {
- /* Enables interrupts one instruction before HLT.
- x86 special cases this so there is no race. */
- safe_halt();
- } else
- local_irq_enable();
+ ktime_t t0, t1;
+ u64 t0n, t1n;
+
+ t0 = ktime_get();
+ t0n = ktime_to_ns(t0);
+ safe_halt(); /* enables interrupts racelessly */
+ local_irq_disable();
+ t1 = ktime_get();
+ t1n = ktime_to_ns(t1);
+ sched_clock_idle_wakeup_event(t1n - t0n);
+ }
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
@@ -129,54 +129,12 @@ static void default_idle(void)
* to poll the ->need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+static void poll_idle(void)
{
local_irq_enable();
cpu_relax();
}
-static void do_nothing(void *unused)
-{
-}
-
-void cpu_idle_wait(void)
-{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) &&
- !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- /*
- * We waited 1 sec, if a CPU still did not call idle
- * it may be because it is in idle and not waking up
- * because it has nothing to do.
- * Give all the remaining CPUS a kick.
- */
- smp_call_function_mask(map, do_nothing, 0, 0);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
@@ -207,19 +165,18 @@ static inline void play_dead(void)
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
-void cpu_idle (void)
+void cpu_idle(void)
{
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
+ tick_nohz_stop_sched_tick();
while (!need_resched()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
- tick_nohz_stop_sched_tick();
-
rmb();
idle = pm_idle;
if (!idle)
@@ -247,6 +204,47 @@ void cpu_idle (void)
}
}
+static void do_nothing(void *unused)
+{
+}
+
+void cpu_idle_wait(void)
+{
+ unsigned int cpu, this_cpu = get_cpu();
+ cpumask_t map, tmp = current->cpus_allowed;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ wmb();
+ do {
+ ssleep(1);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
+ /*
+ * We waited 1 sec, if a CPU still did not call idle
+ * it may be because it is in idle and not waking up
+ * because it has nothing to do.
+ * Give all the remaining CPUS a kick.
+ */
+ smp_call_function_mask(map, do_nothing, 0, 0);
+ } while (!cpus_empty(map));
+
+ set_cpus_allowed(current, tmp);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
@@ -257,13 +255,13 @@ void cpu_idle (void)
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
- __mwait(eax, ecx);
+ __mwait(ax, cx);
}
}
@@ -282,25 +280,41 @@ static void mwait_idle(void)
}
}
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+ /* Any C1 states supported? */
+ return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
- static int printed;
- if (cpu_has(c, X86_FEATURE_MWAIT)) {
+ static int selected;
+
+ if (selected)
+ return;
+#ifdef CONFIG_X86_SMP
+ if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ " performance may degrade.\n");
+ }
+#endif
+ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
/*
* Skip, if setup has overridden idle.
* One CPU supports mwait => All CPUs supports mwait
*/
if (!pm_idle) {
- if (!printed) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- printed = 1;
- }
+ printk(KERN_INFO "using mwait in idle threads.\n");
pm_idle = mwait_idle;
}
}
+ selected = 1;
}
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
{
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
@@ -315,13 +329,13 @@ static int __init idle_setup (char *str)
}
early_param("idle", idle_setup);
-/* Prints also some state that isn't saved in the pt_regs */
+/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned int fsindex,gsindex;
- unsigned int ds,cs,es;
+ unsigned int fsindex, gsindex;
+ unsigned int ds, cs, es;
printk("\n");
print_modules();
@@ -330,16 +344,16 @@ void __show_regs(struct pt_regs * regs)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
- printk_address(regs->rip);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
- regs->eflags);
+ printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk_address(regs->ip, 1);
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
+ regs->flags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
- regs->rax, regs->rbx, regs->rcx);
+ regs->ax, regs->bx, regs->cx);
printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
- regs->rdx, regs->rsi, regs->rdi);
+ regs->dx, regs->si, regs->di);
printk("RBP: %016lx R08: %016lx R09: %016lx\n",
- regs->rbp, regs->r8, regs->r9);
+ regs->bp, regs->r8, regs->r9);
printk("R10: %016lx R11: %016lx R12: %016lx\n",
regs->r10, regs->r11, regs->r12);
printk("R13: %016lx R14: %016lx R15: %016lx\n",
@@ -379,7 +393,7 @@ void show_regs(struct pt_regs *regs)
{
printk("CPU %d:", smp_processor_id());
__show_regs(regs);
- show_trace(NULL, regs, (void *)(regs + 1));
+ show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
/*
@@ -390,7 +404,7 @@ void exit_thread(void)
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
- if (me->thread.io_bitmap_ptr) {
+ if (me->thread.io_bitmap_ptr) {
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
kfree(t->io_bitmap_ptr);
@@ -426,7 +440,7 @@ void flush_thread(void)
tsk->thread.debugreg3 = 0;
tsk->thread.debugreg6 = 0;
tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
*/
@@ -449,26 +463,21 @@ void release_thread(struct task_struct *dead_task)
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
- struct user_desc ud = {
+ struct user_desc ud = {
.base_addr = addr,
.limit = 0xfffff,
.seg_32bit = 1,
.limit_in_pages = 1,
.useable = 1,
};
- struct n_desc_struct *desc = (void *)t->thread.tls_array;
+ struct desc_struct *desc = t->thread.tls_array;
desc += tls;
- desc->a = LDT_entry_a(&ud);
- desc->b = LDT_entry_b(&ud);
+ fill_ldt(desc, &ud);
}
static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
- struct desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- return desc->base0 |
- (((u32)desc->base1) << 16) |
- (((u32)desc->base2) << 24);
+ return get_desc_base(&t->thread.tls_array[tls]);
}
/*
@@ -480,7 +489,7 @@ void prepare_to_copy(struct task_struct *tsk)
unlazy_fpu(tsk);
}
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
struct task_struct * p, struct pt_regs * regs)
{
@@ -492,14 +501,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
(THREAD_SIZE + task_stack_page(p))) - 1;
*childregs = *regs;
- childregs->rax = 0;
- childregs->rsp = rsp;
- if (rsp == ~0UL)
- childregs->rsp = (unsigned long)childregs;
+ childregs->ax = 0;
+ childregs->sp = sp;
+ if (sp == ~0UL)
+ childregs->sp = (unsigned long)childregs;
- p->thread.rsp = (unsigned long) childregs;
- p->thread.rsp0 = (unsigned long) (childregs+1);
- p->thread.userrsp = me->thread.userrsp;
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
+ p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
@@ -520,7 +529,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
IO_BITMAP_BYTES);
set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
+ }
/*
* Set a new TLS for the child thread?
@@ -528,7 +537,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32))
- err = ia32_child_tls(p, childregs);
+ err = do_set_thread_area(p, -1,
+ (struct user_desc __user *)childregs->si, 0);
else
#endif
err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
@@ -547,17 +557,30 @@ out:
/*
* This special macro can be used to load a debugging register
*/
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
static inline void __switch_to_xtra(struct task_struct *prev_p,
- struct task_struct *next_p,
- struct tss_struct *tss)
+ struct task_struct *next_p,
+ struct tss_struct *tss)
{
struct thread_struct *prev, *next;
+ unsigned long debugctl;
prev = &prev_p->thread,
next = &next_p->thread;
+ debugctl = prev->debugctlmsr;
+ if (next->ds_area_msr != prev->ds_area_msr) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+ wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+ }
+
+ if (next->debugctlmsr != debugctl)
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
+
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
loaddebug(next, 0);
loaddebug(next, 1);
@@ -581,12 +604,18 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+
+ if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+ ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+
+ if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
+ ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}
/*
* switch_to(x,y) should switch tasks from x to y.
*
- * This could still be optimized:
+ * This could still be optimized:
* - fold all the options into a flag word and test it with a single test.
* - could test fs/gs bitsliced
*
@@ -597,7 +626,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
- int cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
/* we're going to use this soon, after a few expensive things */
@@ -607,7 +636,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Reload esp0, LDT and the page table pointer:
*/
- tss->rsp0 = next->rsp0;
+ load_sp0(tss, next);
/*
* Switch DS and ES.
@@ -666,8 +695,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->userrsp = read_pda(oldrsp);
- write_pda(oldrsp, next->userrsp);
+ prev->usersp = read_pda(oldrsp);
+ write_pda(oldrsp, next->usersp);
write_pda(pcurrent, next_p);
write_pda(kernelstack,
@@ -684,8 +713,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/
- if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
- || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
+ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
+ task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
/* If the task has used fpu the last 5 timeslices, just do a full
@@ -700,7 +729,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* sys_execve() executes a new program.
*/
-asmlinkage
+asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
char __user * __user *envp, struct pt_regs regs)
{
@@ -712,11 +741,6 @@ long sys_execve(char __user *name, char __user * __user *argv,
if (IS_ERR(filename))
return error;
error = do_execve(filename, argv, envp, &regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
putname(filename);
return error;
}
@@ -726,18 +750,18 @@ void set_personality_64bit(void)
/* inherit personality from parent */
/* Make sure to be in 64bit mode */
- clear_thread_flag(TIF_IA32);
+ clear_thread_flag(TIF_IA32);
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
so it's not too bad. The main problem is just that
- 32bit childs are affected again. */
+ 32bit childs are affected again. */
current->personality &= ~READ_IMPLIES_EXEC;
}
asmlinkage long sys_fork(struct pt_regs *regs)
{
- return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
+ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
asmlinkage long
@@ -745,7 +769,7 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
- newsp = regs->rsp;
+ newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
@@ -761,29 +785,29 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
*/
asmlinkage long sys_vfork(struct pt_regs *regs)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
NULL, NULL);
}
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp,rip;
+ u64 fp,ip;
int count = 0;
if (!p || p == current || p->state==TASK_RUNNING)
return 0;
stack = (unsigned long)task_stack_page(p);
- if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
+ if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
- fp = *(u64 *)(p->thread.rsp);
+ fp = *(u64 *)(p->thread.sp);
do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
return 0;
- rip = *(u64 *)(fp+8);
- if (!in_sched_functions(rip))
- return rip;
+ ip = *(u64 *)(fp+8);
+ if (!in_sched_functions(ip))
+ return ip;
fp = *(u64 *)fp;
} while (count++ < 16);
return 0;
@@ -824,19 +848,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
/* Not strictly needed for fs, but do it for symmetry
with gs */
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
+ if (addr <= 0xffffffff) {
set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
+ if (doit) {
+ load_TLS(&task->thread, cpu);
asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
}
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
- } else {
+ } else {
task->thread.fsindex = 0;
task->thread.fs = addr;
if (doit) {
@@ -848,24 +872,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
}
put_cpu();
break;
- case ARCH_GET_FS: {
- unsigned long base;
+ case ARCH_GET_FS: {
+ unsigned long base;
if (task->thread.fsindex == FS_TLS_SEL)
base = read_32bit_tls(task, FS_TLS);
else if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
+ ret = put_user(base, (unsigned long __user *)addr);
+ break;
}
- case ARCH_GET_GS: {
+ case ARCH_GET_GS: {
unsigned long base;
unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
else if (doit) {
- asm("movl %%gs,%0" : "=r" (gsindex));
+ asm("movl %%gs,%0" : "=r" (gsindex));
if (gsindex)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
@@ -873,39 +897,21 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
}
else
base = task->thread.gs;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)addr);
break;
}
default:
ret = -EINVAL;
break;
- }
+ }
- return ret;
-}
+ return ret;
+}
long sys_arch_prctl(int code, unsigned long addr)
{
return do_arch_prctl(current, code, addr);
-}
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs *pp, ptregs;
-
- pp = task_pt_regs(tsk);
-
- ptregs = *pp;
- ptregs.cs &= 0xffff;
- ptregs.ss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- return 1;
}
unsigned long arch_align_stack(unsigned long sp)
@@ -914,3 +920,9 @@ unsigned long arch_align_stack(unsigned long sp)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
new file mode 100644
index 00000000000..96286df1bb8
--- /dev/null
+++ b/arch/x86/kernel/ptrace.c
@@ -0,0 +1,1545 @@
+/* By Ross Biro 1/23/92 */
+/*
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * BTS tracing
+ * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+#include <linux/signal.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/debugreg.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/prctl.h>
+#include <asm/proto.h>
+#include <asm/ds.h>
+
+#include "tls.h"
+
+enum x86_regset {
+ REGSET_GENERAL,
+ REGSET_FP,
+ REGSET_XFP,
+ REGSET_TLS,
+};
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * Determines which flags the user has access to [1 = access, 0 = no access].
+ */
+#define FLAG_MASK_32 ((unsigned long) \
+ (X86_EFLAGS_CF | X86_EFLAGS_PF | \
+ X86_EFLAGS_AF | X86_EFLAGS_ZF | \
+ X86_EFLAGS_SF | X86_EFLAGS_TF | \
+ X86_EFLAGS_DF | X86_EFLAGS_OF | \
+ X86_EFLAGS_RF | X86_EFLAGS_AC))
+
+/*
+ * Determines whether a value may be installed in a segment register.
+ */
+static inline bool invalid_selector(u16 value)
+{
+ return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL);
+}
+
+#ifdef CONFIG_X86_32
+
+#define FLAG_MASK FLAG_MASK_32
+
+static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
+ regno >>= 2;
+ if (regno > FS)
+ --regno;
+ return &regs->bx + regno;
+}
+
+static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
+{
+ /*
+ * Returning the value truncates it to 16 bits.
+ */
+ unsigned int retval;
+ if (offset != offsetof(struct user_regs_struct, gs))
+ retval = *pt_regs_access(task_pt_regs(task), offset);
+ else {
+ retval = task->thread.gs;
+ if (task == current)
+ savesegment(gs, retval);
+ }
+ return retval;
+}
+
+static int set_segment_reg(struct task_struct *task,
+ unsigned long offset, u16 value)
+{
+ /*
+ * The value argument was already truncated to 16 bits.
+ */
+ if (invalid_selector(value))
+ return -EIO;
+
+ if (offset != offsetof(struct user_regs_struct, gs))
+ *pt_regs_access(task_pt_regs(task), offset) = value;
+ else {
+ task->thread.gs = value;
+ if (task == current)
+ /*
+ * The user-mode %gs is not affected by
+ * kernel entry, so we must update the CPU.
+ */
+ loadsegment(gs, value);
+ }
+
+ return 0;
+}
+
+static unsigned long debugreg_addr_limit(struct task_struct *task)
+{
+ return TASK_SIZE - 3;
+}
+
+#else /* CONFIG_X86_64 */
+
+#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
+
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
+ return &regs->r15 + (offset / sizeof(regs->r15));
+}
+
+static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
+{
+ /*
+ * Returning the value truncates it to 16 bits.
+ */
+ unsigned int seg;
+
+ switch (offset) {
+ case offsetof(struct user_regs_struct, fs):
+ if (task == current) {
+ /* Older gas can't assemble movq %?s,%r?? */
+ asm("movl %%fs,%0" : "=r" (seg));
+ return seg;
+ }
+ return task->thread.fsindex;
+ case offsetof(struct user_regs_struct, gs):
+ if (task == current) {
+ asm("movl %%gs,%0" : "=r" (seg));
+ return seg;
+ }
+ return task->thread.gsindex;
+ case offsetof(struct user_regs_struct, ds):
+ if (task == current) {
+ asm("movl %%ds,%0" : "=r" (seg));
+ return seg;
+ }
+ return task->thread.ds;
+ case offsetof(struct user_regs_struct, es):
+ if (task == current) {
+ asm("movl %%es,%0" : "=r" (seg));
+ return seg;
+ }
+ return task->thread.es;
+
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ss):
+ break;
+ }
+ return *pt_regs_access(task_pt_regs(task), offset);
+}
+
+static int set_segment_reg(struct task_struct *task,
+ unsigned long offset, u16 value)
+{
+ /*
+ * The value argument was already truncated to 16 bits.
+ */
+ if (invalid_selector(value))
+ return -EIO;
+
+ switch (offset) {
+ case offsetof(struct user_regs_struct,fs):
+ /*
+ * If this is setting fs as for normal 64-bit use but
+ * setting fs_base has implicitly changed it, leave it.
+ */
+ if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
+ task->thread.fs != 0) ||
+ (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
+ task->thread.fs == 0))
+ break;
+ task->thread.fsindex = value;
+ if (task == current)
+ loadsegment(fs, task->thread.fsindex);
+ break;
+ case offsetof(struct user_regs_struct,gs):
+ /*
+ * If this is setting gs as for normal 64-bit use but
+ * setting gs_base has implicitly changed it, leave it.
+ */
+ if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
+ task->thread.gs != 0) ||
+ (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
+ task->thread.gs == 0))
+ break;
+ task->thread.gsindex = value;
+ if (task == current)
+ load_gs_index(task->thread.gsindex);
+ break;
+ case offsetof(struct user_regs_struct,ds):
+ task->thread.ds = value;
+ if (task == current)
+ loadsegment(ds, task->thread.ds);
+ break;
+ case offsetof(struct user_regs_struct,es):
+ task->thread.es = value;
+ if (task == current)
+ loadsegment(es, task->thread.es);
+ break;
+
+ /*
+ * Can't actually change these in 64-bit mode.
+ */
+ case offsetof(struct user_regs_struct,cs):
+#ifdef CONFIG_IA32_EMULATION
+ if (test_tsk_thread_flag(task, TIF_IA32))
+ task_pt_regs(task)->cs = value;
+#endif
+ break;
+ case offsetof(struct user_regs_struct,ss):
+#ifdef CONFIG_IA32_EMULATION
+ if (test_tsk_thread_flag(task, TIF_IA32))
+ task_pt_regs(task)->ss = value;
+#endif
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned long debugreg_addr_limit(struct task_struct *task)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_tsk_thread_flag(task, TIF_IA32))
+ return IA32_PAGE_OFFSET - 3;
+#endif
+ return TASK_SIZE64 - 7;
+}
+
+#endif /* CONFIG_X86_32 */
+
+static unsigned long get_flags(struct task_struct *task)
+{
+ unsigned long retval = task_pt_regs(task)->flags;
+
+ /*
+ * If the debugger set TF, hide it from the readout.
+ */
+ if (test_tsk_thread_flag(task, TIF_FORCED_TF))
+ retval &= ~X86_EFLAGS_TF;
+
+ return retval;
+}
+
+static int set_flags(struct task_struct *task, unsigned long value)
+{
+ struct pt_regs *regs = task_pt_regs(task);
+
+ /*
+ * If the user value contains TF, mark that
+ * it was not "us" (the debugger) that set it.
+ * If not, make sure it stays set if we had.
+ */
+ if (value & X86_EFLAGS_TF)
+ clear_tsk_thread_flag(task, TIF_FORCED_TF);
+ else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
+ value |= X86_EFLAGS_TF;
+
+ regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
+
+ return 0;
+}
+
+static int putreg(struct task_struct *child,
+ unsigned long offset, unsigned long value)
+{
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ds):
+ case offsetof(struct user_regs_struct, es):
+ case offsetof(struct user_regs_struct, fs):
+ case offsetof(struct user_regs_struct, gs):
+ case offsetof(struct user_regs_struct, ss):
+ return set_segment_reg(child, offset, value);
+
+ case offsetof(struct user_regs_struct, flags):
+ return set_flags(child, value);
+
+#ifdef CONFIG_X86_64
+ case offsetof(struct user_regs_struct,fs_base):
+ if (value >= TASK_SIZE_OF(child))
+ return -EIO;
+ /*
+ * When changing the segment base, use do_arch_prctl
+ * to set either thread.fs or thread.fsindex and the
+ * corresponding GDT slot.
+ */
+ if (child->thread.fs != value)
+ return do_arch_prctl(child, ARCH_SET_FS, value);
+ return 0;
+ case offsetof(struct user_regs_struct,gs_base):
+ /*
+ * Exactly the same here as the %fs handling above.
+ */
+ if (value >= TASK_SIZE_OF(child))
+ return -EIO;
+ if (child->thread.gs != value)
+ return do_arch_prctl(child, ARCH_SET_GS, value);
+ return 0;
+#endif
+ }
+
+ *pt_regs_access(task_pt_regs(child), offset) = value;
+ return 0;
+}
+
+static unsigned long getreg(struct task_struct *task, unsigned long offset)
+{
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ds):
+ case offsetof(struct user_regs_struct, es):
+ case offsetof(struct user_regs_struct, fs):
+ case offsetof(struct user_regs_struct, gs):
+ case offsetof(struct user_regs_struct, ss):
+ return get_segment_reg(task, offset);
+
+ case offsetof(struct user_regs_struct, flags):
+ return get_flags(task);
+
+#ifdef CONFIG_X86_64
+ case offsetof(struct user_regs_struct, fs_base): {
+ /*
+ * do_arch_prctl may have used a GDT slot instead of
+ * the MSR. To userland, it appears the same either
+ * way, except the %fs segment selector might not be 0.
+ */
+ unsigned int seg = task->thread.fsindex;
+ if (task->thread.fs != 0)
+ return task->thread.fs;
+ if (task == current)
+ asm("movl %%fs,%0" : "=r" (seg));
+ if (seg != FS_TLS_SEL)
+ return 0;
+ return get_desc_base(&task->thread.tls_array[FS_TLS]);
+ }
+ case offsetof(struct user_regs_struct, gs_base): {
+ /*
+ * Exactly the same here as the %fs handling above.
+ */
+ unsigned int seg = task->thread.gsindex;
+ if (task->thread.gs != 0)
+ return task->thread.gs;
+ if (task == current)
+ asm("movl %%gs,%0" : "=r" (seg));
+ if (seg != GS_TLS_SEL)
+ return 0;
+ return get_desc_base(&task->thread.tls_array[GS_TLS]);
+ }
+#endif
+ }
+
+ return *pt_regs_access(task_pt_regs(task), offset);
+}
+
+static int genregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ while (count > 0) {
+ *k++ = getreg(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ unsigned long __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(getreg(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ return 0;
+}
+
+static int genregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+ if (kbuf) {
+ const unsigned long *k = kbuf;
+ while (count > 0 && !ret) {
+ ret = putreg(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const unsigned long __user *u = ubuf;
+ while (count > 0 && !ret) {
+ unsigned long word;
+ ret = __get_user(word, u++);
+ if (ret)
+ break;
+ ret = putreg(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return ret;
+}
+
+/*
+ * This function is trivial and will be inlined by the compiler.
+ * Having it separates the implementation details of debug
+ * registers from the interface details of ptrace.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+{
+ switch (n) {
+ case 0: return child->thread.debugreg0;
+ case 1: return child->thread.debugreg1;
+ case 2: return child->thread.debugreg2;
+ case 3: return child->thread.debugreg3;
+ case 6: return child->thread.debugreg6;
+ case 7: return child->thread.debugreg7;
+ }
+ return 0;
+}
+
+static int ptrace_set_debugreg(struct task_struct *child,
+ int n, unsigned long data)
+{
+ int i;
+
+ if (unlikely(n == 4 || n == 5))
+ return -EIO;
+
+ if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
+ return -EIO;
+
+ switch (n) {
+ case 0: child->thread.debugreg0 = data; break;
+ case 1: child->thread.debugreg1 = data; break;
+ case 2: child->thread.debugreg2 = data; break;
+ case 3: child->thread.debugreg3 = data; break;
+
+ case 6:
+ if ((data & ~0xffffffffUL) != 0)
+ return -EIO;
+ child->thread.debugreg6 = data;
+ break;
+
+ case 7:
+ /*
+ * Sanity-check data. Take one half-byte at once with
+ * check = (val >> (16 + 4*i)) & 0xf. It contains the
+ * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
+ * 2 and 3 are LENi. Given a list of invalid values,
+ * we do mask |= 1 << invalid_value, so that
+ * (mask >> check) & 1 is a correct test for invalid
+ * values.
+ *
+ * R/Wi contains the type of the breakpoint /
+ * watchpoint, LENi contains the length of the watched
+ * data in the watchpoint case.
+ *
+ * The invalid values are:
+ * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
+ * - R/Wi == 0x10 (break on I/O reads or writes), so
+ * mask |= 0x4444.
+ * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
+ * 0x1110.
+ *
+ * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
+ *
+ * See the Intel Manual "System Programming Guide",
+ * 15.2.4
+ *
+ * Note that LENi == 0x10 is defined on x86_64 in long
+ * mode (i.e. even for 32-bit userspace software, but
+ * 64-bit kernel), so the x86_64 mask value is 0x5454.
+ * See the AMD manual no. 24593 (AMD64 System Programming)
+ */
+#ifdef CONFIG_X86_32
+#define DR7_MASK 0x5f54
+#else
+#define DR7_MASK 0x5554
+#endif
+ data &= ~DR_CONTROL_RESERVED;
+ for (i = 0; i < 4; i++)
+ if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+ return -EIO;
+ child->thread.debugreg7 = data;
+ if (data)
+ set_tsk_thread_flag(child, TIF_DEBUG);
+ else
+ clear_tsk_thread_flag(child, TIF_DEBUG);
+ break;
+ }
+
+ return 0;
+}
+
+static int ptrace_bts_get_size(struct task_struct *child)
+{
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ return ds_get_bts_index((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_read_record(struct task_struct *child,
+ long index,
+ struct bts_struct __user *out)
+{
+ struct bts_struct ret;
+ int retval;
+ int bts_end;
+ int bts_index;
+
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ if (index < 0)
+ return -EINVAL;
+
+ bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
+ if (bts_end <= index)
+ return -EINVAL;
+
+ /* translate the ptrace bts index into the ds bts index */
+ bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
+ bts_index -= (index + 1);
+ if (bts_index < 0)
+ bts_index += bts_end;
+
+ retval = ds_read_bts((void *)child->thread.ds_area_msr,
+ bts_index, &ret);
+ if (retval < 0)
+ return retval;
+
+ if (copy_to_user(out, &ret, sizeof(ret)))
+ return -EFAULT;
+
+ return sizeof(ret);
+}
+
+static int ptrace_bts_write_record(struct task_struct *child,
+ const struct bts_struct *in)
+{
+ int retval;
+
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
+ if (retval)
+ return retval;
+
+ return sizeof(*in);
+}
+
+static int ptrace_bts_clear(struct task_struct *child)
+{
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ return ds_clear((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_drain(struct task_struct *child,
+ long size,
+ struct bts_struct __user *out)
+{
+ int end, i;
+ void *ds = (void *)child->thread.ds_area_msr;
+
+ if (!ds)
+ return -ENXIO;
+
+ end = ds_get_bts_index(ds);
+ if (end <= 0)
+ return end;
+
+ if (size < (end * sizeof(struct bts_struct)))
+ return -EIO;
+
+ for (i = 0; i < end; i++, out++) {
+ struct bts_struct ret;
+ int retval;
+
+ retval = ds_read_bts(ds, i, &ret);
+ if (retval < 0)
+ return retval;
+
+ if (copy_to_user(out, &ret, sizeof(ret)))
+ return -EFAULT;
+ }
+
+ ds_clear(ds);
+
+ return end;
+}
+
+static int ptrace_bts_realloc(struct task_struct *child,
+ int size, int reduce_size)
+{
+ unsigned long rlim, vm;
+ int ret, old_size;
+
+ if (size < 0)
+ return -EINVAL;
+
+ old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+ if (old_size < 0)
+ return old_size;
+
+ ret = ds_free((void **)&child->thread.ds_area_msr);
+ if (ret < 0)
+ goto out;
+
+ size >>= PAGE_SHIFT;
+ old_size >>= PAGE_SHIFT;
+
+ current->mm->total_vm -= old_size;
+ current->mm->locked_vm -= old_size;
+
+ if (size == 0)
+ goto out;
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->total_vm;
+ if (size <= 0)
+ goto out;
+ }
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->locked_vm;
+ if (size <= 0)
+ goto out;
+ }
+
+ ret = ds_allocate((void **)&child->thread.ds_area_msr,
+ size << PAGE_SHIFT);
+ if (ret < 0)
+ goto out;
+
+ current->mm->total_vm += size;
+ current->mm->locked_vm += size;
+
+out:
+ if (child->thread.ds_area_msr)
+ set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+ else
+ clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+ return ret;
+}
+
+static int ptrace_bts_config(struct task_struct *child,
+ long cfg_size,
+ const struct ptrace_bts_config __user *ucfg)
+{
+ struct ptrace_bts_config cfg;
+ int bts_size, ret = 0;
+ void *ds;
+
+ if (cfg_size < sizeof(cfg))
+ return -EIO;
+
+ if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
+ return -EFAULT;
+
+ if ((int)cfg.size < 0)
+ return -EINVAL;
+
+ bts_size = 0;
+ ds = (void *)child->thread.ds_area_msr;
+ if (ds) {
+ bts_size = ds_get_bts_size(ds);
+ if (bts_size < 0)
+ return bts_size;
+ }
+ cfg.size = PAGE_ALIGN(cfg.size);
+
+ if (bts_size != cfg.size) {
+ ret = ptrace_bts_realloc(child, cfg.size,
+ cfg.flags & PTRACE_BTS_O_CUT_SIZE);
+ if (ret < 0)
+ goto errout;
+
+ ds = (void *)child->thread.ds_area_msr;
+ }
+
+ if (cfg.flags & PTRACE_BTS_O_SIGNAL)
+ ret = ds_set_overflow(ds, DS_O_SIGNAL);
+ else
+ ret = ds_set_overflow(ds, DS_O_WRAP);
+ if (ret < 0)
+ goto errout;
+
+ if (cfg.flags & PTRACE_BTS_O_TRACE)
+ child->thread.debugctlmsr |= ds_debugctl_mask();
+ else
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
+
+ if (cfg.flags & PTRACE_BTS_O_SCHED)
+ set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ else
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+
+ ret = sizeof(cfg);
+
+out:
+ if (child->thread.debugctlmsr)
+ set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ else
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+ return ret;
+
+errout:
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ goto out;
+}
+
+static int ptrace_bts_status(struct task_struct *child,
+ long cfg_size,
+ struct ptrace_bts_config __user *ucfg)
+{
+ void *ds = (void *)child->thread.ds_area_msr;
+ struct ptrace_bts_config cfg;
+
+ if (cfg_size < sizeof(cfg))
+ return -EIO;
+
+ memset(&cfg, 0, sizeof(cfg));
+
+ if (ds) {
+ cfg.size = ds_get_bts_size(ds);
+
+ if (ds_get_overflow(ds) == DS_O_SIGNAL)
+ cfg.flags |= PTRACE_BTS_O_SIGNAL;
+
+ if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+ child->thread.debugctlmsr & ds_debugctl_mask())
+ cfg.flags |= PTRACE_BTS_O_TRACE;
+
+ if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+ cfg.flags |= PTRACE_BTS_O_SCHED;
+ }
+
+ cfg.bts_size = sizeof(struct bts_struct);
+
+ if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
+ return -EFAULT;
+
+ return sizeof(cfg);
+}
+
+void ptrace_bts_take_timestamp(struct task_struct *tsk,
+ enum bts_qualifier qualifier)
+{
+ struct bts_struct rec = {
+ .qualifier = qualifier,
+ .variant.jiffies = jiffies_64
+ };
+
+ ptrace_bts_write_record(tsk, &rec);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ user_disable_single_step(child);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+ if (child->thread.ds_area_msr) {
+ ptrace_bts_realloc(child, 0, 0);
+ child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ }
+}
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset_view user_x86_32_view; /* Initialized below. */
+#endif
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+ int ret;
+ unsigned long __user *datap = (unsigned long __user *)data;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long tmp;
+
+ ret = -EIO;
+ if ((addr & (sizeof(data) - 1)) || addr < 0 ||
+ addr >= sizeof(struct user))
+ break;
+
+ tmp = 0; /* Default return condition */
+ if (addr < sizeof(struct user_regs_struct))
+ tmp = getreg(child, addr);
+ else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+ addr <= offsetof(struct user, u_debugreg[7])) {
+ addr -= offsetof(struct user, u_debugreg[0]);
+ tmp = ptrace_get_debugreg(child, addr / sizeof(data));
+ }
+ ret = put_user(tmp, datap);
+ break;
+ }
+
+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+ ret = -EIO;
+ if ((addr & (sizeof(data) - 1)) || addr < 0 ||
+ addr >= sizeof(struct user))
+ break;
+
+ if (addr < sizeof(struct user_regs_struct))
+ ret = putreg(child, addr, data);
+ else if (addr >= offsetof(struct user, u_debugreg[0]) &&
+ addr <= offsetof(struct user, u_debugreg[7])) {
+ addr -= offsetof(struct user, u_debugreg[0]);
+ ret = ptrace_set_debugreg(child,
+ addr / sizeof(data), data);
+ }
+ break;
+
+ case PTRACE_GETREGS: /* Get all gp regs from the child. */
+ return copy_regset_to_user(child,
+ task_user_regset_view(current),
+ REGSET_GENERAL,
+ 0, sizeof(struct user_regs_struct),
+ datap);
+
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child,
+ task_user_regset_view(current),
+ REGSET_GENERAL,
+ 0, sizeof(struct user_regs_struct),
+ datap);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state. */
+ return copy_regset_to_user(child,
+ task_user_regset_view(current),
+ REGSET_FP,
+ 0, sizeof(struct user_i387_struct),
+ datap);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state. */
+ return copy_regset_from_user(child,
+ task_user_regset_view(current),
+ REGSET_FP,
+ 0, sizeof(struct user_i387_struct),
+ datap);
+
+#ifdef CONFIG_X86_32
+ case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
+ return copy_regset_to_user(child, &user_x86_32_view,
+ REGSET_XFP,
+ 0, sizeof(struct user_fxsr_struct),
+ datap);
+
+ case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
+ return copy_regset_from_user(child, &user_x86_32_view,
+ REGSET_XFP,
+ 0, sizeof(struct user_fxsr_struct),
+ datap);
+#endif
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ case PTRACE_GET_THREAD_AREA:
+ if (addr < 0)
+ return -EIO;
+ ret = do_get_thread_area(child, addr,
+ (struct user_desc __user *) data);
+ break;
+
+ case PTRACE_SET_THREAD_AREA:
+ if (addr < 0)
+ return -EIO;
+ ret = do_set_thread_area(child, addr,
+ (struct user_desc __user *) data, 0);
+ break;
+#endif
+
+#ifdef CONFIG_X86_64
+ /* normal 64bit interface to access TLS data.
+ Works just like arch_prctl, except that the arguments
+ are reversed. */
+ case PTRACE_ARCH_PRCTL:
+ ret = do_arch_prctl(child, data, addr);
+ break;
+#endif
+
+ case PTRACE_BTS_CONFIG:
+ ret = ptrace_bts_config
+ (child, data, (struct ptrace_bts_config __user *)addr);
+ break;
+
+ case PTRACE_BTS_STATUS:
+ ret = ptrace_bts_status
+ (child, data, (struct ptrace_bts_config __user *)addr);
+ break;
+
+ case PTRACE_BTS_SIZE:
+ ret = ptrace_bts_get_size(child);
+ break;
+
+ case PTRACE_BTS_GET:
+ ret = ptrace_bts_read_record
+ (child, data, (struct bts_struct __user *) addr);
+ break;
+
+ case PTRACE_BTS_CLEAR:
+ ret = ptrace_bts_clear(child);
+ break;
+
+ case PTRACE_BTS_DRAIN:
+ ret = ptrace_bts_drain
+ (child, data, (struct bts_struct __user *) addr);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <asm/ia32.h>
+#include <asm/user32.h>
+
+#define R32(l,q) \
+ case offsetof(struct user32, regs.l): \
+ regs->q = value; break
+
+#define SEG32(rs) \
+ case offsetof(struct user32, regs.rs): \
+ return set_segment_reg(child, \
+ offsetof(struct user_regs_struct, rs), \
+ value); \
+ break
+
+static int putreg32(struct task_struct *child, unsigned regno, u32 value)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+
+ switch (regno) {
+
+ SEG32(cs);
+ SEG32(ds);
+ SEG32(es);
+ SEG32(fs);
+ SEG32(gs);
+ SEG32(ss);
+
+ R32(ebx, bx);
+ R32(ecx, cx);
+ R32(edx, dx);
+ R32(edi, di);
+ R32(esi, si);
+ R32(ebp, bp);
+ R32(eax, ax);
+ R32(orig_eax, orig_ax);
+ R32(eip, ip);
+ R32(esp, sp);
+
+ case offsetof(struct user32, regs.eflags):
+ return set_flags(child, value);
+
+ case offsetof(struct user32, u_debugreg[0]) ...
+ offsetof(struct user32, u_debugreg[7]):
+ regno -= offsetof(struct user32, u_debugreg[0]);
+ return ptrace_set_debugreg(child, regno / 4, value);
+
+ default:
+ if (regno > sizeof(struct user32) || (regno & 3))
+ return -EIO;
+
+ /*
+ * Other dummy fields in the virtual user structure
+ * are ignored
+ */
+ break;
+ }
+ return 0;
+}
+
+#undef R32
+#undef SEG32
+
+#define R32(l,q) \
+ case offsetof(struct user32, regs.l): \
+ *val = regs->q; break
+
+#define SEG32(rs) \
+ case offsetof(struct user32, regs.rs): \
+ *val = get_segment_reg(child, \
+ offsetof(struct user_regs_struct, rs)); \
+ break
+
+static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+
+ switch (regno) {
+
+ SEG32(ds);
+ SEG32(es);
+ SEG32(fs);
+ SEG32(gs);
+
+ R32(cs, cs);
+ R32(ss, ss);
+ R32(ebx, bx);
+ R32(ecx, cx);
+ R32(edx, dx);
+ R32(edi, di);
+ R32(esi, si);
+ R32(ebp, bp);
+ R32(eax, ax);
+ R32(orig_eax, orig_ax);
+ R32(eip, ip);
+ R32(esp, sp);
+
+ case offsetof(struct user32, regs.eflags):
+ *val = get_flags(child);
+ break;
+
+ case offsetof(struct user32, u_debugreg[0]) ...
+ offsetof(struct user32, u_debugreg[7]):
+ regno -= offsetof(struct user32, u_debugreg[0]);
+ *val = ptrace_get_debugreg(child, regno / 4);
+ break;
+
+ default:
+ if (regno > sizeof(struct user32) || (regno & 3))
+ return -EIO;
+
+ /*
+ * Other dummy fields in the virtual user structure
+ * are ignored
+ */
+ *val = 0;
+ break;
+ }
+ return 0;
+}
+
+#undef R32
+#undef SEG32
+
+static int genregs32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ getreg32(target, pos, k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ compat_ulong_t word;
+ getreg32(target, pos, &word);
+ if (__put_user(word, u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ return 0;
+}
+
+static int genregs32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0 && !ret) {
+ ret = putreg(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !ret) {
+ compat_ulong_t word;
+ ret = __get_user(word, u++);
+ if (ret)
+ break;
+ ret = putreg(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return ret;
+}
+
+static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+{
+ siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
+ compat_siginfo_t __user *si32 = compat_ptr(data);
+ siginfo_t ssi;
+ int ret;
+
+ if (request == PTRACE_SETSIGINFO) {
+ memset(&ssi, 0, sizeof(siginfo_t));
+ ret = copy_siginfo_from_user32(&ssi, si32);
+ if (ret)
+ return ret;
+ if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
+ return -EFAULT;
+ }
+ ret = sys_ptrace(request, pid, addr, (unsigned long)si);
+ if (ret)
+ return ret;
+ if (request == PTRACE_GETSIGINFO) {
+ if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
+ return -EFAULT;
+ ret = copy_siginfo_to_user32(si32, &ssi);
+ }
+ return ret;
+}
+
+asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
+{
+ struct task_struct *child;
+ struct pt_regs *childregs;
+ void __user *datap = compat_ptr(data);
+ int ret;
+ __u32 val;
+
+ switch (request) {
+ case PTRACE_TRACEME:
+ case PTRACE_ATTACH:
+ case PTRACE_KILL:
+ case PTRACE_CONT:
+ case PTRACE_SINGLESTEP:
+ case PTRACE_SINGLEBLOCK:
+ case PTRACE_DETACH:
+ case PTRACE_SYSCALL:
+ case PTRACE_OLDSETOPTIONS:
+ case PTRACE_SETOPTIONS:
+ case PTRACE_SET_THREAD_AREA:
+ case PTRACE_GET_THREAD_AREA:
+ case PTRACE_BTS_CONFIG:
+ case PTRACE_BTS_STATUS:
+ case PTRACE_BTS_SIZE:
+ case PTRACE_BTS_GET:
+ case PTRACE_BTS_CLEAR:
+ case PTRACE_BTS_DRAIN:
+ return sys_ptrace(request, pid, addr, data);
+
+ default:
+ return -EINVAL;
+
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ case PTRACE_POKEDATA:
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEUSR:
+ case PTRACE_PEEKUSR:
+ case PTRACE_GETREGS:
+ case PTRACE_SETREGS:
+ case PTRACE_SETFPREGS:
+ case PTRACE_GETFPREGS:
+ case PTRACE_SETFPXREGS:
+ case PTRACE_GETFPXREGS:
+ case PTRACE_GETEVENTMSG:
+ break;
+
+ case PTRACE_SETSIGINFO:
+ case PTRACE_GETSIGINFO:
+ return ptrace32_siginfo(request, pid, addr, data);
+ }
+
+ child = ptrace_get_task_struct(pid);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+
+ ret = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (ret < 0)
+ goto out;
+
+ childregs = task_pt_regs(child);
+
+ switch (request) {
+ case PTRACE_PEEKUSR:
+ ret = getreg32(child, addr, &val);
+ if (ret == 0)
+ ret = put_user(val, (__u32 __user *)datap);
+ break;
+
+ case PTRACE_POKEUSR:
+ ret = putreg32(child, addr, data);
+ break;
+
+ case PTRACE_GETREGS: /* Get all gp regs from the child. */
+ return copy_regset_to_user(child, &user_x86_32_view,
+ REGSET_GENERAL,
+ 0, sizeof(struct user_regs_struct32),
+ datap);
+
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_x86_32_view,
+ REGSET_GENERAL, 0,
+ sizeof(struct user_regs_struct32),
+ datap);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state. */
+ return copy_regset_to_user(child, &user_x86_32_view,
+ REGSET_FP, 0,
+ sizeof(struct user_i387_ia32_struct),
+ datap);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state. */
+ return copy_regset_from_user(
+ child, &user_x86_32_view, REGSET_FP,
+ 0, sizeof(struct user_i387_ia32_struct), datap);
+
+ case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
+ return copy_regset_to_user(child, &user_x86_32_view,
+ REGSET_XFP, 0,
+ sizeof(struct user32_fxsr_struct),
+ datap);
+
+ case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
+ return copy_regset_from_user(child, &user_x86_32_view,
+ REGSET_XFP, 0,
+ sizeof(struct user32_fxsr_struct),
+ datap);
+
+ default:
+ return compat_ptrace_request(child, request, addr, data);
+ }
+
+ out:
+ put_task_struct(child);
+ return ret;
+}
+
+#endif /* CONFIG_IA32_EMULATION */
+
+#ifdef CONFIG_X86_64
+
+static const struct user_regset x86_64_regsets[] = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct) / sizeof(long),
+ .size = sizeof(long), .align = sizeof(long),
+ .get = genregs_get, .set = genregs_set
+ },
+ [REGSET_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct user_i387_struct) / sizeof(long),
+ .size = sizeof(long), .align = sizeof(long),
+ .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ },
+};
+
+static const struct user_regset_view user_x86_64_view = {
+ .name = "x86_64", .e_machine = EM_X86_64,
+ .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
+};
+
+#else /* CONFIG_X86_32 */
+
+#define user_regs_struct32 user_regs_struct
+#define genregs32_get genregs_get
+#define genregs32_set genregs_set
+
+#endif /* CONFIG_X86_64 */
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+static const struct user_regset x86_32_regsets[] = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct32) / sizeof(u32),
+ .size = sizeof(u32), .align = sizeof(u32),
+ .get = genregs32_get, .set = genregs32_set
+ },
+ [REGSET_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct user_i387_struct) / sizeof(u32),
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = fpregs_active, .get = fpregs_get, .set = fpregs_set
+ },
+ [REGSET_XFP] = {
+ .core_note_type = NT_PRXFPREG,
+ .n = sizeof(struct user_i387_struct) / sizeof(u32),
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ },
+ [REGSET_TLS] = {
+ .core_note_type = NT_386_TLS,
+ .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
+ .size = sizeof(struct user_desc),
+ .align = sizeof(struct user_desc),
+ .active = regset_tls_active,
+ .get = regset_tls_get, .set = regset_tls_set
+ },
+};
+
+static const struct user_regset_view user_x86_32_view = {
+ .name = "i386", .e_machine = EM_386,
+ .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_tsk_thread_flag(task, TIF_IA32))
+#endif
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ return &user_x86_32_view;
+#endif
+#ifdef CONFIG_X86_64
+ return &user_x86_64_view;
+#endif
+}
+
+#ifdef CONFIG_X86_32
+
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+{
+ struct siginfo info;
+
+ tsk->thread.trap_no = 1;
+ tsk->thread.error_code = error_code;
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = SIGTRAP;
+ info.si_code = TRAP_BRKPT;
+
+ /* User-mode ip? */
+ info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
+
+ /* Send us the fake SIGTRAP */
+ force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/* notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+ */
+__attribute__((regparm(3)))
+int do_syscall_trace(struct pt_regs *regs, int entryexit)
+{
+ int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
+ /*
+ * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
+ * interception
+ */
+ int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
+ int ret = 0;
+
+ /* do the secure computing check first */
+ if (!entryexit)
+ secure_computing(regs->orig_ax);
+
+ if (unlikely(current->audit_context)) {
+ if (entryexit)
+ audit_syscall_exit(AUDITSC_RESULT(regs->ax),
+ regs->ax);
+ /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
+ * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
+ * not used, entry.S will call us only on syscall exit, not
+ * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
+ * calling send_sigtrap() on syscall entry.
+ *
+ * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
+ * is_singlestep is false, despite his name, so we will still do
+ * the correct thing.
+ */
+ else if (is_singlestep)
+ goto out;
+ }
+
+ if (!(current->ptrace & PT_PTRACED))
+ goto out;
+
+ /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
+ * and then is resumed with SYSEMU_SINGLESTEP, it will come in
+ * here. We have to check this and return */
+ if (is_sysemu && entryexit)
+ return 0;
+
+ /* Fake a debug trap */
+ if (is_singlestep)
+ send_sigtrap(current, regs, 0);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
+ goto out;
+
+ /* the 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
+ /* Note that the debugger could change the result of test_thread_flag!*/
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+ ret = is_sysemu;
+out:
+ if (unlikely(current->audit_context) && !entryexit)
+ audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
+ regs->bx, regs->cx, regs->dx, regs->si);
+ if (ret == 0)
+ return 0;
+
+ regs->orig_ax = -1; /* force skip of syscall restarting */
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ return 1;
+}
+
+#else /* CONFIG_X86_64 */
+
+static void syscall_trace(struct pt_regs *regs)
+{
+
+#if 0
+ printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
+ current->comm,
+ regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
+ current_thread_info()->flags, current->ptrace);
+#endif
+
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
+
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+{
+ /* do the secure computing check first */
+ secure_computing(regs->orig_ax);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE)
+ && (current->ptrace & PT_PTRACED))
+ syscall_trace(regs);
+
+ if (unlikely(current->audit_context)) {
+ if (test_thread_flag(TIF_IA32)) {
+ audit_syscall_entry(AUDIT_ARCH_I386,
+ regs->orig_ax,
+ regs->bx, regs->cx,
+ regs->dx, regs->si);
+ } else {
+ audit_syscall_entry(AUDIT_ARCH_X86_64,
+ regs->orig_ax,
+ regs->di, regs->si,
+ regs->dx, regs->r10);
+ }
+ }
+}
+
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+
+ if ((test_thread_flag(TIF_SYSCALL_TRACE)
+ || test_thread_flag(TIF_SINGLESTEP))
+ && (current->ptrace & PT_PTRACED))
+ syscall_trace(regs);
+}
+
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
deleted file mode 100644
index ff5431cc03e..00000000000
--- a/arch/x86/kernel/ptrace_32.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/* By Ross Biro 1/23/92 */
-/*
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/debugreg.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9).
- * Also masks reserved bits (31-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x00050dd5
-
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100
-
-/*
- * Offset of eflags on child stack..
- */
-#define EFL_OFFSET offsetof(struct pt_regs, eflags)
-
-static inline struct pt_regs *get_child_regs(struct task_struct *task)
-{
- void *stack_top = (void *)task->thread.esp0;
- return stack_top - sizeof(struct pt_regs);
-}
-
-/*
- * This routine will get a word off of the processes privileged stack.
- * the offset is bytes into the pt_regs structure on the stack.
- * This routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int get_stack_long(struct task_struct *task, int offset)
-{
- unsigned char *stack;
-
- stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
- stack += offset;
- return (*((int *)stack));
-}
-
-/*
- * This routine will put a word on the processes privileged stack.
- * the offset is bytes into the pt_regs structure on the stack.
- * This routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int put_stack_long(struct task_struct *task, int offset,
- unsigned long data)
-{
- unsigned char * stack;
-
- stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
- stack += offset;
- *(unsigned long *) stack = data;
- return 0;
-}
-
-static int putreg(struct task_struct *child,
- unsigned long regno, unsigned long value)
-{
- switch (regno >> 2) {
- case GS:
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.gs = value;
- return 0;
- case DS:
- case ES:
- case FS:
- if (value && (value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- case SS:
- case CS:
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- case EFL:
- value &= FLAG_MASK;
- value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
- break;
- }
- if (regno > FS*4)
- regno -= 1*4;
- put_stack_long(child, regno, value);
- return 0;
-}
-
-static unsigned long getreg(struct task_struct *child,
- unsigned long regno)
-{
- unsigned long retval = ~0UL;
-
- switch (regno >> 2) {
- case GS:
- retval = child->thread.gs;
- break;
- case DS:
- case ES:
- case FS:
- case SS:
- case CS:
- retval = 0xffff;
- /* fall through */
- default:
- if (regno > FS*4)
- regno -= 1*4;
- retval &= get_stack_long(child, regno);
- }
- return retval;
-}
-
-#define LDT_SEGMENT 4
-
-static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
- unsigned long addr, seg;
-
- addr = regs->eip;
- seg = regs->xcs & 0xffff;
- if (regs->eflags & VM_MASK) {
- addr = (addr & 0xffff) + (seg << 4);
- return addr;
- }
-
- /*
- * We'll assume that the code segments in the GDT
- * are all zero-based. That is largely true: the
- * TLS segments are used for data, and the PNPBIOS
- * and APM bios ones we just ignore here.
- */
- if (seg & LDT_SEGMENT) {
- u32 *desc;
- unsigned long base;
-
- seg &= ~7UL;
-
- mutex_lock(&child->mm->context.lock);
- if (unlikely((seg >> 3) >= child->mm->context.size))
- addr = -1L; /* bogus selector, access would fault */
- else {
- desc = child->mm->context.ldt + seg;
- base = ((desc[0] >> 16) |
- ((desc[1] & 0xff) << 16) |
- (desc[1] & 0xff000000));
-
- /* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
- addr &= 0xffff;
- addr += base;
- }
- mutex_unlock(&child->mm->context.lock);
- }
- return addr;
-}
-
-static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
-{
- int i, copied;
- unsigned char opcode[15];
- unsigned long addr = convert_eip_to_linear(child, regs);
-
- copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
- for (i = 0; i < copied; i++) {
- switch (opcode[i]) {
- /* popf and iret */
- case 0x9d: case 0xcf:
- return 1;
- /* opcode and address size prefixes */
- case 0x66: case 0x67:
- continue;
- /* irrelevant prefixes (segment overrides and repeats) */
- case 0x26: case 0x2e:
- case 0x36: case 0x3e:
- case 0x64: case 0x65:
- case 0xf0: case 0xf2: case 0xf3:
- continue;
-
- /*
- * pushf: NOTE! We should probably not let
- * the user see the TF bit being set. But
- * it's more pain than it's worth to avoid
- * it, and a debugger could emulate this
- * all in user space if it _really_ cares.
- */
- case 0x9c:
- default:
- return 0;
- }
- }
- return 0;
-}
-
-static void set_singlestep(struct task_struct *child)
-{
- struct pt_regs *regs = get_child_regs(child);
-
- /*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
- * cause us to set TF when returning to user mode.
- */
- set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /*
- * If TF was already set, don't do anything else
- */
- if (regs->eflags & TRAP_FLAG)
- return;
-
- /* Set TF on the kernel stack.. */
- regs->eflags |= TRAP_FLAG;
-
- /*
- * ..but if TF is changed by the instruction we will trace,
- * don't mark it as being "us" that set it, so that we
- * won't clear it by hand later.
- */
- if (is_setting_trap_flag(child, regs))
- return;
-
- child->ptrace |= PT_DTRACE;
-}
-
-static void clear_singlestep(struct task_struct *child)
-{
- /* Always clear TIF_SINGLESTEP... */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /* But touch TF only if it was set by us.. */
- if (child->ptrace & PT_DTRACE) {
- struct pt_regs *regs = get_child_regs(child);
- regs->eflags &= ~TRAP_FLAG;
- child->ptrace &= ~PT_DTRACE;
- }
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- clear_singlestep(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-}
-
-/*
- * Perform get_thread_area on behalf of the traced child.
- */
-static int
-ptrace_get_thread_area(struct task_struct *child,
- int idx, struct user_desc __user *user_desc)
-{
- struct user_desc info;
- struct desc_struct *desc;
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
-
- if (copy_to_user(user_desc, &info, sizeof(info)))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * Perform set_thread_area on behalf of the traced child.
- */
-static int
-ptrace_set_thread_area(struct task_struct *child,
- int idx, struct user_desc __user *user_desc)
-{
- struct user_desc info;
- struct desc_struct *desc;
-
- if (copy_from_user(&info, user_desc, sizeof(info)))
- return -EFAULT;
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
-
- return 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
- struct user * dummy = NULL;
- int i, ret;
- unsigned long __user *datap = (unsigned long __user *)data;
-
- switch (request) {
- /* when I and D space are separate, these will need to be fixed. */
- case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA:
- ret = generic_ptrace_peekdata(child, addr, data);
- break;
-
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long tmp;
-
- ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
- break;
-
- tmp = 0; /* Default return condition */
- if(addr < FRAME_SIZE*sizeof(long))
- tmp = getreg(child, addr);
- if(addr >= (long) &dummy->u_debugreg[0] &&
- addr <= (long) &dummy->u_debugreg[7]){
- addr -= (long) &dummy->u_debugreg[0];
- addr = addr >> 2;
- tmp = child->thread.debugreg[addr];
- }
- ret = put_user(tmp, datap);
- break;
- }
-
- /* when I and D space are separate, this will have to be fixed. */
- case PTRACE_POKETEXT: /* write the word at location addr. */
- case PTRACE_POKEDATA:
- ret = generic_ptrace_pokedata(child, addr, data);
- break;
-
- case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
- ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
- break;
-
- if (addr < FRAME_SIZE*sizeof(long)) {
- ret = putreg(child, addr, data);
- break;
- }
- /* We need to be very careful here. We implicitly
- want to modify a portion of the task_struct, and we
- have to be selective about what portions we allow someone
- to modify. */
-
- ret = -EIO;
- if(addr >= (long) &dummy->u_debugreg[0] &&
- addr <= (long) &dummy->u_debugreg[7]){
-
- if(addr == (long) &dummy->u_debugreg[4]) break;
- if(addr == (long) &dummy->u_debugreg[5]) break;
- if(addr < (long) &dummy->u_debugreg[4] &&
- ((unsigned long) data) >= TASK_SIZE-3) break;
-
- /* Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00.
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System
- * Programming)*/
-
- if(addr == (long) &dummy->u_debugreg[7]) {
- data &= ~DR_CONTROL_RESERVED;
- for(i=0; i<4; i++)
- if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- goto out_tsk;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- }
- addr -= (long) &dummy->u_debugreg;
- addr = addr >> 2;
- child->thread.debugreg[addr] = data;
- ret = 0;
- }
- break;
-
- case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT: /* restart after signal. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- if (request == PTRACE_SYSEMU) {
- set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- } else if (request == PTRACE_SYSCALL) {
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- } else {
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- }
- child->exit_code = data;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- ret = 0;
- break;
-
-/*
- * make the child exit. Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
- case PTRACE_KILL:
- ret = 0;
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- break;
- child->exit_code = SIGKILL;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- break;
-
- case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
- case PTRACE_SINGLESTEP: /* set the trap flag. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
-
- if (request == PTRACE_SYSEMU_SINGLESTEP)
- set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- else
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- set_singlestep(child);
- child->exit_code = data;
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
- ret = -EIO;
- break;
- }
- for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
- __put_user(getreg(child, i), datap);
- datap++;
- }
- ret = 0;
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
- ret = -EIO;
- break;
- }
- for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
- __get_user(tmp, datap);
- putreg(child, i, tmp);
- datap++;
- }
- ret = 0;
- break;
- }
-
- case PTRACE_GETFPREGS: { /* Get the child FPU state. */
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- if (!tsk_used_math(child))
- init_fpu(child);
- get_fpregs((struct user_i387_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPREGS: { /* Set the child FPU state. */
- if (!access_ok(VERIFY_READ, datap,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- set_fpregs(child, (struct user_i387_struct __user *)data);
- ret = 0;
- break;
- }
-
- case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_fxsr_struct))) {
- ret = -EIO;
- break;
- }
- if (!tsk_used_math(child))
- init_fpu(child);
- ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
- if (!access_ok(VERIFY_READ, datap,
- sizeof(struct user_fxsr_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
- break;
- }
-
- case PTRACE_GET_THREAD_AREA:
- ret = ptrace_get_thread_area(child, addr,
- (struct user_desc __user *) data);
- break;
-
- case PTRACE_SET_THREAD_AREA:
- ret = ptrace_set_thread_area(child, addr,
- (struct user_desc __user *) data);
- break;
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- out_tsk:
- return ret;
-}
-
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
-{
- struct siginfo info;
-
- tsk->thread.trap_no = 1;
- tsk->thread.error_code = error_code;
-
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_BRKPT;
-
- /* User-mode eip? */
- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
-
- /* Send us the fake SIGTRAP */
- force_sig_info(SIGTRAP, &info, tsk);
-}
-
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-__attribute__((regparm(3)))
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
- int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
- /*
- * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
- * interception
- */
- int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
- int ret = 0;
-
- /* do the secure computing check first */
- if (!entryexit)
- secure_computing(regs->orig_eax);
-
- if (unlikely(current->audit_context)) {
- if (entryexit)
- audit_syscall_exit(AUDITSC_RESULT(regs->eax),
- regs->eax);
- /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
- * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
- * not used, entry.S will call us only on syscall exit, not
- * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
- * calling send_sigtrap() on syscall entry.
- *
- * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
- * is_singlestep is false, despite his name, so we will still do
- * the correct thing.
- */
- else if (is_singlestep)
- goto out;
- }
-
- if (!(current->ptrace & PT_PTRACED))
- goto out;
-
- /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
- * and then is resumed with SYSEMU_SINGLESTEP, it will come in
- * here. We have to check this and return */
- if (is_sysemu && entryexit)
- return 0;
-
- /* Fake a debug trap */
- if (is_singlestep)
- send_sigtrap(current, regs, 0);
-
- if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
- goto out;
-
- /* the 0x80 provides a way for the tracing parent to distinguish
- between a syscall stop and SIGTRAP delivery */
- /* Note that the debugger could change the result of test_thread_flag!*/
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
- ret = is_sysemu;
-out:
- if (unlikely(current->audit_context) && !entryexit)
- audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax,
- regs->ebx, regs->ecx, regs->edx, regs->esi);
- if (ret == 0)
- return 0;
-
- regs->orig_eax = -1; /* force skip of syscall restarting */
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax);
- return 1;
-}
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
deleted file mode 100644
index 607085f3f08..00000000000
--- a/arch/x86/kernel/ptrace_64.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/* By Ross Biro 1/23/92 */
-/*
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * x86-64 port 2000-2002 Andi Kleen
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/debugreg.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/ia32.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (63-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100UL
-
-/*
- * eflags and offset of eflags on child stack..
- */
-#define EFLAGS offsetof(struct pt_regs, eflags)
-#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs)))
-
-/*
- * this routine will get a word off of the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline unsigned long get_stack_long(struct task_struct *task, int offset)
-{
- unsigned char *stack;
-
- stack = (unsigned char *)task->thread.rsp0;
- stack += offset;
- return (*((unsigned long *)stack));
-}
-
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline long put_stack_long(struct task_struct *task, int offset,
- unsigned long data)
-{
- unsigned char * stack;
-
- stack = (unsigned char *) task->thread.rsp0;
- stack += offset;
- *(unsigned long *) stack = data;
- return 0;
-}
-
-#define LDT_SEGMENT 4
-
-unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
- unsigned long addr, seg;
-
- addr = regs->rip;
- seg = regs->cs & 0xffff;
-
- /*
- * We'll assume that the code segments in the GDT
- * are all zero-based. That is largely true: the
- * TLS segments are used for data, and the PNPBIOS
- * and APM bios ones we just ignore here.
- */
- if (seg & LDT_SEGMENT) {
- u32 *desc;
- unsigned long base;
-
- seg &= ~7UL;
-
- mutex_lock(&child->mm->context.lock);
- if (unlikely((seg >> 3) >= child->mm->context.size))
- addr = -1L; /* bogus selector, access would fault */
- else {
- desc = child->mm->context.ldt + seg;
- base = ((desc[0] >> 16) |
- ((desc[1] & 0xff) << 16) |
- (desc[1] & 0xff000000));
-
- /* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
- addr &= 0xffff;
- addr += base;
- }
- mutex_unlock(&child->mm->context.lock);
- }
-
- return addr;
-}
-
-static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
-{
- int i, copied;
- unsigned char opcode[15];
- unsigned long addr = convert_rip_to_linear(child, regs);
-
- copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
- for (i = 0; i < copied; i++) {
- switch (opcode[i]) {
- /* popf and iret */
- case 0x9d: case 0xcf:
- return 1;
-
- /* CHECKME: 64 65 */
-
- /* opcode and address size prefixes */
- case 0x66: case 0x67:
- continue;
- /* irrelevant prefixes (segment overrides and repeats) */
- case 0x26: case 0x2e:
- case 0x36: case 0x3e:
- case 0x64: case 0x65:
- case 0xf2: case 0xf3:
- continue;
-
- case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
- /* 32-bit mode: register increment */
- return 0;
- /* 64-bit mode: REX prefix */
- continue;
-
- /* CHECKME: f2, f3 */
-
- /*
- * pushf: NOTE! We should probably not let
- * the user see the TF bit being set. But
- * it's more pain than it's worth to avoid
- * it, and a debugger could emulate this
- * all in user space if it _really_ cares.
- */
- case 0x9c:
- default:
- return 0;
- }
- }
- return 0;
-}
-
-static void set_singlestep(struct task_struct *child)
-{
- struct pt_regs *regs = task_pt_regs(child);
-
- /*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
- * cause us to set TF when returning to user mode.
- */
- set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /*
- * If TF was already set, don't do anything else
- */
- if (regs->eflags & TRAP_FLAG)
- return;
-
- /* Set TF on the kernel stack.. */
- regs->eflags |= TRAP_FLAG;
-
- /*
- * ..but if TF is changed by the instruction we will trace,
- * don't mark it as being "us" that set it, so that we
- * won't clear it by hand later.
- */
- if (is_setting_trap_flag(child, regs))
- return;
-
- child->ptrace |= PT_DTRACE;
-}
-
-static void clear_singlestep(struct task_struct *child)
-{
- /* Always clear TIF_SINGLESTEP... */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /* But touch TF only if it was set by us.. */
- if (child->ptrace & PT_DTRACE) {
- struct pt_regs *regs = task_pt_regs(child);
- regs->eflags &= ~TRAP_FLAG;
- child->ptrace &= ~PT_DTRACE;
- }
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- clear_singlestep(child);
-}
-
-static int putreg(struct task_struct *child,
- unsigned long regno, unsigned long value)
-{
- unsigned long tmp;
-
- switch (regno) {
- case offsetof(struct user_regs_struct,fs):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.fsindex = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,gs):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.gsindex = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,ds):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.ds = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,es):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.es = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,ss):
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,fs_base):
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- child->thread.fs = value;
- return 0;
- case offsetof(struct user_regs_struct,gs_base):
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- child->thread.gs = value;
- return 0;
- case offsetof(struct user_regs_struct, eflags):
- value &= FLAG_MASK;
- tmp = get_stack_long(child, EFL_OFFSET);
- tmp &= ~FLAG_MASK;
- value |= tmp;
- break;
- case offsetof(struct user_regs_struct,cs):
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- }
- put_stack_long(child, regno - sizeof(struct pt_regs), value);
- return 0;
-}
-
-static unsigned long getreg(struct task_struct *child, unsigned long regno)
-{
- unsigned long val;
- switch (regno) {
- case offsetof(struct user_regs_struct, fs):
- return child->thread.fsindex;
- case offsetof(struct user_regs_struct, gs):
- return child->thread.gsindex;
- case offsetof(struct user_regs_struct, ds):
- return child->thread.ds;
- case offsetof(struct user_regs_struct, es):
- return child->thread.es;
- case offsetof(struct user_regs_struct, fs_base):
- return child->thread.fs;
- case offsetof(struct user_regs_struct, gs_base):
- return child->thread.gs;
- default:
- regno = regno - sizeof(struct pt_regs);
- val = get_stack_long(child, regno);
- if (test_tsk_thread_flag(child, TIF_IA32))
- val &= 0xffffffff;
- return val;
- }
-
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
- long i, ret;
- unsigned ui;
-
- switch (request) {
- /* when I and D space are separate, these will need to be fixed. */
- case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA:
- ret = generic_ptrace_peekdata(child, addr, data);
- break;
-
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long tmp;
-
- ret = -EIO;
- if ((addr & 7) ||
- addr > sizeof(struct user) - 7)
- break;
-
- switch (addr) {
- case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- tmp = getreg(child, addr);
- break;
- case offsetof(struct user, u_debugreg[0]):
- tmp = child->thread.debugreg0;
- break;
- case offsetof(struct user, u_debugreg[1]):
- tmp = child->thread.debugreg1;
- break;
- case offsetof(struct user, u_debugreg[2]):
- tmp = child->thread.debugreg2;
- break;
- case offsetof(struct user, u_debugreg[3]):
- tmp = child->thread.debugreg3;
- break;
- case offsetof(struct user, u_debugreg[6]):
- tmp = child->thread.debugreg6;
- break;
- case offsetof(struct user, u_debugreg[7]):
- tmp = child->thread.debugreg7;
- break;
- default:
- tmp = 0;
- break;
- }
- ret = put_user(tmp,(unsigned long __user *) data);
- break;
- }
-
- /* when I and D space are separate, this will have to be fixed. */
- case PTRACE_POKETEXT: /* write the word at location addr. */
- case PTRACE_POKEDATA:
- ret = generic_ptrace_pokedata(child, addr, data);
- break;
-
- case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
- {
- int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
- ret = -EIO;
- if ((addr & 7) ||
- addr > sizeof(struct user) - 7)
- break;
-
- switch (addr) {
- case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- ret = putreg(child, addr, data);
- break;
- /* Disallows to set a breakpoint into the vsyscall */
- case offsetof(struct user, u_debugreg[0]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg0 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[1]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg1 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[2]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg2 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[3]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg3 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[6]):
- if (data >> 32)
- break;
- child->thread.debugreg6 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[7]):
- /* See arch/i386/kernel/ptrace.c for an explanation of
- * this awkward check.*/
- data &= ~DR_CONTROL_RESERVED;
- for(i=0; i<4; i++)
- if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- break;
- if (i == 4) {
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- ret = 0;
- }
- break;
- }
- break;
- }
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT: /* restart after signal. */
-
- ret = -EIO;
- if (!valid_signal(data))
- break;
- if (request == PTRACE_SYSCALL)
- set_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- else
- clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->exit_code = data;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- ret = 0;
- break;
-
-#ifdef CONFIG_IA32_EMULATION
- /* This makes only sense with 32bit programs. Allow a
- 64bit debugger to fully examine them too. Better
- don't use it against 64bit processes, use
- PTRACE_ARCH_PRCTL instead. */
- case PTRACE_SET_THREAD_AREA: {
- struct user_desc __user *p;
- int old;
- p = (struct user_desc __user *)data;
- get_user(old, &p->entry_number);
- put_user(addr, &p->entry_number);
- ret = do_set_thread_area(&child->thread, p);
- put_user(old, &p->entry_number);
- break;
- case PTRACE_GET_THREAD_AREA:
- p = (struct user_desc __user *)data;
- get_user(old, &p->entry_number);
- put_user(addr, &p->entry_number);
- ret = do_get_thread_area(&child->thread, p);
- put_user(old, &p->entry_number);
- break;
- }
-#endif
- /* normal 64bit interface to access TLS data.
- Works just like arch_prctl, except that the arguments
- are reversed. */
- case PTRACE_ARCH_PRCTL:
- ret = do_arch_prctl(child, data, addr);
- break;
-
-/*
- * make the child exit. Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
- case PTRACE_KILL:
- ret = 0;
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- break;
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->exit_code = SIGKILL;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- break;
-
- case PTRACE_SINGLESTEP: /* set the trap flag. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- set_singlestep(child);
- child->exit_code = data;
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
- sizeof(struct user_regs_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
- ret |= __put_user(getreg(child, ui),(unsigned long __user *) data);
- data += sizeof(long);
- }
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- if (!access_ok(VERIFY_READ, (unsigned __user *)data,
- sizeof(struct user_regs_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
- ret = __get_user(tmp, (unsigned long __user *) data);
- if (ret)
- break;
- ret = putreg(child, ui, tmp);
- if (ret)
- break;
- data += sizeof(long);
- }
- break;
- }
-
- case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */
- if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- ret = get_fpregs((struct user_i387_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */
- if (!access_ok(VERIFY_READ, (unsigned __user *)data,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- ret = set_fpregs(child, (struct user_i387_struct __user *)data);
- break;
- }
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-static void syscall_trace(struct pt_regs *regs)
-{
-
-#if 0
- printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n",
- current->comm,
- regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0),
- current_thread_info()->flags, current->ptrace);
-#endif
-
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
-}
-
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
-{
- /* do the secure computing check first */
- secure_computing(regs->orig_rax);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE)
- && (current->ptrace & PT_PTRACED))
- syscall_trace(regs);
-
- if (unlikely(current->audit_context)) {
- if (test_thread_flag(TIF_IA32)) {
- audit_syscall_entry(AUDIT_ARCH_I386,
- regs->orig_rax,
- regs->rbx, regs->rcx,
- regs->rdx, regs->rsi);
- } else {
- audit_syscall_entry(AUDIT_ARCH_X86_64,
- regs->orig_rax,
- regs->rdi, regs->rsi,
- regs->rdx, regs->r10);
- }
- }
-}
-
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
-{
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
-
- if ((test_thread_flag(TIF_SYSCALL_TRACE)
- || test_thread_flag(TIF_SINGLESTEP))
- && (current->ptrace & PT_PTRACED))
- syscall_trace(regs);
-}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index fab30e13483..150ba29a0d3 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -162,6 +162,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
+ ich_force_enable_hpet);
static struct pci_dev *cached_dev;
diff --git a/arch/x86/kernel/reboot_32.c b/arch/x86/kernel/reboot.c
index bb1a0f889c5..5818dc28167 100644
--- a/arch/x86/kernel/reboot_32.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,64 +1,94 @@
-#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/delay.h>
#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/efi.h>
-#include <linux/dmi.h>
-#include <linux/ctype.h>
-#include <linux/pm.h>
#include <linux/reboot.h>
-#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <linux/efi.h>
+#include <acpi/reboot.h>
+#include <asm/io.h>
#include <asm/apic.h>
-#include <asm/hpet.h>
#include <asm/desc.h>
-#include "mach_reboot.h"
+#include <asm/hpet.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
+#ifdef CONFIG_X86_32
+# include <linux/dmi.h>
+# include <linux/ctype.h>
+# include <linux/mc146818rtc.h>
+# include <asm/pgtable.h>
+#else
+# include <asm/iommu.h>
+#endif
+
/*
* Power off function, if any
*/
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
+static long no_idt[3];
static int reboot_mode;
-static int reboot_thru_bios;
+enum reboot_type reboot_type = BOOT_KBD;
+int reboot_force;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
static int reboot_cpu = -1;
#endif
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+ warm Don't set the cold reboot flag
+ cold Set the cold reboot flag
+ bios Reboot by jumping through the BIOS (only for X86_32)
+ smp Reboot by executing reset on BSP or other CPU (only for X86_32)
+ triple Force a triple fault (init)
+ kbd Use the keyboard controller. cold reset (default)
+ acpi Use the RESET_REG in the FADT
+ efi Use efi reset_system runtime service
+ force Avoid anything that could hang.
+ */
static int __init reboot_setup(char *str)
{
- while(1) {
+ for (;;) {
switch (*str) {
- case 'w': /* "warm" reboot (no memory testing etc) */
+ case 'w':
reboot_mode = 0x1234;
break;
- case 'c': /* "cold" reboot (with memory testing etc) */
- reboot_mode = 0x0;
- break;
- case 'b': /* "bios" reboot by jumping through the BIOS */
- reboot_thru_bios = 1;
- break;
- case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
- reboot_thru_bios = 0;
+
+ case 'c':
+ reboot_mode = 0;
break;
+
+#ifdef CONFIG_X86_32
#ifdef CONFIG_SMP
- case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
+ case 's':
if (isdigit(*(str+1))) {
reboot_cpu = (int) (*(str+1) - '0');
if (isdigit(*(str+2)))
reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
}
- /* we will leave sorting out the final value
- when we are ready to reboot, since we might not
- have set up boot_cpu_id or smp_num_cpu */
+ /* we will leave sorting out the final value
+ when we are ready to reboot, since we might not
+ have set up boot_cpu_id or smp_num_cpu */
break;
+#endif /* CONFIG_SMP */
+
+ case 'b':
#endif
+ case 'a':
+ case 'k':
+ case 't':
+ case 'e':
+ reboot_type = *str;
+ break;
+
+ case 'f':
+ reboot_force = 1;
+ break;
}
- if((str = strchr(str,',')) != NULL)
+
+ str = strchr(str, ',');
+ if (str)
str++;
else
break;
@@ -68,18 +98,21 @@ static int __init reboot_setup(char *str)
__setup("reboot=", reboot_setup);
+
+#ifdef CONFIG_X86_32
/*
* Reboot options and system auto-detection code provided by
* Dell Inc. so their systems "just work". :-)
*/
/*
- * Some machines require the "reboot=b" commandline option, this quirk makes that automatic.
+ * Some machines require the "reboot=b" commandline option,
+ * this quirk makes that automatic.
*/
static int __init set_bios_reboot(const struct dmi_system_id *d)
{
- if (!reboot_thru_bios) {
- reboot_thru_bios = 1;
+ if (reboot_type != BOOT_BIOS) {
+ reboot_type = BOOT_BIOS;
printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
}
return 0;
@@ -143,7 +176,6 @@ static int __init reboot_init(void)
dmi_check_system(reboot_dmi_table);
return 0;
}
-
core_initcall(reboot_init);
/* The following code and data reboots the machine by switching to real
@@ -152,7 +184,6 @@ core_initcall(reboot_init);
controller to pulse the CPU reset line, which is more thorough, but
doesn't work with at least one type of 486 motherboard. It is easy
to stop this code working; hence the copious comments. */
-
static unsigned long long
real_mode_gdt_entries [3] =
{
@@ -161,11 +192,9 @@ real_mode_gdt_entries [3] =
0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
};
-static struct Xgt_desc_struct
+static struct desc_ptr
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, 0 },
-no_idt = { 0, 0 };
-
+real_mode_idt = { 0x3ff, 0 };
/* This is 16-bit protected mode code to disable paging and the cache,
switch to real mode and jump to the BIOS reset code.
@@ -185,7 +214,6 @@ no_idt = { 0, 0 };
More could be done here to set up the registers as if a CPU reset had
occurred; hopefully real BIOSs don't assume much. */
-
static unsigned char real_mode_switch [] =
{
0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
@@ -223,7 +251,6 @@ void machine_real_restart(unsigned char *code, int length)
`outb_p' is needed instead of just `outb'. Use it to be on the
safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
*/
-
spin_lock(&rtc_lock);
CMOS_WRITE(0x00, 0x8f);
spin_unlock(&rtc_lock);
@@ -231,9 +258,8 @@ void machine_real_restart(unsigned char *code, int length)
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
-
- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+ memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
/*
* Use `swapper_pg_dir' as our page directory.
@@ -245,7 +271,6 @@ void machine_real_restart(unsigned char *code, int length)
boot)". This seems like a fairly standard thing that gets set by
REBOOT.COM programs, and the previous reset routine did this
too. */
-
*((unsigned short *)0x472) = reboot_mode;
/* For the switch to real mode, copy some code to low memory. It has
@@ -253,19 +278,16 @@ void machine_real_restart(unsigned char *code, int length)
has to have the same physical and virtual address, because it turns
off paging. Copy it near the end of the first page, out of the way
of BIOS variables. */
-
- memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
+ memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100),
real_mode_switch, sizeof (real_mode_switch));
- memcpy ((void *) (0x1000 - 100), code, length);
+ memcpy((void *)(0x1000 - 100), code, length);
/* Set up the IDT for real mode. */
-
load_idt(&real_mode_idt);
/* Set up a GDT from which we can load segment descriptors for real
mode. The GDT is not used in real mode; it is just needed here to
prepare the descriptors. */
-
load_gdt(&real_mode_gdt);
/* Load the data segment registers, and thus the descriptors ready for
@@ -273,7 +295,6 @@ void machine_real_restart(unsigned char *code, int length)
selector value being loaded here. This is so that the segment
registers don't have to be reloaded after switching to real mode:
the values are consistent for real mode operation already. */
-
__asm__ __volatile__ ("movl $0x0010,%%eax\n"
"\tmovl %%eax,%%ds\n"
"\tmovl %%eax,%%es\n"
@@ -284,130 +305,147 @@ void machine_real_restart(unsigned char *code, int length)
/* Jump to the 16-bit code that we copied earlier. It disables paging
and the cache, switches to real mode, and jumps to the BIOS reset
entry point. */
-
__asm__ __volatile__ ("ljmp $0x0008,%0"
:
- : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
+ : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100)));
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
#endif
-static void native_machine_shutdown(void)
+#endif /* CONFIG_X86_32 */
+
+static inline void kb_wait(void)
+{
+ int i;
+
+ for (i = 0; i < 0x10000; i++) {
+ if ((inb(0x64) & 0x02) == 0)
+ break;
+ udelay(2);
+ }
+}
+
+void machine_emergency_restart(void)
+{
+ int i;
+
+ /* Tell the BIOS if we want cold or warm reboot */
+ *((unsigned short *)__va(0x472)) = reboot_mode;
+
+ for (;;) {
+ /* Could also try the reset bit in the Hammer NB */
+ switch (reboot_type) {
+ case BOOT_KBD:
+ for (i = 0; i < 10; i++) {
+ kb_wait();
+ udelay(50);
+ outb(0xfe, 0x64); /* pulse reset low */
+ udelay(50);
+ }
+
+ case BOOT_TRIPLE:
+ load_idt((const struct desc_ptr *)&no_idt);
+ __asm__ __volatile__("int3");
+
+ reboot_type = BOOT_KBD;
+ break;
+
+#ifdef CONFIG_X86_32
+ case BOOT_BIOS:
+ machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
+
+ reboot_type = BOOT_KBD;
+ break;
+#endif
+
+ case BOOT_ACPI:
+ acpi_reboot();
+ reboot_type = BOOT_KBD;
+ break;
+
+
+ case BOOT_EFI:
+ if (efi_enabled)
+ efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+ EFI_SUCCESS, 0, NULL);
+
+ reboot_type = BOOT_KBD;
+ break;
+ }
+ }
+}
+
+void machine_shutdown(void)
{
+ /* Stop the cpus and apics */
#ifdef CONFIG_SMP
int reboot_cpu_id;
/* The boot cpu is always logical cpu 0 */
reboot_cpu_id = 0;
+#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
- cpu_isset(reboot_cpu, cpu_online_map)) {
+ cpu_isset(reboot_cpu, cpu_online_map))
reboot_cpu_id = reboot_cpu;
- }
+#endif
- /* Make certain the cpu I'm rebooting on is online */
- if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ /* Make certain the cpu I'm about to reboot on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map))
reboot_cpu_id = smp_processor_id();
- }
/* Make certain I only run on the appropriate processor */
set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
- /* O.K. Now that I'm on the appropriate processor, stop
- * all of the others, and disable their local APICs.
+ /* O.K Now that I'm on the appropriate processor,
+ * stop all of the others.
*/
-
smp_send_stop();
-#endif /* CONFIG_SMP */
+#endif
lapic_shutdown();
#ifdef CONFIG_X86_IO_APIC
disable_IO_APIC();
#endif
+
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
-}
-void __attribute__((weak)) mach_reboot_fixups(void)
-{
+#ifdef CONFIG_X86_64
+ pci_iommu_shutdown();
+#endif
}
-static void native_machine_emergency_restart(void)
+void machine_restart(char *__unused)
{
- if (!reboot_thru_bios) {
- if (efi_enabled) {
- efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
- }
- /* rebooting needs to touch the page at absolute addr 0 */
- *((unsigned short *)__va(0x472)) = reboot_mode;
- for (;;) {
- mach_reboot_fixups(); /* for board specific fixups */
- mach_reboot();
- /* That didn't work - force a triple fault.. */
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
- }
- }
- if (efi_enabled)
- efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
+ printk("machine restart\n");
- machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
-}
-
-static void native_machine_restart(char * __unused)
-{
- machine_shutdown();
+ if (!reboot_force)
+ machine_shutdown();
machine_emergency_restart();
}
-static void native_machine_halt(void)
+void machine_halt(void)
{
}
-static void native_machine_power_off(void)
+void machine_power_off(void)
{
if (pm_power_off) {
- machine_shutdown();
+ if (!reboot_force)
+ machine_shutdown();
pm_power_off();
}
}
-
struct machine_ops machine_ops = {
- .power_off = native_machine_power_off,
- .shutdown = native_machine_shutdown,
- .emergency_restart = native_machine_emergency_restart,
- .restart = native_machine_restart,
- .halt = native_machine_halt,
+ .power_off = machine_power_off,
+ .shutdown = machine_shutdown,
+ .emergency_restart = machine_emergency_restart,
+ .restart = machine_restart,
+ .halt = machine_halt
};
-
-void machine_power_off(void)
-{
- machine_ops.power_off();
-}
-
-void machine_shutdown(void)
-{
- machine_ops.shutdown();
-}
-
-void machine_emergency_restart(void)
-{
- machine_ops.emergency_restart();
-}
-
-void machine_restart(char *cmd)
-{
- machine_ops.restart(cmd);
-}
-
-void machine_halt(void)
-{
- machine_ops.halt();
-}
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
deleted file mode 100644
index 53620a92a8f..00000000000
--- a/arch/x86/kernel/reboot_64.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/* Various gunk just to reboot the machine. */
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/hw_irq.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/apic.h>
-#include <asm/hpet.h>
-#include <asm/gart.h>
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-static long no_idt[3];
-static enum {
- BOOT_TRIPLE = 't',
- BOOT_KBD = 'k'
-} reboot_type = BOOT_KBD;
-static int reboot_mode = 0;
-int reboot_force;
-
-/* reboot=t[riple] | k[bd] [, [w]arm | [c]old]
- warm Don't set the cold reboot flag
- cold Set the cold reboot flag
- triple Force a triple fault (init)
- kbd Use the keyboard controller. cold reset (default)
- force Avoid anything that could hang.
- */
-static int __init reboot_setup(char *str)
-{
- for (;;) {
- switch (*str) {
- case 'w':
- reboot_mode = 0x1234;
- break;
-
- case 'c':
- reboot_mode = 0;
- break;
-
- case 't':
- case 'b':
- case 'k':
- reboot_type = *str;
- break;
- case 'f':
- reboot_force = 1;
- break;
- }
- if((str = strchr(str,',')) != NULL)
- str++;
- else
- break;
- }
- return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-static inline void kb_wait(void)
-{
- int i;
-
- for (i=0; i<0x10000; i++)
- if ((inb_p(0x64) & 0x02) == 0)
- break;
-}
-
-void machine_shutdown(void)
-{
- unsigned long flags;
-
- /* Stop the cpus and apics */
-#ifdef CONFIG_SMP
- int reboot_cpu_id;
-
- /* The boot cpu is always logical cpu 0 */
- reboot_cpu_id = 0;
-
- /* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
- reboot_cpu_id = smp_processor_id();
- }
-
- /* Make certain I only run on the appropriate processor */
- set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
- /* O.K Now that I'm on the appropriate processor,
- * stop all of the others.
- */
- smp_send_stop();
-#endif
-
- local_irq_save(flags);
-
-#ifndef CONFIG_SMP
- disable_local_APIC();
-#endif
-
- disable_IO_APIC();
-
-#ifdef CONFIG_HPET_TIMER
- hpet_disable();
-#endif
- local_irq_restore(flags);
-
- pci_iommu_shutdown();
-}
-
-void machine_emergency_restart(void)
-{
- int i;
-
- /* Tell the BIOS if we want cold or warm reboot */
- *((unsigned short *)__va(0x472)) = reboot_mode;
-
- for (;;) {
- /* Could also try the reset bit in the Hammer NB */
- switch (reboot_type) {
- case BOOT_KBD:
- for (i=0; i<10; i++) {
- kb_wait();
- udelay(50);
- outb(0xfe,0x64); /* pulse reset low */
- udelay(50);
- }
-
- case BOOT_TRIPLE:
- load_idt((const struct desc_ptr *)&no_idt);
- __asm__ __volatile__("int3");
-
- reboot_type = BOOT_KBD;
- break;
- }
- }
-}
-
-void machine_restart(char * __unused)
-{
- printk("machine restart\n");
-
- if (!reboot_force) {
- machine_shutdown();
- }
- machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
-}
-
-void machine_power_off(void)
-{
- if (pm_power_off) {
- if (!reboot_force) {
- machine_shutdown();
- }
- pm_power_off();
- }
-}
-
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index f452726c0fe..dec0b5ec25c 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci_dev *dev)
udelay(50); /* shouldn't get here but be safe and spin a while */
}
+static void rdc321x_reset(struct pci_dev *dev)
+{
+ unsigned i;
+ /* Voluntary reset the watchdog timer */
+ outl(0x80003840, 0xCF8);
+ /* Generate a CPU reset on next tick */
+ i = inl(0xCFC);
+ /* Use the minimum timer resolution */
+ i |= 0x1600;
+ outl(i, 0xCFC);
+ outb(1, 0x92);
+}
+
struct device_fixup {
unsigned int vendor;
unsigned int device;
@@ -40,6 +53,7 @@ static struct device_fixup fixups_table[] = {
{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
+{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
};
/*
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
new file mode 100644
index 00000000000..eb9b1a198f5
--- /dev/null
+++ b/arch/x86/kernel/rtc.c
@@ -0,0 +1,204 @@
+/*
+ * RTC related functions
+ */
+#include <linux/acpi.h>
+#include <linux/bcd.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/time.h>
+#include <asm/vsyscall.h>
+
+#ifdef CONFIG_X86_32
+# define CMOS_YEARS_OFFS 1900
+/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with. It is required for NMI access to the
+ * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+#else
+/*
+ * x86-64 systems only exists since 2002.
+ * This will work up to Dec 31, 2100
+ */
+# define CMOS_YEARS_OFFS 2000
+#endif
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ * sets the minutes. Usually you'll only notice that after reboot!
+ */
+int mach_set_rtc_mmss(unsigned long nowtime)
+{
+ int retval = 0;
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char save_control, save_freq_select;
+
+ /* tell the clock it's being set */
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+ /* stop and reset prescaler */
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ BCD_TO_BIN(cmos_minutes);
+
+ /*
+ * since we're only adjusting minutes and seconds,
+ * don't interfere with hour overflow. This avoids
+ * messing with unknown time zones but requires your
+ * RTC not to be off by more than 15 minutes
+ */
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ /* correct for half hour time zone */
+ if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
+ real_minutes += 30;
+ real_minutes %= 60;
+
+ if (abs(real_minutes - cmos_minutes) < 30) {
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
+ }
+ CMOS_WRITE(real_seconds,RTC_SECONDS);
+ CMOS_WRITE(real_minutes,RTC_MINUTES);
+ } else {
+ printk(KERN_WARNING
+ "set_rtc_mmss: can't update from %d to %d\n",
+ cmos_minutes, real_minutes);
+ retval = -1;
+ }
+
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+
+ return retval;
+}
+
+unsigned long mach_get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec, century = 0;
+
+ /*
+ * If UIP is clear, then we have >= 244 microseconds before
+ * RTC registers will be updated. Spec sheet says that this
+ * is the reliable way to read RTC - registers. If UIP is set
+ * then the register access might be invalid.
+ */
+ while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ cpu_relax();
+
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64)
+ /* CHECKME: Is this really 64bit only ??? */
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+ acpi_gbl_FADT.century)
+ century = CMOS_READ(acpi_gbl_FADT.century);
+#endif
+
+ if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+
+ if (century) {
+ BCD_TO_BIN(century);
+ year += century * 100;
+ printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
+ } else {
+ year += CMOS_YEARS_OFFS;
+ if (year < 1970)
+ year += 100;
+ }
+
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+ unsigned char val;
+
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ val = inb_p(RTC_PORT(1));
+ lock_cmos_suffix(addr);
+ return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ outb_p(val, RTC_PORT(1));
+ lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+static int set_rtc_mmss(unsigned long nowtime)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ retval = set_wallclock(nowtime);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return retval;
+}
+
+/* not static: needed by APM */
+unsigned long read_persistent_clock(void)
+{
+ unsigned long retval, flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ retval = get_wallclock();
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return retval;
+}
+
+int update_persistent_clock(struct timespec now)
+{
+ return set_rtc_mmss(now.tv_sec);
+}
+
+unsigned long long native_read_tsc(void)
+{
+ return __native_read_tsc();
+}
+EXPORT_SYMBOL(native_read_tsc);
+
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 3558ac78c92..309366f8f60 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -24,7 +24,11 @@
#include <asm/sections.h>
#include <asm/setup.h>
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
+#else
+struct boot_params boot_params;
+#endif
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
@@ -37,6 +41,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
+
static int do_not_nx __cpuinitdata = 0;
/* noexec=on|off
@@ -80,6 +86,43 @@ static int __init nonx32_setup(char *str)
__setup("noexec32=", nonx32_setup);
/*
+ * Copy data used in early init routines from the initial arrays to the
+ * per cpu data areas. These arrays then become expendable and the
+ * *_early_ptr's are zeroed indicating that the static arrays are gone.
+ */
+static void __init setup_per_cpu_maps(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SMP
+ if (per_cpu_offset(cpu)) {
+#endif
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ x86_cpu_to_apicid_init[cpu];
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ x86_bios_cpu_apicid_init[cpu];
+#ifdef CONFIG_NUMA
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ x86_cpu_to_node_map_init[cpu];
+#endif
+#ifdef CONFIG_SMP
+ }
+ else
+ printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+ cpu);
+#endif
+ }
+
+ /* indicate the early static arrays will soon be gone */
+ x86_cpu_to_apicid_early_ptr = NULL;
+ x86_bios_cpu_apicid_early_ptr = NULL;
+#ifdef CONFIG_NUMA
+ x86_cpu_to_node_map_early_ptr = NULL;
+#endif
+}
+
+/*
* Great future plan:
* Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
* Always point %gs to its beginning
@@ -100,18 +143,21 @@ void __init setup_per_cpu_areas(void)
for_each_cpu_mask (i, cpu_possible_map) {
char *ptr;
- if (!NODE_DATA(cpu_to_node(i))) {
+ if (!NODE_DATA(early_cpu_to_node(i))) {
printk("cpu with no node %d, num_online_nodes %d\n",
i, num_online_nodes());
ptr = alloc_bootmem_pages(size);
} else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
+ ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
+
+ /* setup percpu data maps early */
+ setup_per_cpu_maps();
}
void pda_init(int cpu)
@@ -169,7 +215,8 @@ void syscall_init(void)
#endif
/* Flags to clear on syscall */
- wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
+ wrmsrl(MSR_SYSCALL_MASK,
+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
}
void __cpuinit check_efer(void)
@@ -227,7 +274,7 @@ void __cpuinit cpu_init (void)
* and set up the GDT descriptor:
*/
if (cpu)
- memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
+ memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
cpu_gdt_descr[cpu].size = GDT_SIZE;
load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
@@ -257,10 +304,10 @@ void __cpuinit cpu_init (void)
v, cpu);
}
estacks += PAGE_SIZE << order[v];
- orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
+ orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
}
- t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
/*
* <= is required because the CPU will access up to
* 8 bits beyond the end of the IO permission bitmap.
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 9c24b45b513..62adc5f20be 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -44,9 +44,12 @@
#include <linux/crash_dump.h>
#include <linux/dmi.h>
#include <linux/pfn.h>
+#include <linux/pci.h>
+#include <linux/init_ohci1394_dma.h>
#include <video/edid.h>
+#include <asm/mtrr.h>
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
@@ -67,14 +70,83 @@
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;
-int disable_pse __cpuinitdata = 0;
-
/*
* Machine setup..
*/
-extern struct resource code_resource;
-extern struct resource data_resource;
-extern struct resource bss_resource;
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource bss_resource = {
+ .name = "Kernel bss",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource video_ram_resource = {
+ .name = "Video RAM area",
+ .start = 0xa0000,
+ .end = 0xbffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+ .name = "dma1",
+ .start = 0x0000,
+ .end = 0x001f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic1",
+ .start = 0x0020,
+ .end = 0x0021,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer0",
+ .start = 0x0040,
+ .end = 0x0043,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer1",
+ .start = 0x0050,
+ .end = 0x0053,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "keyboard",
+ .start = 0x0060,
+ .end = 0x006f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma page reg",
+ .start = 0x0080,
+ .end = 0x008f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic2",
+ .start = 0x00a0,
+ .end = 0x00a1,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma2",
+ .start = 0x00c0,
+ .end = 0x00df,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "fpu",
+ .start = 0x00f0,
+ .end = 0x00ff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+} };
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -116,13 +188,17 @@ extern int root_mountflags;
unsigned long saved_videomode;
-#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_IMAGE_START_MASK 0x07FF
#define RAMDISK_PROMPT_FLAG 0x8000
-#define RAMDISK_LOAD_FLAG 0x4000
+#define RAMDISK_LOAD_FLAG 0x4000
static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
+#else
+struct boot_params boot_params;
+#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
@@ -166,8 +242,7 @@ static int __init parse_mem(char *arg)
return -EINVAL;
if (strcmp(arg, "nopentium") == 0) {
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
- disable_pse = 1;
+ setup_clear_cpu_cap(X86_FEATURE_PSE);
} else {
/* If the user specifies memory size, we
* limit the BIOS-provided memory map to
@@ -176,7 +251,7 @@ static int __init parse_mem(char *arg)
* trim the existing memory map.
*/
unsigned long long mem_size;
-
+
mem_size = memparse(arg, &arg);
limit_regions(mem_size);
user_defined_memmap = 1;
@@ -315,7 +390,7 @@ static void __init reserve_ebda_region(void)
unsigned int addr;
addr = get_bios_ebda();
if (addr)
- reserve_bootmem(addr, PAGE_SIZE);
+ reserve_bootmem(addr, PAGE_SIZE);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -420,6 +495,100 @@ static inline void __init reserve_crashkernel(void)
{}
#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static bool do_relocate_initrd = false;
+
+static void __init reserve_initrd(void)
+{
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ unsigned long ramdisk_here;
+
+ initrd_start = 0;
+
+ if (!boot_params.hdr.type_of_loader ||
+ !ramdisk_image || !ramdisk_size)
+ return; /* No initrd provided by bootloader */
+
+ if (ramdisk_end < ramdisk_image) {
+ printk(KERN_ERR "initrd wraps around end of memory, "
+ "disabling initrd\n");
+ return;
+ }
+ if (ramdisk_size >= end_of_lowmem/2) {
+ printk(KERN_ERR "initrd too large to handle, "
+ "disabling initrd\n");
+ return;
+ }
+ if (ramdisk_end <= end_of_lowmem) {
+ /* All in lowmem, easy case */
+ reserve_bootmem(ramdisk_image, ramdisk_size);
+ initrd_start = ramdisk_image + PAGE_OFFSET;
+ initrd_end = initrd_start+ramdisk_size;
+ return;
+ }
+
+ /* We need to move the initrd down into lowmem */
+ ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
+
+ /* Note: this includes all the lowmem currently occupied by
+ the initrd, we rely on that fact to keep the data intact. */
+ reserve_bootmem(ramdisk_here, ramdisk_size);
+ initrd_start = ramdisk_here + PAGE_OFFSET;
+ initrd_end = initrd_start + ramdisk_size;
+
+ do_relocate_initrd = true;
+}
+
+#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
+
+static void __init relocate_initrd(void)
+{
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ unsigned long ramdisk_here;
+ unsigned long slop, clen, mapaddr;
+ char *p, *q;
+
+ if (!do_relocate_initrd)
+ return;
+
+ ramdisk_here = initrd_start - PAGE_OFFSET;
+
+ q = (char *)initrd_start;
+
+ /* Copy any lowmem portion of the initrd */
+ if (ramdisk_image < end_of_lowmem) {
+ clen = end_of_lowmem - ramdisk_image;
+ p = (char *)__va(ramdisk_image);
+ memcpy(q, p, clen);
+ q += clen;
+ ramdisk_image += clen;
+ ramdisk_size -= clen;
+ }
+
+ /* Copy the highmem portion of the initrd */
+ while (ramdisk_size) {
+ slop = ramdisk_image & ~PAGE_MASK;
+ clen = ramdisk_size;
+ if (clen > MAX_MAP_CHUNK-slop)
+ clen = MAX_MAP_CHUNK-slop;
+ mapaddr = ramdisk_image & PAGE_MASK;
+ p = early_ioremap(mapaddr, clen+slop);
+ memcpy(q, p+slop, clen);
+ early_iounmap(p, clen+slop);
+ q += clen;
+ ramdisk_image += clen;
+ ramdisk_size -= clen;
+ }
+}
+
+#endif /* CONFIG_BLK_DEV_INITRD */
+
void __init setup_bootmem_allocator(void)
{
unsigned long bootmap_size;
@@ -475,26 +644,10 @@ void __init setup_bootmem_allocator(void)
*/
find_smp_config();
#endif
- numa_kva_reserve();
#ifdef CONFIG_BLK_DEV_INITRD
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
-
- if (ramdisk_end <= end_of_lowmem) {
- reserve_bootmem(ramdisk_image, ramdisk_size);
- initrd_start = ramdisk_image + PAGE_OFFSET;
- initrd_end = initrd_start+ramdisk_size;
- } else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- ramdisk_end, end_of_lowmem);
- initrd_start = 0;
- }
- }
+ reserve_initrd();
#endif
+ numa_kva_reserve();
reserve_crashkernel();
}
@@ -545,17 +698,11 @@ void __init setup_arch(char **cmdline_p)
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
pre_setup_arch_hook();
early_cpu_init();
+ early_ioremap_init();
- /*
- * FIXME: This isn't an official loader_type right
- * now but does currently work with elilo.
- * If we were configured as an EFI kernel, check to make
- * sure that we were loaded correctly from elilo and that
- * the system table is valid. If not, then initialize normally.
- */
#ifdef CONFIG_EFI
- if ((boot_params.hdr.type_of_loader == 0x50) &&
- boot_params.efi_info.efi_systab)
+ if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+ "EL32", 4))
efi_enabled = 1;
#endif
@@ -579,12 +726,9 @@ void __init setup_arch(char **cmdline_p)
rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
ARCH_SETUP
- if (efi_enabled)
- efi_init();
- else {
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- print_memory_map(memory_setup());
- }
+
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ print_memory_map(memory_setup());
copy_edd();
@@ -612,8 +756,16 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
+ if (efi_enabled)
+ efi_init();
+
max_low_pfn = setup_memory();
+ /* update e820 for memory not covered by WB MTRRs */
+ mtrr_bp_init();
+ if (mtrr_trim_uncached_memory(max_pfn))
+ max_low_pfn = setup_memory();
+
#ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined, and before kernel
@@ -636,6 +788,16 @@ void __init setup_arch(char **cmdline_p)
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
paging_init();
+
+ /*
+ * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
+ */
+
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ if (init_ohci1394_dma_early)
+ init_ohci1394_dma_on_all_controllers();
+#endif
+
remapped_pgdat_init();
sparse_init();
zone_sizes_init();
@@ -644,15 +806,19 @@ void __init setup_arch(char **cmdline_p)
* NOTE: at this point the bootmem allocator is fully available.
*/
+#ifdef CONFIG_BLK_DEV_INITRD
+ relocate_initrd();
+#endif
+
paravirt_post_allocator_init();
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
-#endif
- if (efi_enabled)
- efi_map_memmap();
+#endif
#ifdef CONFIG_ACPI
/*
@@ -661,9 +827,7 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
#endif
-#ifdef CONFIG_PCI
early_quirks();
-#endif
#ifdef CONFIG_ACPI
acpi_boot_init();
@@ -692,3 +856,26 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
}
+
+/*
+ * Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
+ */
+static int __init request_standard_resources(void)
+{
+ int i;
+
+ printk(KERN_INFO "Setting up standard PCI resources\n");
+ init_iomem_resources(&code_resource, &data_resource, &bss_resource);
+
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ /* request I/O space for devices used on all i[345]86 PCs */
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+ return 0;
+}
+
+subsys_initcall(request_standard_resources);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1d5f5..77fb87bf6e5 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -30,6 +30,7 @@
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/pci.h>
+#include <linux/efi.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
#include <linux/edd.h>
@@ -39,10 +40,13 @@
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/init_ohci1394_dma.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
+#include <asm/vsyscall.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/msr.h>
@@ -50,6 +54,7 @@
#include <video/edid.h>
#include <asm/e820.h>
#include <asm/dma.h>
+#include <asm/gart.h>
#include <asm/mpspec.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
@@ -59,6 +64,15 @@
#include <asm/sections.h>
#include <asm/dmi.h>
#include <asm/cacheflush.h>
+#include <asm/mce.h>
+#include <asm/ds.h>
+#include <asm/topology.h>
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define ARCH_SETUP
+#endif
/*
* Machine setup..
@@ -67,6 +81,8 @@
struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
unsigned long mmu_cr4_features;
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
@@ -76,7 +92,7 @@ unsigned long saved_video_mode;
int force_mwait __cpuinitdata;
-/*
+/*
* Early DMI memory
*/
int dmi_alloc_index;
@@ -122,25 +138,27 @@ struct resource standard_io_resources[] = {
#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
-struct resource data_resource = {
+static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
.flags = IORESOURCE_RAM,
};
-struct resource code_resource = {
+static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
.flags = IORESOURCE_RAM,
};
-struct resource bss_resource = {
+static struct resource bss_resource = {
.name = "Kernel bss",
.start = 0,
.end = 0,
.flags = IORESOURCE_RAM,
};
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
+
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
* stored by the crashed kernel. This option will be passed
@@ -166,12 +184,12 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
reserve_bootmem(bootmap, bootmap_size);
-}
+}
#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
@@ -205,7 +223,8 @@ static void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;
- free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+ free_mem =
+ ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
ret = parse_crashkernel(boot_command_line, free_mem,
&crash_size, &crash_base);
@@ -229,33 +248,21 @@ static inline void __init reserve_crashkernel(void)
{}
#endif
-#define EBDA_ADDR_POINTER 0x40E
-
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
-
-static void discover_ebda(void)
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) __init memory_setup(void)
{
- /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E
- */
- ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
- ebda_addr <<= 4;
-
- ebda_size = *(unsigned short *)__va(ebda_addr);
-
- /* Round EBDA up to pages */
- if (ebda_size == 0)
- ebda_size = 1;
- ebda_size <<= 10;
- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
- if (ebda_size > 64*1024)
- ebda_size = 64*1024;
+ machine_specific_memory_setup();
}
+/*
+ * setup_arch - architecture-specific boot-time initializations
+ *
+ * Note: On x86_64, fixmaps are ready for use even before this is called.
+ */
void __init setup_arch(char **cmdline_p)
{
+ unsigned i;
+
printk(KERN_INFO "Command line: %s\n", boot_command_line);
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@@ -269,7 +276,15 @@ void __init setup_arch(char **cmdline_p)
rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
- setup_memory_region();
+#ifdef CONFIG_EFI
+ if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+ "EL64", 4))
+ efi_enabled = 1;
+#endif
+
+ ARCH_SETUP
+
+ memory_setup();
copy_edd();
if (!boot_params.hdr.root_flags)
@@ -293,27 +308,47 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ if (init_ohci1394_dma_early)
+ init_ohci1394_dma_on_all_controllers();
+#endif
+
finish_e820_parsing();
+ early_gart_iommu_check();
+
e820_register_active_regions(0, 0, -1UL);
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
+ /* update e820 for memory not covered by WB MTRRs */
+ mtrr_bp_init();
+ if (mtrr_trim_uncached_memory(end_pfn)) {
+ e820_register_active_regions(0, 0, -1UL);
+ end_pfn = e820_end_of_ram();
+ }
+
num_physpages = end_pfn;
check_efer();
- discover_ebda();
-
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+ if (efi_enabled)
+ efi_init();
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
- /* setup to use the static apicid table during kernel startup */
- x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+ /* setup to use the early static init tables during kernel startup */
+ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
+ x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
+#ifdef CONFIG_NUMA
+ x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
+#endif
#endif
#ifdef CONFIG_ACPI
@@ -340,48 +375,26 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_NUMA
- numa_initmem_init(0, end_pfn);
+ numa_initmem_init(0, end_pfn);
#else
contig_initmem_init(0, end_pfn);
#endif
- /* Reserve direct mapping */
- reserve_bootmem_generic(table_start << PAGE_SHIFT,
- (table_end - table_start) << PAGE_SHIFT);
-
- /* reserve kernel */
- reserve_bootmem_generic(__pa_symbol(&_text),
- __pa_symbol(&_end) - __pa_symbol(&_text));
+ early_res_to_bootmem();
+#ifdef CONFIG_ACPI_SLEEP
/*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
+ * Reserve low memory region for sleep support.
*/
- reserve_bootmem_generic(0, PAGE_SIZE);
-
- /* reserve ebda region */
- if (ebda_addr)
- reserve_bootmem_generic(ebda_addr, ebda_size);
-#ifdef CONFIG_NUMA
- /* reserve nodemap region */
- if (nodemap_addr)
- reserve_bootmem_generic(nodemap_addr, nodemap_size);
+ acpi_reserve_bootmem();
#endif
-#ifdef CONFIG_SMP
- /* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
-#endif
+ if (efi_enabled)
+ efi_reserve_bootmem();
-#ifdef CONFIG_ACPI_SLEEP
/*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
-#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
+ * Find and reserve possible boot-time SMP configuration:
+ */
find_smp_config();
#ifdef CONFIG_BLK_DEV_INITRD
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
@@ -395,6 +408,8 @@ void __init setup_arch(char **cmdline_p)
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
} else {
+ /* Assumes everything on node 0 */
+ free_bootmem(ramdisk_image, ramdisk_size);
printk(KERN_ERR "initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
ramdisk_end, end_of_mem);
@@ -404,17 +419,10 @@ void __init setup_arch(char **cmdline_p)
#endif
reserve_crashkernel();
paging_init();
+ map_vsyscall();
-#ifdef CONFIG_PCI
early_quirks();
-#endif
- /*
- * set this early, so we dont allocate cpu0
- * if MADT list doesnt list BSP first
- * mpparse.c/MP_processor_info() allocates logical cpu numbers.
- */
- cpu_set(0, cpu_present_map);
#ifdef CONFIG_ACPI
/*
* Read APIC and some other early information from ACPI tables.
@@ -430,25 +438,24 @@ void __init setup_arch(char **cmdline_p)
if (smp_found_config)
get_smp_config();
init_apic_mappings();
+ ioapic_init_mappings();
/*
* We trust e820 completely. No explicit ROM probing in memory.
- */
- e820_reserve_resources();
+ */
+ e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
e820_mark_nosave_regions();
- {
- unsigned i;
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
- }
e820_setup_gap();
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
+ if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
@@ -479,9 +486,10 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size=(ecx>>24)+(edx>>24);
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
+ "D cache %dK (%d bytes/line)\n",
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size = (ecx>>24) + (edx>>24);
/* On K8 L1 TLB is inclusive, so don't count it */
c->x86_tlbsize = 0;
}
@@ -495,11 +503,8 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
c->x86_cache_size, ecx & 0xFF);
}
-
- if (n >= 0x80000007)
- cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
if (n >= 0x80000008) {
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
+ cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
}
@@ -508,14 +513,15 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
#ifdef CONFIG_NUMA
static int nearby_node(int apicid)
{
- int i;
+ int i, node;
+
for (i = apicid - 1; i >= 0; i--) {
- int node = apicid_to_node[i];
+ node = apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- int node = apicid_to_node[i];
+ node = apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
@@ -527,7 +533,7 @@ static int nearby_node(int apicid)
* On a AMD dual core setup the lower bits of the APIC id distingush the cores.
* Assumes number of cores is a power of two.
*/
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
+static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned bits;
@@ -536,7 +542,54 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
int node = 0;
unsigned apicid = hard_smp_processor_id();
#endif
- unsigned ecx = cpuid_ecx(0x80000008);
+ bits = c->x86_coreid_bits;
+
+ /* Low order bits define the core id (index of core in socket) */
+ c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
+ /* Convert the APIC ID into the socket ID */
+ c->phys_proc_id = phys_pkg_id(bits);
+
+#ifdef CONFIG_NUMA
+ node = c->phys_proc_id;
+ if (apicid_to_node[apicid] != NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (!node_online(node)) {
+ /* Two possibilities here:
+ - The CPU is missing memory and no node was created.
+ In that case try picking one from a nearby CPU
+ - The APIC IDs differ from the HyperTransport node IDs
+ which the K8 northbridge parsing fills in.
+ Assume they are all increased by a constant offset,
+ but in the same order as the HT nodeids.
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+
+ int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
+
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ numa_set_node(cpu, node);
+
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+#endif
+#endif
+}
+
+static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned bits, ecx;
+
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level < 0x80000008)
+ return;
+
+ ecx = cpuid_ecx(0x80000008);
c->x86_max_cores = (ecx & 0xff) + 1;
@@ -549,37 +602,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
bits++;
}
- /* Low order bits define the core id (index of core in socket) */
- c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
- /* Convert the APIC ID into the socket ID */
- c->phys_proc_id = phys_pkg_id(bits);
-
-#ifdef CONFIG_NUMA
- node = c->phys_proc_id;
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
- if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
- int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
- if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
- /* Pick a nearby node */
- if (!node_online(node))
- node = nearby_node(apicid);
- }
- numa_set_node(cpu, node);
+ c->x86_coreid_bits = bits;
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
-#endif
#endif
}
@@ -595,8 +619,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
static __cpuinit int amd_apic_timer_broken(void)
{
- u32 lo, hi;
- u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+ u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+
switch (eax & CPUID_XFAM) {
case CPUID_XFAM_K8:
if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
@@ -614,6 +638,15 @@ static __cpuinit int amd_apic_timer_broken(void)
return 0;
}
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+ early_init_amd_mc(c);
+
+ /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
+ if (c->x86_power & (1<<8))
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
unsigned level;
@@ -624,7 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/*
* Disable TLB flush filter by setting HWCR.FFDIS on K8
* bit 6 of msr C001_0015
- *
+ *
* Errata 63 for SH-B3 steppings
* Errata 122 for all steppings (F+ have it disabled by default)
*/
@@ -637,35 +670,32 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, &c->x86_capability);
-
+ clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
+
/* On C+ stepping K8 rep microcode works well for copy/memset */
level = cpuid_eax(1);
- if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
+ level >= 0x0f58))
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
if (c->x86 == 0x10 || c->x86 == 0x11)
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
/* Enable workaround for FXSAVE leak */
if (c->x86 >= 6)
- set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
level = get_model_name(c);
if (!level) {
- switch (c->x86) {
+ switch (c->x86) {
case 15:
/* Should distinguish Models here, but this is only
a fallback anyways. */
strcpy(c->x86_model_id, "Hammer");
- break;
- }
- }
+ break;
+ }
+ }
display_cacheinfo(c);
- /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
- if (c->x86_power & (1<<8))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008)
amd_detect_cmp(c);
@@ -677,41 +707,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
num_cache_leaves = 3;
if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
- set_bit(X86_FEATURE_K8, &c->x86_capability);
-
- /* RDTSC can be speculated around */
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_K8);
- /* Family 10 doesn't support C states in MWAIT so don't use it */
- if (c->x86 == 0x10 && !force_mwait)
- clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
if (amd_apic_timer_broken())
disable_apic_timer = 1;
}
-static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- u32 eax, ebx, ecx, edx;
- int index_msb, core_bits;
+ u32 eax, ebx, ecx, edx;
+ int index_msb, core_bits;
cpuid(1, &eax, &ebx, &ecx, &edx);
if (!cpu_has(c, X86_FEATURE_HT))
return;
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
goto out;
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
+ } else if (smp_num_siblings > 1) {
if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+ printk(KERN_WARNING "CPU: Unsupported number of "
+ "siblings %d", smp_num_siblings);
smp_num_siblings = 1;
return;
}
@@ -721,7 +748,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
- index_msb = get_count_order(smp_num_siblings) ;
+ index_msb = get_count_order(smp_num_siblings);
core_bits = get_count_order(c->x86_max_cores);
@@ -730,8 +757,10 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
out:
if ((c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
}
#endif
@@ -773,28 +802,39 @@ static void srat_detect_node(void)
#endif
}
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+{
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+}
+
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned n;
init_intel_cacheinfo(c);
- if (c->cpuid_level > 9 ) {
+ if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
/* Check for version and the number of counters */
if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
- set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
if (cpu_has_ds) {
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
- set_bit(X86_FEATURE_BTS, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
- set_bit(X86_FEATURE_PEBS, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_PEBS);
}
+
+ if (cpu_has_bts)
+ ds_init_intel(c);
+
n = c->extended_cpuid_level;
if (n >= 0x80000008) {
unsigned eax = cpuid_eax(0x80000008);
@@ -811,14 +851,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
c->x86_cache_alignment = c->x86_clflush_size * 2;
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
if (c->x86 == 6)
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- if (c->x86 == 15)
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- else
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- c->x86_max_cores = intel_num_cpu_cores(c);
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ c->x86_max_cores = intel_num_cpu_cores(c);
srat_detect_node();
}
@@ -835,18 +872,12 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
c->x86_vendor = X86_VENDOR_UNKNOWN;
}
-struct cpu_model_info {
- int vendor;
- int family;
- char *model_names[16];
-};
-
/* Do some early cpuid on the boot CPU to get some parameter that are
needed before check_bugs. Everything advanced is in identify_cpu
below. */
-void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
{
- u32 tfms;
+ u32 tfms, xlvl;
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = -1;
@@ -857,6 +888,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_clflush_size = 64;
c->x86_cache_alignment = c->x86_clflush_size;
c->x86_max_cores = 1;
+ c->x86_coreid_bits = 0;
c->extended_cpuid_level = 0;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@ -865,7 +897,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
(unsigned int *)&c->x86_vendor_id[0],
(unsigned int *)&c->x86_vendor_id[8],
(unsigned int *)&c->x86_vendor_id[4]);
-
+
get_cpu_vendor(c);
/* Initialize the standard set of capabilities */
@@ -883,7 +915,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86 += (tfms >> 20) & 0xff;
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
- if (c->x86_capability[0] & (1<<19))
+ if (c->x86_capability[0] & (1<<19))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
} else {
/* Have CPUID level 0 only - unheard of */
@@ -893,18 +925,6 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_SMP
c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
-{
- int i;
- u32 xlvl;
-
- early_identify_cpu(c);
-
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
c->extended_cpuid_level = xlvl;
@@ -925,6 +945,30 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
+ c->extended_cpuid_level = cpuid_eax(0x80000000);
+ if (c->extended_cpuid_level >= 0x80000007)
+ c->x86_power = cpuid_edx(0x80000007);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ early_init_amd(c);
+ break;
+ case X86_VENDOR_INTEL:
+ early_init_intel(c);
+ break;
+ }
+
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ early_identify_cpu(c);
+
init_scattered_cpuid_features(c);
c->apicid = phys_pkg_id(0);
@@ -954,8 +998,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
break;
}
- select_idle_routine(c);
- detect_ht(c);
+ detect_ht(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -965,32 +1008,56 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
*/
if (c != &boot_cpu_data) {
/* AND the already accumulated flags with these */
- for (i = 0 ; i < NCAPINTS ; i++)
+ for (i = 0; i < NCAPINTS; i++)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
+ /* Clear all flags overriden by options */
+ for (i = 0; i < NCAPINTS; i++)
+ c->x86_capability[i] ^= cleared_cpu_caps[i];
+
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
+ select_idle_routine(c);
+
if (c != &boot_cpu_data)
mtrr_ap_init();
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
+
+}
+
+static __init int setup_noclflush(char *arg)
+{
+ setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+ return 1;
}
-
+__setup("noclflush", setup_noclflush);
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
if (c->x86_model_id[0])
- printk("%s", c->x86_model_id);
+ printk(KERN_INFO "%s", c->x86_model_id);
- if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
+ if (c->x86_mask || c->cpuid_level >= 0)
+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
- printk("\n");
+ printk(KERN_CONT "\n");
}
+static __init int setup_disablecpuid(char *arg)
+{
+ int bit;
+ if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+ setup_clear_cpu_cap(bit);
+ else
+ return 0;
+ return 1;
+}
+__setup("clearcpuid=", setup_disablecpuid);
+
/*
* Get CPU information for use by the procfs.
*/
@@ -998,9 +1065,9 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
static int show_cpuinfo(struct seq_file *m, void *v)
{
struct cpuinfo_x86 *c = v;
- int cpu = 0;
+ int cpu = 0, i;
- /*
+ /*
* These flag bits must match the definitions in <asm/cpufeature.h>.
* NULL means this bit is undefined or reserved; either way it doesn't
* have meaning as far as Linux is concerned. Note that it's important
@@ -1010,10 +1077,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
*/
static const char *const x86_cap_flags[] = {
/* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
/* AMD-defined */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1080,34 +1147,35 @@ static int show_cpuinfo(struct seq_file *m, void *v)
cpu = c->cpu_index;
#endif
- seq_printf(m,"processor\t: %u\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- (unsigned)cpu,
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- (int)c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
+ seq_printf(m, "processor\t: %u\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n",
+ (unsigned)cpu,
+ c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+ c->x86,
+ (int)c->x86_model,
+ c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
if (c->x86_mask || c->cpuid_level >= 0)
seq_printf(m, "stepping\t: %d\n", c->x86_mask);
else
seq_printf(m, "stepping\t: unknown\n");
-
- if (cpu_has(c,X86_FEATURE_TSC)) {
+
+ if (cpu_has(c, X86_FEATURE_TSC)) {
unsigned int freq = cpufreq_quick_get((unsigned)cpu);
+
if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
+ freq / 1000, (freq % 1000));
}
/* Cache size */
- if (c->x86_cache_size >= 0)
+ if (c->x86_cache_size >= 0)
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
+
#ifdef CONFIG_SMP
if (smp_num_siblings * c->x86_max_cores > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
@@ -1116,48 +1184,43 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
-#endif
+#endif
seq_printf(m,
- "fpu\t\t: yes\n"
- "fpu_exception\t: yes\n"
- "cpuid level\t: %d\n"
- "wp\t\t: yes\n"
- "flags\t\t:",
+ "fpu\t\t: yes\n"
+ "fpu_exception\t: yes\n"
+ "cpuid level\t: %d\n"
+ "wp\t\t: yes\n"
+ "flags\t\t:",
c->cpuid_level);
- {
- int i;
- for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
- seq_printf(m, " %s", x86_cap_flags[i]);
- }
-
+ for (i = 0; i < 32*NCAPINTS; i++)
+ if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
+ seq_printf(m, " %s", x86_cap_flags[i]);
+
seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
c->loops_per_jiffy/(500000/HZ),
(c->loops_per_jiffy/(5000/HZ)) % 100);
- if (c->x86_tlbsize > 0)
+ if (c->x86_tlbsize > 0)
seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
- seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+ seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
c->x86_phys_bits, c->x86_virt_bits);
seq_printf(m, "power management:");
- {
- unsigned i;
- for (i = 0; i < 32; i++)
- if (c->x86_power & (1 << i)) {
- if (i < ARRAY_SIZE(x86_power_flags) &&
- x86_power_flags[i])
- seq_printf(m, "%s%s",
- x86_power_flags[i][0]?" ":"",
- x86_power_flags[i]);
- else
- seq_printf(m, " [%d]", i);
- }
+ for (i = 0; i < 32; i++) {
+ if (c->x86_power & (1 << i)) {
+ if (i < ARRAY_SIZE(x86_power_flags) &&
+ x86_power_flags[i])
+ seq_printf(m, "%s%s",
+ x86_power_flags[i][0]?" ":"",
+ x86_power_flags[i]);
+ else
+ seq_printf(m, " [%d]", i);
+ }
}
seq_printf(m, "\n\n");
@@ -1184,8 +1247,8 @@ static void c_stop(struct seq_file *m, void *v)
{
}
-struct seq_operations cpuinfo_op = {
- .start =c_start,
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
.next = c_next,
.stop = c_stop,
.show = show_cpuinfo,
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 20f29e4c1d3..caee1f002fe 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -23,6 +23,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/vdso.h>
#include "sigframe_32.h"
#define DEBUG_SIG 0
@@ -81,14 +82,14 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
}
asmlinkage int
-sys_sigaltstack(unsigned long ebx)
+sys_sigaltstack(unsigned long bx)
{
/* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
- struct pt_regs *regs = (struct pt_regs *)&ebx;
- const stack_t __user *uss = (const stack_t __user *)ebx;
- stack_t __user *uoss = (stack_t __user *)regs->ecx;
+ struct pt_regs *regs = (struct pt_regs *)&bx;
+ const stack_t __user *uss = (const stack_t __user *)bx;
+ stack_t __user *uoss = (stack_t __user *)regs->cx;
- return do_sigaltstack(uss, uoss, regs->esp);
+ return do_sigaltstack(uss, uoss, regs->sp);
}
@@ -109,12 +110,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
#define COPY_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
- regs->x##seg = tmp; }
+ regs->seg = tmp; }
#define COPY_SEG_STRICT(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
- regs->x##seg = tmp|3; }
+ regs->seg = tmp|3; }
#define GET_SEG(seg) \
{ unsigned short tmp; \
@@ -130,22 +131,22 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
- COPY(edi);
- COPY(esi);
- COPY(ebp);
- COPY(esp);
- COPY(ebx);
- COPY(edx);
- COPY(ecx);
- COPY(eip);
+ COPY(di);
+ COPY(si);
+ COPY(bp);
+ COPY(sp);
+ COPY(bx);
+ COPY(dx);
+ COPY(cx);
+ COPY(ip);
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
{
unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_eax = -1; /* disable syscall checks */
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_ax = -1; /* disable syscall checks */
}
{
@@ -164,7 +165,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
}
}
- err |= __get_user(*peax, &sc->eax);
+ err |= __get_user(*peax, &sc->ax);
return err;
badframe:
@@ -174,9 +175,9 @@ badframe:
asmlinkage int sys_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
+ struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8);
sigset_t set;
- int eax;
+ int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -192,17 +193,20 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- if (restore_sigcontext(regs, &frame->sc, &eax))
+ if (restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
- return eax;
+ return ax;
badframe:
- if (show_unhandled_signals && printk_ratelimit())
- printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
- " esp:%lx oeax:%lx\n",
+ if (show_unhandled_signals && printk_ratelimit()) {
+ printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx"
+ " sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
- current->comm, task_pid_nr(current), frame, regs->eip,
- regs->esp, regs->orig_eax);
+ current->comm, task_pid_nr(current), frame, regs->ip,
+ regs->sp, regs->orig_ax);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
force_sig(SIGSEGV, current);
return 0;
@@ -211,9 +215,9 @@ badframe:
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
+ struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp - 4);
sigset_t set;
- int eax;
+ int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -226,13 +230,13 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
+ if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
- return eax;
+ return ax;
badframe:
force_sig(SIGSEGV, current);
@@ -249,27 +253,27 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
{
int tmp, err = 0;
- err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs);
+ err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
- err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
- err |= __put_user(regs->edi, &sc->edi);
- err |= __put_user(regs->esi, &sc->esi);
- err |= __put_user(regs->ebp, &sc->ebp);
- err |= __put_user(regs->esp, &sc->esp);
- err |= __put_user(regs->ebx, &sc->ebx);
- err |= __put_user(regs->edx, &sc->edx);
- err |= __put_user(regs->ecx, &sc->ecx);
- err |= __put_user(regs->eax, &sc->eax);
+ err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+ err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+ err |= __put_user(regs->di, &sc->di);
+ err |= __put_user(regs->si, &sc->si);
+ err |= __put_user(regs->bp, &sc->bp);
+ err |= __put_user(regs->sp, &sc->sp);
+ err |= __put_user(regs->bx, &sc->bx);
+ err |= __put_user(regs->dx, &sc->dx);
+ err |= __put_user(regs->cx, &sc->cx);
+ err |= __put_user(regs->ax, &sc->ax);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user(regs->eip, &sc->eip);
- err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
- err |= __put_user(regs->eflags, &sc->eflags);
- err |= __put_user(regs->esp, &sc->esp_at_signal);
- err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
+ err |= __put_user(regs->ip, &sc->ip);
+ err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->sp, &sc->sp_at_signal);
+ err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
tmp = save_i387(fpstate);
if (tmp < 0)
@@ -290,29 +294,36 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
{
- unsigned long esp;
+ unsigned long sp;
/* Default to using normal stack */
- esp = regs->esp;
+ sp = regs->sp;
+
+ /*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+ if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+ return (void __user *) -1L;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(esp) == 0)
- esp = current->sas_ss_sp + current->sas_ss_size;
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
}
/* This is the legacy signal stack switching. */
- else if ((regs->xss & 0xffff) != __USER_DS &&
+ else if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
- esp = (unsigned long) ka->sa.sa_restorer;
+ sp = (unsigned long) ka->sa.sa_restorer;
}
- esp -= frame_size;
+ sp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0. */
- esp = ((esp + 4) & -16ul) - 4;
- return (void __user *) esp;
+ sp = ((sp + 4) & -16ul) - 4;
+ return (void __user *) sp;
}
/* These symbols are defined with the addresses in the vsyscall page.
@@ -355,9 +366,9 @@ static int setup_frame(int sig, struct k_sigaction *ka,
}
if (current->binfmt->hasvdso)
- restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
else
- restorer = (void *)&frame->retcode;
+ restorer = &frame->retcode;
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
@@ -379,16 +390,16 @@ static int setup_frame(int sig, struct k_sigaction *ka,
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->esp = (unsigned long) frame;
- regs->eip = (unsigned long) ka->sa.sa_handler;
- regs->eax = (unsigned long) sig;
- regs->edx = (unsigned long) 0;
- regs->ecx = (unsigned long) 0;
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ax = (unsigned long) sig;
+ regs->dx = (unsigned long) 0;
+ regs->cx = (unsigned long) 0;
- regs->xds = __USER_DS;
- regs->xes = __USER_DS;
- regs->xss = __USER_DS;
- regs->xcs = __USER_CS;
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+ regs->ss = __USER_DS;
+ regs->cs = __USER_CS;
/*
* Clear TF when entering the signal handler, but
@@ -396,13 +407,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~TF_MASK;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#if DEBUG_SIG
printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->eip, frame->pretcode);
+ current->comm, current->pid, frame, regs->ip, frame->pretcode);
#endif
return 0;
@@ -442,7 +453,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->esp),
+ err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
@@ -452,13 +463,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Set up to return from userspace. */
- restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn);
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
err |= __put_user(restorer, &frame->pretcode);
/*
- * This is movl $,%eax ; int $0x80
+ * This is movl $,%ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
@@ -472,16 +483,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->esp = (unsigned long) frame;
- regs->eip = (unsigned long) ka->sa.sa_handler;
- regs->eax = (unsigned long) usig;
- regs->edx = (unsigned long) &frame->info;
- regs->ecx = (unsigned long) &frame->uc;
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ax = (unsigned long) usig;
+ regs->dx = (unsigned long) &frame->info;
+ regs->cx = (unsigned long) &frame->uc;
- regs->xds = __USER_DS;
- regs->xes = __USER_DS;
- regs->xss = __USER_DS;
- regs->xcs = __USER_CS;
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+ regs->ss = __USER_DS;
+ regs->cs = __USER_CS;
/*
* Clear TF when entering the signal handler, but
@@ -489,13 +500,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~TF_MASK;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#if DEBUG_SIG
printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->eip, frame->pretcode);
+ current->comm, current->pid, frame, regs->ip, frame->pretcode);
#endif
return 0;
@@ -516,35 +527,33 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
- if (regs->orig_eax >= 0) {
+ if (regs->orig_ax >= 0) {
/* If so, check system call restarting.. */
- switch (regs->eax) {
+ switch (regs->ax) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
- regs->eax = -EINTR;
+ regs->ax = -EINTR;
break;
case -ERESTARTSYS:
if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->eax = -EINTR;
+ regs->ax = -EINTR;
break;
}
/* fallthrough */
case -ERESTARTNOINTR:
- regs->eax = regs->orig_eax;
- regs->eip -= 2;
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
}
}
/*
- * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
- * that register information in the sigcontext is correct.
+ * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+ * flag so that register information in the sigcontext is correct.
*/
- if (unlikely(regs->eflags & TF_MASK)
- && likely(current->ptrace & PT_DTRACE)) {
- current->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
+ if (unlikely(regs->flags & X86_EFLAGS_TF) &&
+ likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+ regs->flags &= ~X86_EFLAGS_TF;
/* Set up the stack frame */
if (ka->sa.sa_flags & SA_SIGINFO)
@@ -569,7 +578,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
-static void fastcall do_signal(struct pt_regs *regs)
+static void do_signal(struct pt_regs *regs)
{
siginfo_t info;
int signr;
@@ -599,8 +608,8 @@ static void fastcall do_signal(struct pt_regs *regs)
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
- if (unlikely(current->thread.debugreg[7]))
- set_debugreg(current->thread.debugreg[7], 7);
+ if (unlikely(current->thread.debugreg7))
+ set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
@@ -616,19 +625,19 @@ static void fastcall do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (regs->orig_eax >= 0) {
+ if (regs->orig_ax >= 0) {
/* Restart the system call - no handlers present */
- switch (regs->eax) {
+ switch (regs->ax) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- regs->eax = regs->orig_eax;
- regs->eip -= 2;
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
- regs->eax = __NR_restart_syscall;
- regs->eip -= 2;
+ regs->ax = __NR_restart_syscall;
+ regs->ip -= 2;
break;
}
}
@@ -651,7 +660,7 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
{
/* Pending single-step? */
if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->eflags |= TF_MASK;
+ regs->flags |= TF_MASK;
clear_thread_flag(TIF_SINGLESTEP);
}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 38d806467c0..7347bb14e30 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -39,7 +39,7 @@ asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
- return do_sigaltstack(uss, uoss, regs->rsp);
+ return do_sigaltstack(uss, uoss, regs->sp);
}
@@ -64,8 +64,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
#define COPY(x) err |= __get_user(regs->x, &sc->x)
- COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx);
- COPY(rdx); COPY(rcx); COPY(rip);
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
COPY(r8);
COPY(r9);
COPY(r10);
@@ -86,9 +86,9 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
{
unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
- regs->orig_rax = -1; /* disable syscall checks */
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ regs->orig_ax = -1; /* disable syscall checks */
}
{
@@ -108,7 +108,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
}
}
- err |= __get_user(*prax, &sc->rax);
+ err |= __get_user(*prax, &sc->ax);
return err;
badframe:
@@ -119,9 +119,9 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
sigset_t set;
- unsigned long eax;
+ unsigned long ax;
- frame = (struct rt_sigframe __user *)(regs->rsp - 8);
+ frame = (struct rt_sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
goto badframe;
}
@@ -135,17 +135,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
#ifdef DEBUG_SIG
- printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
+ printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax);
#endif
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
+ if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
- return eax;
+ return ax;
badframe:
signal_fault(regs,frame,"sigreturn");
@@ -165,14 +165,14 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
err |= __put_user(0, &sc->gs);
err |= __put_user(0, &sc->fs);
- err |= __put_user(regs->rdi, &sc->rdi);
- err |= __put_user(regs->rsi, &sc->rsi);
- err |= __put_user(regs->rbp, &sc->rbp);
- err |= __put_user(regs->rsp, &sc->rsp);
- err |= __put_user(regs->rbx, &sc->rbx);
- err |= __put_user(regs->rdx, &sc->rdx);
- err |= __put_user(regs->rcx, &sc->rcx);
- err |= __put_user(regs->rax, &sc->rax);
+ err |= __put_user(regs->di, &sc->di);
+ err |= __put_user(regs->si, &sc->si);
+ err |= __put_user(regs->bp, &sc->bp);
+ err |= __put_user(regs->sp, &sc->sp);
+ err |= __put_user(regs->bx, &sc->bx);
+ err |= __put_user(regs->dx, &sc->dx);
+ err |= __put_user(regs->cx, &sc->cx);
+ err |= __put_user(regs->ax, &sc->ax);
err |= __put_user(regs->r8, &sc->r8);
err |= __put_user(regs->r9, &sc->r9);
err |= __put_user(regs->r10, &sc->r10);
@@ -183,8 +183,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
err |= __put_user(regs->r15, &sc->r15);
err |= __put_user(me->thread.trap_no, &sc->trapno);
err |= __put_user(me->thread.error_code, &sc->err);
- err |= __put_user(regs->rip, &sc->rip);
- err |= __put_user(regs->eflags, &sc->eflags);
+ err |= __put_user(regs->ip, &sc->ip);
+ err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(mask, &sc->oldmask);
err |= __put_user(me->thread.cr2, &sc->cr2);
@@ -198,18 +198,18 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
static void __user *
get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
{
- unsigned long rsp;
+ unsigned long sp;
/* Default to using normal stack - redzone*/
- rsp = regs->rsp - 128;
+ sp = regs->sp - 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(rsp) == 0)
- rsp = current->sas_ss_sp + current->sas_ss_size;
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
}
- return (void __user *)round_down(rsp - size, 16);
+ return (void __user *)round_down(sp - size, 16);
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -246,7 +246,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->rsp),
+ err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
@@ -271,21 +271,21 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
#ifdef DEBUG_SIG
- printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
+ printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax);
#endif
/* Set up registers for signal handler */
- regs->rdi = sig;
+ regs->di = sig;
/* In case the signal handler was declared without prototypes */
- regs->rax = 0;
+ regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
next argument after the signal number on the stack. */
- regs->rsi = (unsigned long)&frame->info;
- regs->rdx = (unsigned long)&frame->uc;
- regs->rip = (unsigned long) ka->sa.sa_handler;
+ regs->si = (unsigned long)&frame->info;
+ regs->dx = (unsigned long)&frame->uc;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
- regs->rsp = (unsigned long)frame;
+ regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode,
even if the handler happens to be interrupting 32-bit code. */
@@ -295,12 +295,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
see include/asm-x86_64/uaccess.h for details. */
set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#ifdef DEBUG_SIG
printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
+ current->comm, current->pid, frame, regs->ip, frame->pretcode);
#endif
return 0;
@@ -321,44 +321,40 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
#ifdef DEBUG_SIG
- printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
+ printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n",
current->pid, sig,
- regs->rip, regs->rsp, regs);
+ regs->ip, regs->sp, regs);
#endif
/* Are we from a system call? */
- if ((long)regs->orig_rax >= 0) {
+ if ((long)regs->orig_ax >= 0) {
/* If so, check system call restarting.. */
- switch (regs->rax) {
+ switch (regs->ax) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
- regs->rax = -EINTR;
+ regs->ax = -EINTR;
break;
case -ERESTARTSYS:
if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->rax = -EINTR;
+ regs->ax = -EINTR;
break;
}
/* fallthrough */
case -ERESTARTNOINTR:
- regs->rax = regs->orig_rax;
- regs->rip -= 2;
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
break;
}
}
/*
- * If TF is set due to a debugger (PT_DTRACE), clear the TF
- * flag so that register information in the sigcontext is
- * correct.
+ * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+ * flag so that register information in the sigcontext is correct.
*/
- if (unlikely(regs->eflags & TF_MASK)) {
- if (likely(current->ptrace & PT_DTRACE)) {
- current->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
- }
+ if (unlikely(regs->flags & X86_EFLAGS_TF) &&
+ likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+ regs->flags &= ~X86_EFLAGS_TF;
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32)) {
@@ -430,21 +426,21 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if ((long)regs->orig_rax >= 0) {
+ if ((long)regs->orig_ax >= 0) {
/* Restart the system call - no handlers present */
- long res = regs->rax;
+ long res = regs->ax;
switch (res) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- regs->rax = regs->orig_rax;
- regs->rip -= 2;
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
- regs->rax = test_thread_flag(TIF_IA32) ?
+ regs->ax = test_thread_flag(TIF_IA32) ?
__NR_ia32_restart_syscall :
__NR_restart_syscall;
- regs->rip -= 2;
+ regs->ip -= 2;
break;
}
}
@@ -461,13 +457,13 @@ void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
- thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
+ printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n",
+ thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current));
#endif
/* Pending single-step? */
if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->eflags |= TF_MASK;
+ regs->flags |= X86_EFLAGS_TF;
clear_thread_flag(TIF_SINGLESTEP);
}
@@ -488,9 +484,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
- if (show_unhandled_signals && printk_ratelimit())
- printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
- me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax);
+ if (show_unhandled_signals && printk_ratelimit()) {
+ printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
force_sig(SIGSEGV, me);
}
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index fcaa026eb80..dc0cde9d16f 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -159,7 +159,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
apic_write_around(APIC_ICR, cfg);
}
-void fastcall send_IPI_self(int vector)
+void send_IPI_self(int vector)
{
__send_IPI_shortcut(APIC_DEST_SELF, vector);
}
@@ -223,7 +223,7 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
*/
local_irq_save(flags);
- for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+ for_each_possible_cpu(query_cpu) {
if (cpu_isset(query_cpu, mask)) {
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
vector);
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
{
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
BUG();
cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
+EXPORT_SYMBOL_GPL(leave_mm);
/*
*
@@ -310,7 +311,7 @@ void leave_mm(unsigned long cpu)
* 2) Leave the mm if we are in the lazy tlb mode.
*/
-fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
+void smp_invalidate_interrupt(struct pt_regs *regs)
{
unsigned long cpu;
@@ -638,13 +639,13 @@ static void native_smp_send_stop(void)
* all the work is done automatically when
* we return from the interrupt.
*/
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+void smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
__get_cpu_var(irq_stat).irq_resched_count++;
}
-fastcall void smp_call_function_interrupt(struct pt_regs *regs)
+void smp_call_function_interrupt(struct pt_regs *regs)
{
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
@@ -675,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id)
{
int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_possible_cpu(i) {
if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
return i;
}
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 03fa6ed559c..2fd74b06db6 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -29,7 +29,7 @@
#include <asm/idle.h>
/*
- * Smarter SMP flushing macros.
+ * Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
@@ -37,15 +37,15 @@
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*
- * More scalable flush, from Andi Kleen
+ * More scalable flush, from Andi Kleen
*
- * To avoid global state use 8 different call vectors.
- * Each CPU uses a specific vector to trigger flushes on other
- * CPUs. Depending on the received vector the target CPUs look into
+ * To avoid global state use 8 different call vectors.
+ * Each CPU uses a specific vector to trigger flushes on other
+ * CPUs. Depending on the received vector the target CPUs look into
* the right per cpu variable for the flush data.
*
- * With more than 8 CPUs they are hashed to the 8 available
- * vectors. The limited global vector space forces us to this right now.
+ * With more than 8 CPUs they are hashed to the 8 available
+ * vectors. The limited global vector space forces us to this right now.
* In future when interrupts are split into per CPU domains this could be
* fixed, at the cost of triggering multiple IPIs in some cases.
*/
@@ -55,7 +55,6 @@ union smp_flush_state {
cpumask_t flush_cpumask;
struct mm_struct *flush_mm;
unsigned long flush_va;
-#define FLUSH_ALL -1ULL
spinlock_t tlbstate_lock;
};
char pad[SMP_CACHE_BYTES];
@@ -67,16 +66,17 @@ union smp_flush_state {
static DEFINE_PER_CPU(union smp_flush_state, flush_state);
/*
- * We cannot call mmdrop() because we are in interrupt context,
+ * We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*/
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
{
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
+EXPORT_SYMBOL_GPL(leave_mm);
/*
*
@@ -85,25 +85,25 @@ static inline void leave_mm(int cpu)
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignore flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
* 1a2) set cpu mmu_state to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
+ * Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
+ * Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
* cpu active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu mmu_state to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
@@ -137,12 +137,12 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
* orig_rax contains the negated interrupt vector.
* Use that to determine where the sender put the data.
*/
- sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
+ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
f = &per_cpu(flush_state, sender);
if (!cpu_isset(cpu, f->flush_cpumask))
goto out;
- /*
+ /*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs
@@ -150,10 +150,10 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
*
* BUG();
*/
-
+
if (f->flush_mm == read_pda(active_mm)) {
if (read_pda(mmu_state) == TLBSTATE_OK) {
- if (f->flush_va == FLUSH_ALL)
+ if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(f->flush_va);
@@ -166,19 +166,22 @@ out:
add_pda(irq_tlb_count, 1);
}
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+ unsigned long va)
{
int sender;
union smp_flush_state *f;
+ cpumask_t cpumask = *cpumaskp;
/* Caller has disabled preemption */
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
f = &per_cpu(flush_state, sender);
- /* Could avoid this lock when
- num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- probably not worth checking this for a cache-hot lock. */
+ /*
+ * Could avoid this lock when
+ * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+ * probably not worth checking this for a cache-hot lock.
+ */
spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
@@ -202,14 +205,14 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
int __cpuinit init_smp_flush(void)
{
int i;
+
for_each_cpu_mask(i, cpu_possible_map) {
spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
}
return 0;
}
-
core_initcall(init_smp_flush);
-
+
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
@@ -221,10 +224,9 @@ void flush_tlb_current_task(void)
local_flush_tlb();
if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
-EXPORT_SYMBOL(flush_tlb_current_task);
void flush_tlb_mm (struct mm_struct * mm)
{
@@ -241,11 +243,10 @@ void flush_tlb_mm (struct mm_struct * mm)
leave_mm(smp_processor_id());
}
if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
-EXPORT_SYMBOL(flush_tlb_mm);
void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
{
@@ -259,8 +260,8 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
if (current->active_mm == mm) {
if(current->mm)
__flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
+ else
+ leave_mm(smp_processor_id());
}
if (!cpus_empty(cpu_mask))
@@ -268,7 +269,6 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
preempt_enable();
}
-EXPORT_SYMBOL(flush_tlb_page);
static void do_flush_tlb_all(void* info)
{
@@ -325,11 +325,9 @@ void unlock_ipi_call_lock(void)
* this function sends a 'generic call function' IPI to all other CPU
* of the system defined in the mask.
*/
-
-static int
-__smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+static int __smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
{
struct call_data_struct data;
cpumask_t allbutself;
@@ -417,11 +415,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
*/
int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
+ int nonatomic, int wait)
{
/* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
+ int ret, me = get_cpu();
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -471,9 +468,9 @@ static void stop_this_cpu(void *dummy)
*/
cpu_clear(smp_processor_id(), cpu_online_map);
disable_local_APIC();
- for (;;)
+ for (;;)
halt();
-}
+}
void smp_send_stop(void)
{
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 4ea80cbe52e..5787a0c3e29 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
@@ -92,15 +91,10 @@ static cpumask_t smp_commenced_mask;
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
-/*
- * The following static array is used during kernel startup
- * and the x86_cpu_to_apicid_ptr contains the address of the
- * array during this time. Is it zeroed when the per_cpu
- * data area is removed.
- */
+/* which logical CPU number maps to which CPU (physical APIC ID) */
u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
{ [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_ptr;
+void *x86_cpu_to_apicid_early_ptr;
DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
@@ -113,7 +107,6 @@ u8 apicid_2_node[MAX_APICID];
extern const unsigned char trampoline_data [];
extern const unsigned char trampoline_end [];
static unsigned char *trampoline_base;
-static int trampoline_exec;
static void map_cpu_to_logical_apicid(void);
@@ -138,17 +131,13 @@ static unsigned long __cpuinit setup_trampoline(void)
*/
void __init smp_alloc_memory(void)
{
- trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+ trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
/*
* Has to be in very low memory so we can execute
* real-mode AP code.
*/
if (__pa(trampoline_base) >= 0x9F000)
BUG();
- /*
- * Make the SMP trampoline executable:
- */
- trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
/*
@@ -405,7 +394,7 @@ static void __cpuinit start_secondary(void *unused)
setup_secondary_clock();
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
- enable_NMI_through_LVT0(NULL);
+ enable_NMI_through_LVT0();
enable_8259A_irq(0);
}
/*
@@ -448,38 +437,38 @@ void __devinit initialize_secondary(void)
{
/*
* We don't actually need to load the full TSS,
- * basically just the stack pointer and the eip.
+ * basically just the stack pointer and the ip.
*/
asm volatile(
"movl %0,%%esp\n\t"
"jmp *%1"
:
- :"m" (current->thread.esp),"m" (current->thread.eip));
+ :"m" (current->thread.sp),"m" (current->thread.ip));
}
/* Static state in head.S used to set up a CPU */
extern struct {
- void * esp;
+ void * sp;
unsigned short ss;
} stack_start;
#ifdef CONFIG_NUMA
/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-EXPORT_SYMBOL(node_2_cpu_mask);
+EXPORT_SYMBOL(node_to_cpumask_map);
/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
+int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_to_node_map);
/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
printk("Mapping cpu %d to node %d\n", cpu, node);
- cpu_set(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = node;
+ cpu_set(cpu, node_to_cpumask_map[node]);
+ cpu_to_node_map[cpu] = node;
}
/* undo a mapping between cpu and node. */
@@ -489,8 +478,8 @@ static inline void unmap_cpu_to_node(int cpu)
printk("Unmapping cpu %d from all nodes\n", cpu);
for (node = 0; node < MAX_NUMNODES; node ++)
- cpu_clear(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = 0;
+ cpu_clear(cpu, node_to_cpumask_map[node]);
+ cpu_to_node_map[cpu] = 0;
}
#else /* !CONFIG_NUMA */
@@ -668,7 +657,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
* target processor state.
*/
startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
- (unsigned long) stack_start.esp);
+ (unsigned long) stack_start.sp);
/*
* Run STARTUP IPI loop.
@@ -754,7 +743,7 @@ static inline struct task_struct * __cpuinit alloc_idle_task(int cpu)
/* initialize thread_struct. we really want to avoid destroy
* idle tread
*/
- idle->thread.esp = (unsigned long)task_pt_regs(idle);
+ idle->thread.sp = (unsigned long)task_pt_regs(idle);
init_idle(idle, cpu);
return idle;
}
@@ -799,7 +788,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
per_cpu(current_task, cpu) = idle;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
- idle->thread.eip = (unsigned long) start_secondary;
+ idle->thread.ip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
@@ -807,9 +796,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
alternatives_smp_switch(1);
/* So we see what's up */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip);
/* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.esp = (void *) idle->thread.esp;
+ stack_start.sp = (void *) idle->thread.sp;
irq_ctx_init(cpu);
@@ -1091,7 +1080,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* Allow the user to impress friends.
*/
Dprintk("Before bogomips.\n");
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for_each_possible_cpu(cpu)
if (cpu_isset(cpu, cpu_callout_map))
bogosum += cpu_data(cpu).loops_per_jiffy;
printk(KERN_INFO
@@ -1122,7 +1111,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* construct cpu_sibling_map, so that we can tell sibling CPUs
* efficiently.
*/
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
cpus_clear(per_cpu(cpu_sibling_map, cpu));
cpus_clear(per_cpu(cpu_core_map, cpu));
}
@@ -1296,12 +1285,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
zap_low_mappings();
-#ifndef CONFIG_HOTPLUG_CPU
- /*
- * Disable executability of the SMP trampoline:
- */
- set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
}
void __init smp_intr_init(void)
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index aaf4e129121..cc64b8085c2 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -65,7 +65,7 @@ int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
+DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;
@@ -78,8 +78,6 @@ EXPORT_SYMBOL(cpu_online_map);
*/
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
-
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
@@ -113,10 +111,20 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
* a new thread. Also avoids complicated thread destroy functionality
* for idle threads.
*/
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
+ * removed after init for !CONFIG_HOTPLUG_CPU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
+#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
+#define set_idle_for_cpu(x,p) (per_cpu(idle_thread_array, x) = (p))
+#else
struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
+#endif
+
/*
* Currently trivial. Write the real->protected mode
@@ -212,6 +220,7 @@ void __cpuinit smp_callin(void)
Dprintk("CALLIN, before setup_local_APIC().\n");
setup_local_APIC();
+ end_local_APIC_setup();
/*
* Get our bogomips.
@@ -338,7 +347,7 @@ void __cpuinit start_secondary(void)
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
- enable_NMI_through_LVT0(NULL);
+ enable_NMI_through_LVT0();
enable_8259A_irq(0);
}
@@ -370,7 +379,7 @@ void __cpuinit start_secondary(void)
unlock_ipi_call_lock();
- setup_secondary_APIC_clock();
+ setup_secondary_clock();
cpu_idle();
}
@@ -384,19 +393,20 @@ static void inquire_remote_apic(int apicid)
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
int timeout;
- unsigned int status;
+ u32 status;
printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
+ printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
/*
* Wait for idle.
*/
status = safe_apic_wait_icr_idle();
if (status)
- printk("a previous APIC delivery may have failed\n");
+ printk(KERN_CONT
+ "a previous APIC delivery may have failed\n");
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -410,10 +420,10 @@ static void inquire_remote_apic(int apicid)
switch (status) {
case APIC_ICR_RR_VALID:
status = apic_read(APIC_RRR);
- printk("%08x\n", status);
+ printk(KERN_CONT "%08x\n", status);
break;
default:
- printk("failed\n");
+ printk(KERN_CONT "failed\n");
}
}
}
@@ -466,7 +476,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
*/
Dprintk("#startup loops: %d.\n", num_starts);
- maxlvt = get_maxlvt();
+ maxlvt = lapic_get_maxlvt();
for (j = 1; j <= num_starts; j++) {
Dprintk("Sending STARTUP #%d.\n",j);
@@ -577,7 +587,7 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
c_idle.idle = get_idle_for_cpu(cpu);
if (c_idle.idle) {
- c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
+ c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
init_idle(c_idle.idle, cpu);
goto do_rest;
@@ -613,8 +623,8 @@ do_rest:
start_rip = setup_trampoline();
- init_rsp = c_idle.idle->thread.rsp;
- per_cpu(init_tss,cpu).rsp0 = init_rsp;
+ init_rsp = c_idle.idle->thread.sp;
+ load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
initial_code = start_secondary;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
@@ -691,7 +701,7 @@ do_rest:
}
if (boot_error) {
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
clear_node_cpumask(cpu); /* was set by numa_add_cpu */
cpu_clear(cpu, cpu_present_map);
cpu_clear(cpu, cpu_possible_map);
@@ -841,24 +851,16 @@ static int __init smp_sanity_check(unsigned max_cpus)
return 0;
}
-/*
- * Copy apicid's found by MP_processor_info from initial array to the per cpu
- * data area. The x86_cpu_to_apicid_init array is then expendable and the
- * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no
- * longer available.
- */
-void __init smp_set_apicids(void)
+static void __init smp_cpu_index_default(void)
{
- int cpu;
+ int i;
+ struct cpuinfo_x86 *c;
- for_each_cpu_mask(cpu, cpu_possible_map) {
- if (per_cpu_offset(cpu))
- per_cpu(x86_cpu_to_apicid, cpu) =
- x86_cpu_to_apicid_init[cpu];
+ for_each_cpu_mask(i, cpu_possible_map) {
+ c = &cpu_data(i);
+ /* mark all to hotplug */
+ c->cpu_index = NR_CPUS;
}
-
- /* indicate the static array will be going away soon */
- x86_cpu_to_apicid_ptr = NULL;
}
/*
@@ -868,9 +870,9 @@ void __init smp_set_apicids(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
nmi_watchdog_default();
+ smp_cpu_index_default();
current_cpu_data = boot_cpu_data;
current_thread_info()->cpu = 0; /* needed? */
- smp_set_apicids();
set_cpu_sibling_map(0);
if (smp_sanity_check(max_cpus) < 0) {
@@ -885,6 +887,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
setup_local_APIC();
+ /*
+ * Enable IO APIC before setting up error vector
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+ end_local_APIC_setup();
+
if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
@@ -903,7 +912,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
* Set up local APIC timer on boot CPU.
*/
- setup_boot_APIC_clock();
+ setup_boot_clock();
}
/*
@@ -912,7 +921,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
- cpu_set(me, cpu_online_map);
+ /* already set me in cpu_online_map in boot_cpu_init() */
cpu_set(me, cpu_callout_map);
per_cpu(cpu_state, me) = CPU_ONLINE;
}
@@ -1016,7 +1025,7 @@ void remove_cpu_from_maps(void)
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
clear_node_cpumask(cpu);
}
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
index bbfe85a0f69..8bc38af29ae 100644
--- a/arch/x86/kernel/smpcommon_32.c
+++ b/arch/x86/kernel/smpcommon_32.c
@@ -14,10 +14,11 @@ __cpuinit void init_gdt(int cpu)
{
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
- (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
+ pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
__per_cpu_offset[cpu], 0xFFFFF,
- 0x80 | DESCTYPE_S | 0x2, 0x8);
+ 0x2 | DESCTYPE_S, 0x8);
+
+ gdt[GDT_ENTRY_PERCPU].s = 1;
per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2a8713ec0f9..2bf6903cb44 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -57,8 +57,6 @@ static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
static int num_memory_chunks; /* total number of memory chunks */
static u8 __initdata apicid_to_pxm[MAX_APICID];
-extern void * boot_ioremap(unsigned long, unsigned long);
-
/* Identify CPU proximity domains */
static void __init parse_cpu_affinity_structure(char *p)
{
@@ -299,7 +297,7 @@ int __init get_memcfg_from_srat(void)
}
rsdt = (struct acpi_table_rsdt *)
- boot_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
+ early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
if (!rsdt) {
printk(KERN_WARNING
@@ -339,11 +337,11 @@ int __init get_memcfg_from_srat(void)
for (i = 0; i < tables; i++) {
/* Map in header, then map in full table length. */
header = (struct acpi_table_header *)
- boot_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
+ early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
if (!header)
break;
header = (struct acpi_table_header *)
- boot_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
+ early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
if (!header)
break;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 55771fd7e54..02f0f61f5b1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -22,9 +22,9 @@ static int save_stack_stack(void *data, char *name)
return -1;
}
-static void save_stack_address(void *data, unsigned long addr)
+static void save_stack_address(void *data, unsigned long addr, int reliable)
{
- struct stack_trace *trace = (struct stack_trace *)data;
+ struct stack_trace *trace = data;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -33,7 +33,8 @@ static void save_stack_address(void *data, unsigned long addr)
trace->entries[trace->nr_entries++] = addr;
}
-static void save_stack_address_nosched(void *data, unsigned long addr)
+static void
+save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = (struct stack_trace *)data;
if (in_sched_functions(addr))
@@ -65,15 +66,14 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
*/
void save_stack_trace(struct stack_trace *trace)
{
- dump_trace(current, NULL, NULL, &save_stack_ops, trace);
+ dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
-EXPORT_SYMBOL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
+ dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
new file mode 100644
index 00000000000..2ef1a5f8d67
--- /dev/null
+++ b/arch/x86/kernel/step.c
@@ -0,0 +1,203 @@
+/*
+ * x86 single-step support code, common to 32-bit and 64-bit.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+
+unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
+{
+ unsigned long addr, seg;
+
+ addr = regs->ip;
+ seg = regs->cs & 0xffff;
+ if (v8086_mode(regs)) {
+ addr = (addr & 0xffff) + (seg << 4);
+ return addr;
+ }
+
+ /*
+ * We'll assume that the code segments in the GDT
+ * are all zero-based. That is largely true: the
+ * TLS segments are used for data, and the PNPBIOS
+ * and APM bios ones we just ignore here.
+ */
+ if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+ u32 *desc;
+ unsigned long base;
+
+ seg &= ~7UL;
+
+ mutex_lock(&child->mm->context.lock);
+ if (unlikely((seg >> 3) >= child->mm->context.size))
+ addr = -1L; /* bogus selector, access would fault */
+ else {
+ desc = child->mm->context.ldt + seg;
+ base = ((desc[0] >> 16) |
+ ((desc[1] & 0xff) << 16) |
+ (desc[1] & 0xff000000));
+
+ /* 16-bit code segment? */
+ if (!((desc[1] >> 22) & 1))
+ addr &= 0xffff;
+ addr += base;
+ }
+ mutex_unlock(&child->mm->context.lock);
+ }
+
+ return addr;
+}
+
+static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
+{
+ int i, copied;
+ unsigned char opcode[15];
+ unsigned long addr = convert_ip_to_linear(child, regs);
+
+ copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+ for (i = 0; i < copied; i++) {
+ switch (opcode[i]) {
+ /* popf and iret */
+ case 0x9d: case 0xcf:
+ return 1;
+
+ /* CHECKME: 64 65 */
+
+ /* opcode and address size prefixes */
+ case 0x66: case 0x67:
+ continue;
+ /* irrelevant prefixes (segment overrides and repeats) */
+ case 0x26: case 0x2e:
+ case 0x36: case 0x3e:
+ case 0x64: case 0x65:
+ case 0xf0: case 0xf2: case 0xf3:
+ continue;
+
+#ifdef CONFIG_X86_64
+ case 0x40 ... 0x4f:
+ if (regs->cs != __USER_CS)
+ /* 32-bit mode: register increment */
+ return 0;
+ /* 64-bit mode: REX prefix */
+ continue;
+#endif
+
+ /* CHECKME: f2, f3 */
+
+ /*
+ * pushf: NOTE! We should probably not let
+ * the user see the TF bit being set. But
+ * it's more pain than it's worth to avoid
+ * it, and a debugger could emulate this
+ * all in user space if it _really_ cares.
+ */
+ case 0x9c:
+ default:
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Enable single-stepping. Return nonzero if user mode is not using TF itself.
+ */
+static int enable_single_step(struct task_struct *child)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+
+ /*
+ * Always set TIF_SINGLESTEP - this guarantees that
+ * we single-step system calls etc.. This will also
+ * cause us to set TF when returning to user mode.
+ */
+ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+ /*
+ * If TF was already set, don't do anything else
+ */
+ if (regs->flags & X86_EFLAGS_TF)
+ return 0;
+
+ /* Set TF on the kernel stack.. */
+ regs->flags |= X86_EFLAGS_TF;
+
+ /*
+ * ..but if TF is changed by the instruction we will trace,
+ * don't mark it as being "us" that set it, so that we
+ * won't clear it by hand later.
+ */
+ if (is_setting_trap_flag(child, regs))
+ return 0;
+
+ set_tsk_thread_flag(child, TIF_FORCED_TF);
+
+ return 1;
+}
+
+/*
+ * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
+ */
+static void write_debugctlmsr(struct task_struct *child, unsigned long val)
+{
+ child->thread.debugctlmsr = val;
+
+ if (child != current)
+ return;
+
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+}
+
+/*
+ * Enable single or block step.
+ */
+static void enable_step(struct task_struct *child, bool block)
+{
+ /*
+ * Make sure block stepping (BTF) is not enabled unless it should be.
+ * Note that we don't try to worry about any is_setting_trap_flag()
+ * instructions after the first when using block stepping.
+ * So noone should try to use debugger block stepping in a program
+ * that uses user-mode single stepping itself.
+ */
+ if (enable_single_step(child) && block) {
+ set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ write_debugctlmsr(child,
+ child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
+ } else {
+ write_debugctlmsr(child,
+ child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ }
+}
+
+void user_enable_single_step(struct task_struct *child)
+{
+ enable_step(child, 0);
+}
+
+void user_enable_block_step(struct task_struct *child)
+{
+ enable_step(child, 1);
+}
+
+void user_disable_single_step(struct task_struct *child)
+{
+ /*
+ * Make sure block stepping (BTF) is disabled.
+ */
+ write_debugctlmsr(child,
+ child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+ /* Always clear TIF_SINGLESTEP... */
+ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+ /* But touch TF only if it was set by us.. */
+ if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
+ task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
+}
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 2e5efaaf880..09199511c25 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -17,9 +17,26 @@
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
+static void fix_processor_context(void);
+
struct saved_context saved_context;
-void __save_processor_state(struct saved_context *ctxt)
+/**
+ * __save_processor_state - save CPU registers before creating a
+ * hibernation image and before restoring the memory state from it
+ * @ctxt - structure to store the registers contents in
+ *
+ * NOTE: If there is a CPU register the modification of which by the
+ * boot kernel (ie. the kernel used for loading the hibernation image)
+ * might affect the operations of the restored target kernel (ie. the one
+ * saved in the hibernation image), then its contents must be saved by this
+ * function. In other words, if kernel A is hibernated and different
+ * kernel B is used for loading the hibernation image into memory, the
+ * kernel A's __save_processor_state() function must save all registers
+ * needed by kernel A, so that it can operate correctly after the resume
+ * regardless of what kernel B does in the meantime.
+ */
+static void __save_processor_state(struct saved_context *ctxt)
{
kernel_fpu_begin();
@@ -69,7 +86,12 @@ static void do_fpu_end(void)
kernel_fpu_end();
}
-void __restore_processor_state(struct saved_context *ctxt)
+/**
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ */
+static void __restore_processor_state(struct saved_context *ctxt)
{
/*
* control registers
@@ -113,14 +135,14 @@ void restore_processor_state(void)
__restore_processor_state(&saved_context);
}
-void fix_processor_context(void)
+static void fix_processor_context(void)
{
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
- cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
syscall_init(); /* This sets MSR_*STAR and related */
load_TR_desc(); /* This does ltr */
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S
index 72f952103e5..aeb9a4d7681 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/kernel/suspend_asm_64.S
@@ -18,13 +18,13 @@
ENTRY(swsusp_arch_suspend)
movq $saved_context, %rax
- movq %rsp, pt_regs_rsp(%rax)
- movq %rbp, pt_regs_rbp(%rax)
- movq %rsi, pt_regs_rsi(%rax)
- movq %rdi, pt_regs_rdi(%rax)
- movq %rbx, pt_regs_rbx(%rax)
- movq %rcx, pt_regs_rcx(%rax)
- movq %rdx, pt_regs_rdx(%rax)
+ movq %rsp, pt_regs_sp(%rax)
+ movq %rbp, pt_regs_bp(%rax)
+ movq %rsi, pt_regs_si(%rax)
+ movq %rdi, pt_regs_di(%rax)
+ movq %rbx, pt_regs_bx(%rax)
+ movq %rcx, pt_regs_cx(%rax)
+ movq %rdx, pt_regs_dx(%rax)
movq %r8, pt_regs_r8(%rax)
movq %r9, pt_regs_r9(%rax)
movq %r10, pt_regs_r10(%rax)
@@ -34,7 +34,7 @@ ENTRY(swsusp_arch_suspend)
movq %r14, pt_regs_r14(%rax)
movq %r15, pt_regs_r15(%rax)
pushfq
- popq pt_regs_eflags(%rax)
+ popq pt_regs_flags(%rax)
/* save the address of restore_registers */
movq $restore_registers, %rax
@@ -115,13 +115,13 @@ ENTRY(restore_registers)
/* We don't restore %rax, it must be 0 anyway */
movq $saved_context, %rax
- movq pt_regs_rsp(%rax), %rsp
- movq pt_regs_rbp(%rax), %rbp
- movq pt_regs_rsi(%rax), %rsi
- movq pt_regs_rdi(%rax), %rdi
- movq pt_regs_rbx(%rax), %rbx
- movq pt_regs_rcx(%rax), %rcx
- movq pt_regs_rdx(%rax), %rdx
+ movq pt_regs_sp(%rax), %rsp
+ movq pt_regs_bp(%rax), %rbp
+ movq pt_regs_si(%rax), %rsi
+ movq pt_regs_di(%rax), %rdi
+ movq pt_regs_bx(%rax), %rbx
+ movq pt_regs_cx(%rax), %rcx
+ movq pt_regs_dx(%rax), %rdx
movq pt_regs_r8(%rax), %r8
movq pt_regs_r9(%rax), %r9
movq pt_regs_r10(%rax), %r10
@@ -130,7 +130,7 @@ ENTRY(restore_registers)
movq pt_regs_r13(%rax), %r13
movq pt_regs_r14(%rax), %r14
movq pt_regs_r15(%rax), %r15
- pushq pt_regs_eflags(%rax)
+ pushq pt_regs_flags(%rax)
popfq
xorq %rax, %rax
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 907942ee6e7..bd802a5e1aa 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -12,6 +12,7 @@
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/personality.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/ia32.h>
@@ -65,6 +66,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) {
+ unsigned long new_begin;
/* This is usually used needed to map code in small
model, so it needs to be in the first 31bit. Limit
it to that. This means we need to move the
@@ -74,6 +76,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
of playground for now. -AK */
*begin = 0x40000000;
*end = 0x80000000;
+ if (current->flags & PF_RANDOMIZE) {
+ new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
+ if (new_begin)
+ *begin = new_begin;
+ }
} else {
*begin = TASK_UNMAPPED_BASE;
*end = TASK_SIZE;
@@ -143,6 +150,97 @@ full_search:
}
}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+
+ /* requested length too big for entire address space */
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ return addr;
+
+ /* for MAP_32BIT mappings we force the legact mmap base */
+ if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT))
+ goto bottomup;
+
+ /* requesting a specific address */
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ /* check if free_area_cache is useful for us */
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+
+ /* make sure it can fit in the remaining address space */
+ if (addr > len) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+
+ if (mm->mmap_base < len)
+ goto bottomup;
+
+ addr = mm->mmap_base-len;
+
+ do {
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (!vma || addr+len <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr);
+
+ /* remember the largest hole we saw so far */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start-len;
+ } while (len < vma->vm_start);
+
+bottomup:
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ mm->cached_hole_size = ~0UL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+ /*
+ * Restore the topdown base:
+ */
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+
+ return addr;
+}
+
+
asmlinkage long sys_uname(struct new_utsname __user * name)
{
int err;
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
new file mode 100644
index 00000000000..6d7ef11e797
--- /dev/null
+++ b/arch/x86/kernel/test_nx.c
@@ -0,0 +1,176 @@
+/*
+ * test_nx.c: functional test for NX functionality
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+extern int rodata_test_data;
+
+/*
+ * This file checks 4 things:
+ * 1) Check if the stack is not executable
+ * 2) Check if kmalloc memory is not executable
+ * 3) Check if the .rodata section is not executable
+ * 4) Check if the .data section of a module is not executable
+ *
+ * To do this, the test code tries to execute memory in stack/kmalloc/etc,
+ * and then checks if the expected trap happens.
+ *
+ * Sadly, this implies having a dynamic exception handling table entry.
+ * ... which can be done (and will make Rusty cry)... but it can only
+ * be done in a stand-alone module with only 1 entry total.
+ * (otherwise we'd have to sort and that's just too messy)
+ */
+
+
+
+/*
+ * We want to set up an exception handling point on our stack,
+ * which means a variable value. This function is rather dirty
+ * and walks the exception table of the module, looking for a magic
+ * marker and replaces it with a specific function.
+ */
+static void fudze_exception_table(void *marker, void *new)
+{
+ struct module *mod = THIS_MODULE;
+ struct exception_table_entry *extable;
+
+ /*
+ * Note: This module has only 1 exception table entry,
+ * so searching and sorting is not needed. If that changes,
+ * this would be the place to search and re-sort the exception
+ * table.
+ */
+ if (mod->num_exentries > 1) {
+ printk(KERN_ERR "test_nx: too many exception table entries!\n");
+ printk(KERN_ERR "test_nx: test results are not reliable.\n");
+ return;
+ }
+ extable = (struct exception_table_entry *)mod->extable;
+ extable[0].insn = (unsigned long)new;
+}
+
+
+/*
+ * exception tables get their symbols translated so we need
+ * to use a fake function to put in there, which we can then
+ * replace at runtime.
+ */
+void foo_label(void);
+
+/*
+ * returns 0 for not-executable, negative for executable
+ *
+ * Note: we cannot allow this function to be inlined, because
+ * that would give us more than 1 exception table entry.
+ * This in turn would break the assumptions above.
+ */
+static noinline int test_address(void *address)
+{
+ unsigned long result;
+
+ /* Set up an exception table entry for our address */
+ fudze_exception_table(&foo_label, address);
+ result = 1;
+ asm volatile(
+ "foo_label:\n"
+ "0: call *%[fake_code]\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: mov %[zero], %[rslt]\n"
+ " ret\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 0b\n"
+ " .quad 2b\n"
+ ".previous\n"
+ : [rslt] "=r" (result)
+ : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
+ );
+ /* change the exception table back for the next round */
+ fudze_exception_table(address, &foo_label);
+
+ if (result)
+ return -ENODEV;
+ return 0;
+}
+
+static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
+
+static int test_NX(void)
+{
+ int ret = 0;
+ /* 0xC3 is the opcode for "ret" */
+ char stackcode[] = {0xC3, 0x90, 0 };
+ char *heap;
+
+ test_data = 0xC3;
+
+ printk(KERN_INFO "Testing NX protection\n");
+
+ /* Test 1: check if the stack is not executable */
+ if (test_address(&stackcode)) {
+ printk(KERN_ERR "test_nx: stack was executable\n");
+ ret = -ENODEV;
+ }
+
+
+ /* Test 2: Check if the heap is executable */
+ heap = kmalloc(64, GFP_KERNEL);
+ if (!heap)
+ return -ENOMEM;
+ heap[0] = 0xC3; /* opcode for "ret" */
+
+ if (test_address(heap)) {
+ printk(KERN_ERR "test_nx: heap was executable\n");
+ ret = -ENODEV;
+ }
+ kfree(heap);
+
+ /*
+ * The following 2 tests currently fail, this needs to get fixed
+ * Until then, don't run them to avoid too many people getting scared
+ * by the error message
+ */
+#if 0
+
+#ifdef CONFIG_DEBUG_RODATA
+ /* Test 3: Check if the .rodata section is executable */
+ if (rodata_test_data != 0xC3) {
+ printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
+ ret = -ENODEV;
+ } else if (test_address(&rodata_test_data)) {
+ printk(KERN_ERR "test_nx: .rodata section is executable\n");
+ ret = -ENODEV;
+ }
+#endif
+
+ /* Test 4: Check if the .data section of a module is executable */
+ if (test_address(&test_data)) {
+ printk(KERN_ERR "test_nx: .data section is executable\n");
+ ret = -ENODEV;
+ }
+
+#endif
+ return 0;
+}
+
+static void test_exit(void)
+{
+}
+
+module_init(test_NX);
+module_exit(test_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Testcase for the NX infrastructure");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
new file mode 100644
index 00000000000..4c163772000
--- /dev/null
+++ b/arch/x86/kernel/test_rodata.c
@@ -0,0 +1,86 @@
+/*
+ * test_rodata.c: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/module.h>
+#include <asm/sections.h>
+extern int rodata_test_data;
+
+int rodata_test(void)
+{
+ unsigned long result;
+ unsigned long start, end;
+
+ /* test 1: read the value */
+ /* If this test fails, some previous testrun has clobbered the state */
+ if (!rodata_test_data) {
+ printk(KERN_ERR "rodata_test: test 1 fails (start data)\n");
+ return -ENODEV;
+ }
+
+ /* test 2: write to the variable; this should fault */
+ /*
+ * If this test fails, we managed to overwrite the data
+ *
+ * This is written in assembly to be able to catch the
+ * exception that is supposed to happen in the correct
+ * case
+ */
+
+ result = 1;
+ asm volatile(
+ "0: mov %[zero],(%[rodata_test])\n"
+ " mov %[zero], %[rslt]\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: jmp 1b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 16\n"
+#ifdef CONFIG_X86_32
+ " .long 0b,2b\n"
+#else
+ " .quad 0b,2b\n"
+#endif
+ ".previous"
+ : [rslt] "=r" (result)
+ : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
+ );
+
+
+ if (!result) {
+ printk(KERN_ERR "rodata_test: test data was not read only\n");
+ return -ENODEV;
+ }
+
+ /* test 3: check the value hasn't changed */
+ /* If this test fails, we managed to overwrite the data */
+ if (!rodata_test_data) {
+ printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n");
+ return -ENODEV;
+ }
+ /* test 4: check if the rodata section is 4Kb aligned */
+ start = (unsigned long)__start_rodata;
+ end = (unsigned long)__end_rodata;
+ if (start & (PAGE_SIZE - 1)) {
+ printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n");
+ return -ENODEV;
+ }
+ if (end & (PAGE_SIZE - 1)) {
+ printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 8a322c96bc2..1a89e93f3f1 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -28,98 +28,20 @@
* serialize accesses to xtime/lost_ticks).
*/
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/efi.h>
#include <linux/mca.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/msr.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/timer.h>
-#include <asm/time.h>
-
-#include "mach_time.h"
-
-#include <linux/timex.h>
-
-#include <asm/hpet.h>
-
#include <asm/arch_hooks.h>
-
-#include "io_ports.h"
-
-#include <asm/i8259.h>
+#include <asm/hpet.h>
+#include <asm/time.h>
#include "do_timer.h"
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
EXPORT_SYMBOL(cpu_khz);
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-/*
- * This is a special lock that is owned by the CPU and holds the index
- * register we are working with. It is required for NMI access to the
- * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
- */
-volatile unsigned long cmos_lock = 0;
-EXPORT_SYMBOL(cmos_lock);
-
-/* Routines for accessing the CMOS RAM/RTC. */
-unsigned char rtc_cmos_read(unsigned char addr)
-{
- unsigned char val;
- lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- val = inb_p(RTC_PORT(1));
- lock_cmos_suffix(addr);
- return val;
-}
-EXPORT_SYMBOL(rtc_cmos_read);
-
-void rtc_cmos_write(unsigned char val, unsigned char addr)
-{
- lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- outb_p(val, RTC_PORT(1));
- lock_cmos_suffix(addr);
-}
-EXPORT_SYMBOL(rtc_cmos_write);
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
- int retval;
- unsigned long flags;
-
- /* gets recalled with irq locally disabled */
- /* XXX - does irqsave resolve this? -johnstul */
- spin_lock_irqsave(&rtc_lock, flags);
- retval = set_wallclock(nowtime);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return retval;
-}
-
-
int timer_ack;
unsigned long profile_pc(struct pt_regs *regs)
@@ -127,17 +49,17 @@ unsigned long profile_pc(struct pt_regs *regs)
unsigned long pc = instruction_pointer(regs);
#ifdef CONFIG_SMP
- if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) &&
+ if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) &&
in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
- return *(unsigned long *)(regs->ebp + 4);
+ return *(unsigned long *)(regs->bp + 4);
#else
- unsigned long *sp = (unsigned long *)&regs->esp;
+ unsigned long *sp = (unsigned long *)&regs->sp;
/* Return address is either directly at stack pointer
- or above a saved eflags. Eflags has bits 22-31 zero,
+ or above a saved flags. Eflags has bits 22-31 zero,
kernel addresses don't. */
- if (sp[0] >> 22)
+ if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
@@ -193,26 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-/* not static: needed by APM */
-unsigned long read_persistent_clock(void)
-{
- unsigned long retval;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- retval = get_wallclock();
-
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
- return set_rtc_mmss(now.tv_sec);
-}
-
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
void __init hpet_time_init(void)
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 368b1942b39..0380795121a 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -11,43 +11,18 @@
* RTC support code taken from arch/i386/kernel/timers/time_hpet.c
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
+#include <linux/clockchips.h>
#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/time.h>
-#include <linux/ioport.h>
+#include <linux/interrupt.h>
#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/kallsyms.h>
-#include <linux/acpi.h>
-#include <linux/clockchips.h>
+#include <linux/time.h>
-#ifdef CONFIG_ACPI
-#include <acpi/achware.h> /* for PM timer frequency */
-#include <acpi/acpi_bus.h>
-#endif
#include <asm/i8253.h>
-#include <asm/pgtable.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/proto.h>
-#include <asm/hpet.h>
-#include <asm/sections.h>
-#include <linux/hpet.h>
-#include <asm/apic.h>
#include <asm/hpet.h>
-#include <asm/mpspec.h>
#include <asm/nmi.h>
#include <asm/vgtod.h>
-
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
+#include <asm/time.h>
+#include <asm/timer.h>
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
@@ -56,10 +31,10 @@ unsigned long profile_pc(struct pt_regs *regs)
unsigned long pc = instruction_pointer(regs);
/* Assume the lock function has either no stack frame or a copy
- of eflags from PUSHF
+ of flags from PUSHF
Eflags always has bits 22 and up cleared unlike kernel addresses. */
if (!user_mode(regs) && in_lock_functions(pc)) {
- unsigned long *sp = (unsigned long *)regs->rsp;
+ unsigned long *sp = (unsigned long *)regs->sp;
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
@@ -69,82 +44,6 @@ unsigned long profile_pc(struct pt_regs *regs)
}
EXPORT_SYMBOL(profile_pc);
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
- * ms after the second nowtime has started, because when nowtime is written
- * into the registers of the CMOS clock, it will jump to the next second
- * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
- * sheet for details.
- */
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char control, freq_select;
- unsigned long flags;
-
-/*
- * set_rtc_mmss is called when irqs are enabled, so disable irqs here
- */
- spin_lock_irqsave(&rtc_lock, flags);
-/*
- * Tell the clock it's being set and stop it.
- */
- control = CMOS_READ(RTC_CONTROL);
- CMOS_WRITE(control | RTC_SET, RTC_CONTROL);
-
- freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- BCD_TO_BIN(cmos_minutes);
-
-/*
- * since we're only adjusting minutes and seconds, don't interfere with hour
- * overflow. This avoids messing with unknown time zones but requires your RTC
- * not to be off by more than 15 minutes. Since we're calling it only when
- * our clock is externally synchronized using NTP, this shouldn't be a problem.
- */
-
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
- real_minutes += 30; /* correct for half hour time zone */
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) >= 30) {
- printk(KERN_WARNING "time.c: can't update CMOS clock "
- "from %d to %d\n", cmos_minutes, real_minutes);
- retval = -1;
- } else {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
- CMOS_WRITE(real_seconds, RTC_SECONDS);
- CMOS_WRITE(real_minutes, RTC_MINUTES);
- }
-
-/*
- * The following flags have to be released exactly in this order, otherwise the
- * DS12887 (popular MC146818A clone with integrated battery and quartz) will
- * not reset the oscillator and will not update precisely 500 ms later. You
- * won't find this mentioned in the Dallas Semiconductor data sheets, but who
- * believes data sheets anyway ... -- Markus Kuhn
- */
-
- CMOS_WRITE(control, RTC_CONTROL);
- CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
- return set_rtc_mmss(now.tv_sec);
-}
-
static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
{
add_pda(irq0_irqs, 1);
@@ -154,67 +53,10 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-unsigned long read_persistent_clock(void)
-{
- unsigned int year, mon, day, hour, min, sec;
- unsigned long flags;
- unsigned century = 0;
-
- spin_lock_irqsave(&rtc_lock, flags);
- /*
- * if UIP is clear, then we have >= 244 microseconds before RTC
- * registers will be updated. Spec sheet says that this is the
- * reliable way to read RTC - registers invalid (off bus) during update
- */
- while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
- cpu_relax();
-
-
- /* now read all RTC registers while stable with interrupts disabled */
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- century = CMOS_READ(acpi_gbl_FADT.century);
-#endif
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- /*
- * We know that x86-64 always uses BCD format, no need to check the
- * config register.
- */
-
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
-
- if (century) {
- BCD_TO_BIN(century);
- year += century * 100;
- printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
- } else {
- /*
- * x86-64 systems only exists since 2002.
- * This will work up to Dec 31, 2100
- */
- year += 2000;
- }
-
- return mktime(year, mon, day, hour, min, sec);
-}
-
/* calibrate_cpu is used on systems with fixed rate TSCs to determine
* processor frequency */
#define TICK_COUNT 100000000
-static unsigned int __init tsc_calibrate_cpu_khz(void)
+unsigned long __init native_calculate_cpu_khz(void)
{
int tsc_start, tsc_now;
int i, no_ctr_free;
@@ -241,7 +83,7 @@ static unsigned int __init tsc_calibrate_cpu_khz(void)
rdtscl(tsc_start);
do {
rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
- tsc_now = get_cycles_sync();
+ tsc_now = get_cycles();
} while ((tsc_now - tsc_start) < TICK_COUNT);
local_irq_restore(flags);
@@ -264,20 +106,22 @@ static struct irqaction irq0 = {
.name = "timer"
};
-void __init time_init(void)
+void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
setup_irq(0, &irq0);
+}
+void __init time_init(void)
+{
tsc_calibrate();
cpu_khz = tsc_khz;
if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
- boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 16)
- cpu_khz = tsc_calibrate_cpu_khz();
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+ cpu_khz = calculate_cpu_khz();
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
@@ -290,4 +134,5 @@ void __init time_init(void)
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
init_tsc_clocksource();
+ late_time_init = choose_time_init();
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
new file mode 100644
index 00000000000..6dfd4e76661
--- /dev/null
+++ b/arch/x86/kernel/tls.c
@@ -0,0 +1,213 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/user.h>
+#include <linux/regset.h>
+
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/proto.h>
+
+#include "tls.h"
+
+/*
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
+ */
+static int get_free_idx(void)
+{
+ struct thread_struct *t = &current->thread;
+ int idx;
+
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty(&t->tls_array[idx]))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+static void set_tls_desc(struct task_struct *p, int idx,
+ const struct user_desc *info, int n)
+{
+ struct thread_struct *t = &p->thread;
+ struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+ int cpu;
+
+ /*
+ * We must not get preempted while modifying the TLS.
+ */
+ cpu = get_cpu();
+
+ while (n-- > 0) {
+ if (LDT_empty(info))
+ desc->a = desc->b = 0;
+ else
+ fill_ldt(desc, info);
+ ++info;
+ ++desc;
+ }
+
+ if (t == &current->thread)
+ load_TLS(t, cpu);
+
+ put_cpu();
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+int do_set_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *u_info,
+ int can_allocate)
+{
+ struct user_desc info;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+
+ if (idx == -1)
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1 && can_allocate) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ set_tls_desc(p, idx, &info, 1);
+
+ return 0;
+}
+
+asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
+{
+ return do_set_thread_area(current, -1, u_info, 1);
+}
+
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+static void fill_user_desc(struct user_desc *info, int idx,
+ const struct desc_struct *desc)
+
+{
+ memset(info, 0, sizeof(*info));
+ info->entry_number = idx;
+ info->base_addr = get_desc_base(desc);
+ info->limit = get_desc_limit(desc);
+ info->seg_32bit = desc->d;
+ info->contents = desc->type >> 2;
+ info->read_exec_only = !(desc->type & 2);
+ info->limit_in_pages = desc->g;
+ info->seg_not_present = !desc->p;
+ info->useable = desc->avl;
+#ifdef CONFIG_X86_64
+ info->lm = desc->l;
+#endif
+}
+
+int do_get_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *u_info)
+{
+ struct user_desc info;
+
+ if (idx == -1 && get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ fill_user_desc(&info, idx,
+ &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
+{
+ return do_get_thread_area(current, -1, u_info);
+}
+
+int regset_tls_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ struct thread_struct *t = &target->thread;
+ int n = GDT_ENTRY_TLS_ENTRIES;
+ while (n > 0 && desc_empty(&t->tls_array[n - 1]))
+ --n;
+ return n;
+}
+
+int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ const struct desc_struct *tls;
+
+ if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ (pos % sizeof(struct user_desc)) != 0 ||
+ (count % sizeof(struct user_desc)) != 0)
+ return -EINVAL;
+
+ pos /= sizeof(struct user_desc);
+ count /= sizeof(struct user_desc);
+
+ tls = &target->thread.tls_array[pos];
+
+ if (kbuf) {
+ struct user_desc *info = kbuf;
+ while (count-- > 0)
+ fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+ tls++);
+ } else {
+ struct user_desc __user *u_info = ubuf;
+ while (count-- > 0) {
+ struct user_desc info;
+ fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+ if (__copy_to_user(u_info++, &info, sizeof(info)))
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+ const struct user_desc *info;
+
+ if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ (pos % sizeof(struct user_desc)) != 0 ||
+ (count % sizeof(struct user_desc)) != 0)
+ return -EINVAL;
+
+ if (kbuf)
+ info = kbuf;
+ else if (__copy_from_user(infobuf, ubuf, count))
+ return -EFAULT;
+ else
+ info = infobuf;
+
+ set_tls_desc(target,
+ GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
+ info, count / sizeof(struct user_desc));
+
+ return 0;
+}
diff --git a/arch/x86/kernel/tls.h b/arch/x86/kernel/tls.h
new file mode 100644
index 00000000000..2f083a2fe21
--- /dev/null
+++ b/arch/x86/kernel/tls.h
@@ -0,0 +1,21 @@
+/*
+ * Internal declarations for x86 TLS implementation functions.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#ifndef _ARCH_X86_KERNEL_TLS_H
+
+#include <linux/regset.h>
+
+extern user_regset_active_fn regset_tls_active;
+extern user_regset_get_fn regset_tls_get;
+extern user_regset_set_fn regset_tls_set;
+
+#endif /* _ARCH_X86_KERNEL_TLS_H */
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 7e16d675eb8..78cbb655aa7 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -31,9 +31,10 @@
#include <linux/mmzone.h>
#include <asm/cpu.h>
-static struct i386_cpu cpu_devices[NR_CPUS];
+static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
-int __cpuinit arch_register_cpu(int num)
+#ifdef CONFIG_HOTPLUG_CPU
+int arch_register_cpu(int num)
{
/*
* CPU0 cannot be offlined due to several
@@ -44,21 +45,23 @@ int __cpuinit arch_register_cpu(int num)
* Also certain PCI quirks require not to enable hotplug control
* for all CPU's.
*/
-#ifdef CONFIG_HOTPLUG_CPU
if (num)
- cpu_devices[num].cpu.hotpluggable = 1;
-#endif
-
- return register_cpu(&cpu_devices[num].cpu, num);
+ per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
+ return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
}
+EXPORT_SYMBOL(arch_register_cpu);
-#ifdef CONFIG_HOTPLUG_CPU
void arch_unregister_cpu(int num)
{
- return unregister_cpu(&cpu_devices[num].cpu);
+ return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
-EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+int arch_register_cpu(int num)
+{
+ return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
+}
+EXPORT_SYMBOL(arch_register_cpu);
#endif /*CONFIG_HOTPLUG_CPU*/
static int __init topology_init(void)
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 02d1e1e58e8..3cf72977d01 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -76,7 +76,8 @@ char ignore_fpu_irq = 0;
* F0 0F bug workaround.. We have a special link segment
* for this.
*/
-struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+gate_desc idt_table[256]
+ __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -101,6 +102,34 @@ asmlinkage void machine_check(void);
int kstack_depth_to_print = 24;
static unsigned int code_bytes = 64;
+void printk_address(unsigned long address, int reliable)
+{
+#ifdef CONFIG_KALLSYMS
+ unsigned long offset = 0, symsize;
+ const char *symname;
+ char *modname;
+ char *delim = ":";
+ char namebuf[128];
+ char reliab[4] = "";
+
+ symname = kallsyms_lookup(address, &symsize, &offset,
+ &modname, namebuf);
+ if (!symname) {
+ printk(" [<%08lx>]\n", address);
+ return;
+ }
+ if (!reliable)
+ strcpy(reliab, "? ");
+
+ if (!modname)
+ modname = delim = "";
+ printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
+ address, reliab, delim, modname, delim, symname, offset, symsize);
+#else
+ printk(" [<%08lx>]\n", address);
+#endif
+}
+
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
{
return p > (void *)tinfo &&
@@ -114,48 +143,35 @@ struct stack_frame {
};
static inline unsigned long print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long ebp,
+ unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
-#ifdef CONFIG_FRAME_POINTER
- struct stack_frame *frame = (struct stack_frame *)ebp;
- while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
- struct stack_frame *next;
- unsigned long addr;
+ struct stack_frame *frame = (struct stack_frame *)bp;
- addr = frame->return_address;
- ops->address(data, addr);
- /*
- * break out of recursive entries (such as
- * end_of_stack_stop_unwind_function). Also,
- * we can never allow a frame pointer to
- * move downwards!
- */
- next = frame->next_frame;
- if (next <= frame)
- break;
- frame = next;
- }
-#else
while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
unsigned long addr;
- addr = *stack++;
- if (__kernel_text_address(addr))
- ops->address(data, addr);
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + 4) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
}
-#endif
- return ebp;
+ return bp;
}
#define MSG(msg) ops->warning(data, msg)
void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack,
+ unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
- unsigned long ebp = 0;
-
if (!task)
task = current;
@@ -163,17 +179,17 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long dummy;
stack = &dummy;
if (task != current)
- stack = (unsigned long *)task->thread.esp;
+ stack = (unsigned long *)task->thread.sp;
}
#ifdef CONFIG_FRAME_POINTER
- if (!ebp) {
+ if (!bp) {
if (task == current) {
- /* Grab ebp right from our regs */
- asm ("movl %%ebp, %0" : "=r" (ebp) : );
+ /* Grab bp right from our regs */
+ asm ("movl %%ebp, %0" : "=r" (bp) : );
} else {
- /* ebp is the last reg pushed by switch_to */
- ebp = *(unsigned long *) task->thread.esp;
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
}
}
#endif
@@ -182,7 +198,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
struct thread_info *context;
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- ebp = print_context_stack(context, stack, ebp, ops, data);
+ bp = print_context_stack(context, stack, bp, ops, data);
/* Should be after the line below, but somewhere
in early boot context comes out corrupted and we
can't reference it -AK */
@@ -217,9 +233,11 @@ static int print_trace_stack(void *data, char *name)
/*
* Print one address/symbol entries per line.
*/
-static void print_trace_address(void *data, unsigned long addr)
+static void print_trace_address(void *data, unsigned long addr, int reliable)
{
printk("%s [<%08lx>] ", (char *)data, addr);
+ if (!reliable)
+ printk("? ");
print_symbol("%s\n", addr);
touch_nmi_watchdog();
}
@@ -233,32 +251,32 @@ static const struct stacktrace_ops print_trace_ops = {
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long * stack, char *log_lvl)
+ unsigned long *stack, unsigned long bp, char *log_lvl)
{
- dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
printk("%s =======================\n", log_lvl);
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long * stack)
+ unsigned long *stack, unsigned long bp)
{
- show_trace_log_lvl(task, regs, stack, "");
+ show_trace_log_lvl(task, regs, stack, bp, "");
}
static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *esp, char *log_lvl)
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
- if (esp == NULL) {
+ if (sp == NULL) {
if (task)
- esp = (unsigned long*)task->thread.esp;
+ sp = (unsigned long*)task->thread.sp;
else
- esp = (unsigned long *)&esp;
+ sp = (unsigned long *)&sp;
}
- stack = esp;
+ stack = sp;
for(i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
@@ -267,13 +285,13 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%08lx ", *stack++);
}
printk("\n%sCall Trace:\n", log_lvl);
- show_trace_log_lvl(task, regs, esp, log_lvl);
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *esp)
+void show_stack(struct task_struct *task, unsigned long *sp)
{
printk(" ");
- show_stack_log_lvl(task, NULL, esp, "");
+ show_stack_log_lvl(task, NULL, sp, 0, "");
}
/*
@@ -282,13 +300,19 @@ void show_stack(struct task_struct *task, unsigned long *esp)
void dump_stack(void)
{
unsigned long stack;
+ unsigned long bp = 0;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ asm("movl %%ebp, %0" : "=r" (bp):);
+#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- show_trace(current, NULL, &stack);
+ show_trace(current, NULL, &stack, bp);
}
EXPORT_SYMBOL(dump_stack);
@@ -307,30 +331,30 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (!user_mode_vm(regs)) {
- u8 *eip;
+ u8 *ip;
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;
printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
printk(KERN_EMERG "Code: ");
- eip = (u8 *)regs->eip - code_prologue;
- if (eip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(eip, c)) {
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
/* try starting at EIP */
- eip = (u8 *)regs->eip;
+ ip = (u8 *)regs->ip;
code_len = code_len - code_prologue + 1;
}
- for (i = 0; i < code_len; i++, eip++) {
- if (eip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(eip, c)) {
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
printk(" Bad EIP value.");
break;
}
- if (eip == (u8 *)regs->eip)
+ if (ip == (u8 *)regs->ip)
printk("<%02x> ", c);
else
printk("%02x ", c);
@@ -339,18 +363,57 @@ void show_registers(struct pt_regs *regs)
printk("\n");
}
-int is_valid_bugaddr(unsigned long eip)
+int is_valid_bugaddr(unsigned long ip)
{
unsigned short ud2;
- if (eip < PAGE_OFFSET)
+ if (ip < PAGE_OFFSET)
return 0;
- if (probe_kernel_address((unsigned short *)eip, ud2))
+ if (probe_kernel_address((unsigned short *)ip, ud2))
return 0;
return ud2 == 0x0b0f;
}
+static int die_counter;
+
+int __kprobes __die(const char * str, struct pt_regs * regs, long err)
+{
+ unsigned long sp;
+ unsigned short ss;
+
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) !=
+ NOTIFY_STOP) {
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
/*
* This is gone through when something in the kernel has done something bad and
* is about to be terminated.
@@ -366,7 +429,6 @@ void die(const char * str, struct pt_regs * regs, long err)
.lock_owner = -1,
.lock_owner_depth = 0
};
- static int die_counter;
unsigned long flags;
oops_enter();
@@ -382,43 +444,13 @@ void die(const char * str, struct pt_regs * regs, long err)
raw_local_irq_save(flags);
if (++die.lock_owner_depth < 3) {
- unsigned long esp;
- unsigned short ss;
+ report_bug(regs->ip, regs);
- report_bug(regs->eip, regs);
-
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
- ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
-
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) !=
- NOTIFY_STOP) {
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- esp = (unsigned long) (&regs->esp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
- print_symbol("%s", regs->eip);
- printk(" SS:ESP %04x:%08lx\n", ss, esp);
- }
- else
+ if (__die(str, regs, err))
regs = NULL;
- } else
+ } else {
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
+ }
bust_spinlocks(0);
die.lock_owner = -1;
@@ -454,7 +486,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
{
struct task_struct *tsk = current;
- if (regs->eflags & VM_MASK) {
+ if (regs->flags & VM_MASK) {
if (vm86)
goto vm86_trap;
goto trap_signal;
@@ -500,7 +532,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
}
#define DO_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
@@ -509,7 +541,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
{ \
siginfo_t info; \
if (irq) \
@@ -525,7 +557,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
}
#define DO_VM86_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
@@ -534,7 +566,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
}
#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs, long error_code) \
{ \
siginfo_t info; \
info.si_signo = signr; \
@@ -548,13 +580,13 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
}
-DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
#ifndef CONFIG_KPROBES
DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
#endif
DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
+DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
@@ -562,7 +594,7 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
-fastcall void __kprobes do_general_protection(struct pt_regs * regs,
+void __kprobes do_general_protection(struct pt_regs * regs,
long error_code)
{
int cpu = get_cpu();
@@ -596,7 +628,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
}
put_cpu();
- if (regs->eflags & VM_MASK)
+ if (regs->flags & VM_MASK)
goto gp_in_vm86;
if (!user_mode(regs))
@@ -605,11 +637,14 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
current->thread.error_code = error_code;
current->thread.trap_no = 13;
if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
- printk_ratelimit())
+ printk_ratelimit()) {
printk(KERN_INFO
- "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
current->comm, task_pid_nr(current),
- regs->eip, regs->esp, error_code);
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
force_sig(SIGSEGV, current);
return;
@@ -705,8 +740,8 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
*/
bust_spinlocks(1);
printk(KERN_EMERG "%s", msg);
- printk(" on CPU%d, eip %08lx, registers:\n",
- smp_processor_id(), regs->eip);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
show_registers(regs);
console_silent();
spin_unlock(&nmi_print_lock);
@@ -763,7 +798,7 @@ static __kprobes void default_do_nmi(struct pt_regs * regs)
static int ignore_nmis;
-fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+__kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
@@ -792,7 +827,7 @@ void restart_nmi(void)
}
#ifdef CONFIG_KPROBES
-fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
+void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
trace_hardirqs_fixup();
@@ -828,7 +863,7 @@ fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
* find every occurrence of the TF bit that could be saved away even
* by user code)
*/
-fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
+void __kprobes do_debug(struct pt_regs * regs, long error_code)
{
unsigned int condition;
struct task_struct *tsk = current;
@@ -837,24 +872,30 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
get_debugreg(condition, 6);
+ /*
+ * The processor cleared BTF, so don't mark that we need it set.
+ */
+ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
+ tsk->thread.debugctlmsr = 0;
+
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
- if (regs->eflags & X86_EFLAGS_IF)
+ if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg[7])
+ if (!tsk->thread.debugreg7)
goto clear_dr7;
}
- if (regs->eflags & VM_MASK)
+ if (regs->flags & VM_MASK)
goto debug_vm86;
/* Save debug status register where ptrace can see it */
- tsk->thread.debugreg[6] = condition;
+ tsk->thread.debugreg6 = condition;
/*
* Single-stepping through TF: make sure we ignore any events in
@@ -886,7 +927,7 @@ debug_vm86:
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~TF_MASK;
return;
}
@@ -895,7 +936,7 @@ clear_TF_reenable:
* the correct behaviour even in the presence of the asynchronous
* IRQ13 behaviour
*/
-void math_error(void __user *eip)
+void math_error(void __user *ip)
{
struct task_struct * task;
siginfo_t info;
@@ -911,7 +952,7 @@ void math_error(void __user *eip)
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = __SI_FAULT;
- info.si_addr = eip;
+ info.si_addr = ip;
/*
* (~cwd & swd) will mask out exceptions that are not set to unmasked
* status. 0x3f is the exception bits in these regs, 0x200 is the
@@ -954,13 +995,13 @@ void math_error(void __user *eip)
force_sig_info(SIGFPE, &info, task);
}
-fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
+void do_coprocessor_error(struct pt_regs * regs, long error_code)
{
ignore_fpu_irq = 1;
- math_error((void __user *)regs->eip);
+ math_error((void __user *)regs->ip);
}
-static void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *ip)
{
struct task_struct * task;
siginfo_t info;
@@ -976,7 +1017,7 @@ static void simd_math_error(void __user *eip)
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = __SI_FAULT;
- info.si_addr = eip;
+ info.si_addr = ip;
/*
* The SIMD FPU exceptions are handled a little differently, as there
* is only a single status/control register. Thus, to determine which
@@ -1008,19 +1049,19 @@ static void simd_math_error(void __user *eip)
force_sig_info(SIGFPE, &info, task);
}
-fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
+void do_simd_coprocessor_error(struct pt_regs * regs,
long error_code)
{
if (cpu_has_xmm) {
/* Handle SIMD FPU exceptions on PIII+ processors. */
ignore_fpu_irq = 1;
- simd_math_error((void __user *)regs->eip);
+ simd_math_error((void __user *)regs->ip);
} else {
/*
* Handle strange cache flush from user space exception
* in all other cases. This is undocumented behaviour.
*/
- if (regs->eflags & VM_MASK) {
+ if (regs->flags & VM_MASK) {
handle_vm86_fault((struct kernel_vm86_regs *)regs,
error_code);
return;
@@ -1032,7 +1073,7 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
}
}
-fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
+void do_spurious_interrupt_bug(struct pt_regs * regs,
long error_code)
{
#if 0
@@ -1041,7 +1082,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
#endif
}
-fastcall unsigned long patch_espfix_desc(unsigned long uesp,
+unsigned long patch_espfix_desc(unsigned long uesp,
unsigned long kesp)
{
struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
@@ -1095,51 +1136,17 @@ asmlinkage void math_emulate(long arg)
#endif /* CONFIG_MATH_EMULATION */
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-void set_intr_gate(unsigned int n, void *addr)
-{
- _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
-}
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
- _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
-}
-
-static void __init set_trap_gate(unsigned int n, void *addr)
-{
- _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
-}
-
-static void __init set_system_gate(unsigned int n, void *addr)
-{
- _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
-}
-
-static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
- _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
-}
-
void __init trap_init(void)
{
int i;
#ifdef CONFIG_EISA
- void __iomem *p = ioremap(0x0FFFD9, 4);
+ void __iomem *p = early_ioremap(0x0FFFD9, 4);
if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
EISA_bus = 1;
}
- iounmap(p);
+ early_iounmap(p, 4);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index cc68b92316c..efc66df728b 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -74,22 +74,24 @@ asmlinkage void alignment_check(void);
asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
+static unsigned int code_bytes = 64;
+
static inline void conditional_sti(struct pt_regs *regs)
{
- if (regs->eflags & X86_EFLAGS_IF)
+ if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
preempt_disable();
- if (regs->eflags & X86_EFLAGS_IF)
+ if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
static inline void preempt_conditional_cli(struct pt_regs *regs)
{
- if (regs->eflags & X86_EFLAGS_IF)
+ if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
/* Make sure to not schedule here because we could be running
on an exception stack. */
@@ -98,14 +100,15 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
int kstack_depth_to_print = 12;
-#ifdef CONFIG_KALLSYMS
-void printk_address(unsigned long address)
+void printk_address(unsigned long address, int reliable)
{
+#ifdef CONFIG_KALLSYMS
unsigned long offset = 0, symsize;
const char *symname;
char *modname;
char *delim = ":";
- char namebuf[128];
+ char namebuf[KSYM_NAME_LEN];
+ char reliab[4] = "";
symname = kallsyms_lookup(address, &symsize, &offset,
&modname, namebuf);
@@ -113,17 +116,17 @@ void printk_address(unsigned long address)
printk(" [<%016lx>]\n", address);
return;
}
+ if (!reliable)
+ strcpy(reliab, "? ");
+
if (!modname)
- modname = delim = "";
- printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
- address, delim, modname, delim, symname, offset, symsize);
-}
+ modname = delim = "";
+ printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
+ address, reliab, delim, modname, delim, symname, offset, symsize);
#else
-void printk_address(unsigned long address)
-{
printk(" [<%016lx>]\n", address);
-}
#endif
+}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
@@ -208,14 +211,53 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+
+
+static inline unsigned long print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
{
- void *t = (void *)tinfo;
- return p > t && p < t + THREAD_SIZE - 3;
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + 8) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
+ }
+ return bp;
}
void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
- unsigned long *stack,
+ unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
@@ -225,36 +267,28 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
if (!tsk)
tsk = current;
+ tinfo = task_thread_info(tsk);
if (!stack) {
unsigned long dummy;
stack = &dummy;
if (tsk && tsk != current)
- stack = (unsigned long *)tsk->thread.rsp;
+ stack = (unsigned long *)tsk->thread.sp;
}
- /*
- * Print function call entries within a stack. 'cond' is the
- * "end of stackframe" condition, that the 'stack++'
- * iteration will eventually trigger.
- */
-#define HANDLE_STACK(cond) \
- do while (cond) { \
- unsigned long addr = *stack++; \
- /* Use unlocked access here because except for NMIs \
- we should be already protected against module unloads */ \
- if (__kernel_text_address(addr)) { \
- /* \
- * If the address is either in the text segment of the \
- * kernel, or in the region which contains vmalloc'ed \
- * memory, it *may* be the address of a calling \
- * routine; if so, print it so that someone tracing \
- * down the cause of the crash will be able to figure \
- * out the call path that was taken. \
- */ \
- ops->address(data, addr); \
- } \
- } while (0)
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp) {
+ if (tsk == current) {
+ /* Grab bp right from our regs */
+ asm("movq %%rbp, %0" : "=r" (bp):);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) tsk->thread.sp;
+ }
+ }
+#endif
+
+
/*
* Print function call entries in all stacks, starting at the
@@ -270,7 +304,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
if (estack_end) {
if (ops->stack(data, id) < 0)
break;
- HANDLE_STACK (stack < estack_end);
+
+ bp = print_context_stack(tinfo, stack, bp, ops,
+ data, estack_end);
ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
@@ -288,7 +324,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
if (stack >= irqstack && stack < irqstack_end) {
if (ops->stack(data, "IRQ") < 0)
break;
- HANDLE_STACK (stack < irqstack_end);
+ bp = print_context_stack(tinfo, stack, bp,
+ ops, data, irqstack_end);
/*
* We link to the next stack (which would be
* the process stack normally) the last
@@ -306,9 +343,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
/*
* This handles the process stack:
*/
- tinfo = task_thread_info(tsk);
- HANDLE_STACK (valid_stack_ptr(tinfo, stack));
-#undef HANDLE_STACK
+ bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
put_cpu();
}
EXPORT_SYMBOL(dump_trace);
@@ -331,10 +366,10 @@ static int print_trace_stack(void *data, char *name)
return 0;
}
-static void print_trace_address(void *data, unsigned long addr)
+static void print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
- printk_address(addr);
+ printk_address(addr, reliable);
}
static const struct stacktrace_ops print_trace_ops = {
@@ -345,15 +380,17 @@ static const struct stacktrace_ops print_trace_ops = {
};
void
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
+show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
+ unsigned long bp)
{
printk("\nCall Trace:\n");
- dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
+ dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
printk("\n");
}
static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
+ unsigned long bp)
{
unsigned long *stack;
int i;
@@ -364,14 +401,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
// debugging aid: "show_stack(NULL, NULL);" prints the
// back trace for this cpu.
- if (rsp == NULL) {
+ if (sp == NULL) {
if (tsk)
- rsp = (unsigned long *)tsk->thread.rsp;
+ sp = (unsigned long *)tsk->thread.sp;
else
- rsp = (unsigned long *)&rsp;
+ sp = (unsigned long *)&sp;
}
- stack = rsp;
+ stack = sp;
for(i=0; i < kstack_depth_to_print; i++) {
if (stack >= irqstack && stack <= irqstack_end) {
if (stack == irqstack_end) {
@@ -387,12 +424,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
- show_trace(tsk, regs, rsp);
+ show_trace(tsk, regs, sp, bp);
}
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
+void show_stack(struct task_struct *tsk, unsigned long * sp)
{
- _show_stack(tsk, NULL, rsp);
+ _show_stack(tsk, NULL, sp, 0);
}
/*
@@ -401,13 +438,19 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
void dump_stack(void)
{
unsigned long dummy;
+ unsigned long bp = 0;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ asm("movq %%rbp, %0" : "=r" (bp):);
+#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- show_trace(NULL, NULL, &dummy);
+ show_trace(NULL, NULL, &dummy, bp);
}
EXPORT_SYMBOL(dump_stack);
@@ -415,12 +458,15 @@ EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
int i;
- int in_kernel = !user_mode(regs);
- unsigned long rsp;
+ unsigned long sp;
const int cpu = smp_processor_id();
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+ u8 *ip;
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
- rsp = regs->rsp;
+ sp = regs->sp;
+ ip = (u8 *) regs->ip - code_prologue;
printk("CPU %d ", cpu);
__show_regs(regs);
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -430,45 +476,43 @@ void show_registers(struct pt_regs *regs)
* When in-kernel, we also print out the stack and code at the
* time of the fault..
*/
- if (in_kernel) {
+ if (!user_mode(regs)) {
+ unsigned char c;
printk("Stack: ");
- _show_stack(NULL, regs, (unsigned long*)rsp);
-
- printk("\nCode: ");
- if (regs->rip < PAGE_OFFSET)
- goto bad;
-
- for (i=0; i<20; i++) {
- unsigned char c;
- if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
-bad:
+ _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
+ printk("\n");
+
+ printk(KERN_EMERG "Code: ");
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at RIP */
+ ip = (u8 *) regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
printk(" Bad RIP value.");
break;
}
- printk("%02x ", c);
+ if (ip == (u8 *)regs->ip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
}
}
printk("\n");
}
-int is_valid_bugaddr(unsigned long rip)
+int is_valid_bugaddr(unsigned long ip)
{
unsigned short ud2;
- if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
+ if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
return 0;
return ud2 == 0x0b0f;
}
-#ifdef CONFIG_BUG
-void out_of_line_bug(void)
-{
- BUG();
-}
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -496,7 +540,7 @@ unsigned __kprobes long oops_begin(void)
return flags;
}
-void __kprobes oops_end(unsigned long flags)
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
die_owner = -1;
bust_spinlocks(0);
@@ -505,12 +549,17 @@ void __kprobes oops_end(unsigned long flags)
/* Nest count reaches zero, release the lock. */
__raw_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
+ if (!regs) {
+ oops_exit();
+ return;
+ }
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
+ do_exit(signr);
}
-void __kprobes __die(const char * str, struct pt_regs * regs, long err)
+int __kprobes __die(const char * str, struct pt_regs * regs, long err)
{
static int die_counter;
printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
@@ -524,15 +573,17 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
- notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
+ if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
show_registers(regs);
add_taint(TAINT_DIE);
/* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP ");
- printk_address(regs->rip);
- printk(" RSP <%016lx>\n", regs->rsp);
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
if (kexec_should_crash(current))
crash_kexec(regs);
+ return 0;
}
void die(const char * str, struct pt_regs * regs, long err)
@@ -540,11 +591,11 @@ void die(const char * str, struct pt_regs * regs, long err)
unsigned long flags = oops_begin();
if (!user_mode(regs))
- report_bug(regs->rip, regs);
+ report_bug(regs->ip, regs);
- __die(str, regs, err);
- oops_end(flags);
- do_exit(SIGSEGV);
+ if (__die(str, regs, err))
+ regs = NULL;
+ oops_end(flags, regs, SIGSEGV);
}
void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
@@ -561,10 +612,10 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
crash_kexec(regs);
if (do_panic || panic_on_oops)
panic("Non maskable interrupt");
- oops_end(flags);
+ oops_end(flags, NULL, SIGBUS);
nmi_exit();
local_irq_enable();
- do_exit(SIGSEGV);
+ do_exit(SIGBUS);
}
static void __kprobes do_trap(int trapnr, int signr, char *str,
@@ -588,11 +639,14 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
tsk->thread.trap_no = trapnr;
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
- printk_ratelimit())
+ printk_ratelimit()) {
printk(KERN_INFO
- "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
tsk->comm, tsk->pid, str,
- regs->rip, regs->rsp, error_code);
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
if (info)
force_sig_info(signr, info, tsk);
@@ -602,19 +656,12 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
}
- /* kernel trap */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup)
- regs->rip = fixup->fixup;
- else {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
- die(str, regs, error_code);
- }
- return;
+ if (!fixup_exception(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+ die(str, regs, error_code);
}
+ return;
}
#define DO_ERROR(trapnr, signr, str, name) \
@@ -643,10 +690,10 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
do_trap(trapnr, signr, str, regs, error_code, &info); \
}
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip)
+DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
@@ -694,32 +741,28 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
tsk->thread.trap_no = 13;
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit())
+ printk_ratelimit()) {
printk(KERN_INFO
- "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
tsk->comm, tsk->pid,
- regs->rip, regs->rsp, error_code);
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
force_sig(SIGSEGV, tsk);
return;
}
- /* kernel gp */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
+ if (fixup_exception(regs))
+ return;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
- if (notify_die(DIE_GPF, "general protection fault", regs,
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
- }
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+ if (notify_die(DIE_GPF, "general protection fault", regs,
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
+ return;
+ die("general protection fault", regs, error_code);
}
static __kprobes void
@@ -832,15 +875,15 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = eregs;
/* Did already sync */
- if (eregs == (struct pt_regs *)eregs->rsp)
+ if (eregs == (struct pt_regs *)eregs->sp)
;
/* Exception from user space */
else if (user_mode(eregs))
regs = task_pt_regs(current);
/* Exception from kernel and interrupts are enabled. Move to
kernel process stack. */
- else if (eregs->eflags & X86_EFLAGS_IF)
- regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
+ else if (eregs->flags & X86_EFLAGS_IF)
+ regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
if (eregs != regs)
*regs = *eregs;
return regs;
@@ -858,6 +901,12 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
get_debugreg(condition, 6);
+ /*
+ * The processor cleared BTF, so don't mark that we need it set.
+ */
+ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
+ tsk->thread.debugctlmsr = 0;
+
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
@@ -873,27 +922,14 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
tsk->thread.debugreg6 = condition;
- /* Mask out spurious TF errors due to lazy TF clearing */
+
+ /*
+ * Single-stepping through TF: make sure we ignore any events in
+ * kernel space (but re-enable TF when returning to user mode).
+ */
if (condition & DR_STEP) {
- /*
- * The TF error should be masked out only if the current
- * process is not traced and if the TRAP flag has been set
- * previously by a tracing process (condition detected by
- * the PT_DTRACE flag); remember that the i386 TRAP flag
- * can be modified by the process itself in user mode,
- * allowing programs to debug themselves without the ptrace()
- * interface.
- */
if (!user_mode(regs))
goto clear_TF_reenable;
- /*
- * Was the TF flag set by a debugger? If so, clear it now,
- * so that register information is correct.
- */
- if (tsk->ptrace & PT_DTRACE) {
- regs->eflags &= ~TF_MASK;
- tsk->ptrace &= ~PT_DTRACE;
- }
}
/* Ok, finally something we can handle */
@@ -902,7 +938,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
- info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL;
+ info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
@@ -912,18 +948,15 @@ clear_dr7:
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->eflags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
preempt_conditional_cli(regs);
}
static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
{
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
+ if (fixup_exception(regs))
return 1;
- }
+
notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
/* Illegal floating point operation in the kernel */
current->thread.trap_no = trapnr;
@@ -938,7 +971,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
*/
asmlinkage void do_coprocessor_error(struct pt_regs *regs)
{
- void __user *rip = (void __user *)(regs->rip);
+ void __user *ip = (void __user *)(regs->ip);
struct task_struct * task;
siginfo_t info;
unsigned short cwd, swd;
@@ -958,7 +991,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = __SI_FAULT;
- info.si_addr = rip;
+ info.si_addr = ip;
/*
* (~cwd & swd) will mask out exceptions that are not set to unmasked
* status. 0x3f is the exception bits in these regs, 0x200 is the
@@ -1007,7 +1040,7 @@ asmlinkage void bad_intr(void)
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
{
- void __user *rip = (void __user *)(regs->rip);
+ void __user *ip = (void __user *)(regs->ip);
struct task_struct * task;
siginfo_t info;
unsigned short mxcsr;
@@ -1027,7 +1060,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_code = __SI_FAULT;
- info.si_addr = rip;
+ info.si_addr = ip;
/*
* The SIMD FPU exceptions are handled a little differently, as there
* is only a single status/control register. Thus, to determine which
@@ -1089,6 +1122,7 @@ asmlinkage void math_state_restore(void)
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
}
+EXPORT_SYMBOL_GPL(math_state_restore);
void __init trap_init(void)
{
@@ -1144,3 +1178,14 @@ static int __init kstack_setup(char *s)
return 0;
}
early_param("kstack", kstack_setup);
+
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 9ebc0dab66b..43517e324be 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -5,6 +5,7 @@
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/dmi.h>
+#include <linux/percpu.h>
#include <asm/delay.h>
#include <asm/tsc.h>
@@ -23,8 +24,6 @@ static int tsc_enabled;
unsigned int tsc_khz;
EXPORT_SYMBOL_GPL(tsc_khz);
-int tsc_disable;
-
#ifdef CONFIG_X86_TSC
static int __init tsc_setup(char *str)
{
@@ -39,8 +38,7 @@ static int __init tsc_setup(char *str)
*/
static int __init tsc_setup(char *str)
{
- tsc_disable = 1;
-
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
return 1;
}
#endif
@@ -80,13 +78,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ unsigned long flags, prev_scale, *scale;
+ unsigned long long tsc_now, ns_now;
+
+ local_irq_save(flags);
+ sched_clock_idle_sleep_event();
+
+ scale = &per_cpu(cyc2ns, cpu);
+
+ rdtscll(tsc_now);
+ ns_now = __cycles_2_ns(tsc_now);
+
+ prev_scale = *scale;
+ if (cpu_khz)
+ *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+ /*
+ * Start smoothly with the new frequency:
+ */
+ sched_clock_idle_wakeup_event(0);
+ local_irq_restore(flags);
}
/*
@@ -239,7 +255,9 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
- set_cyc2ns_scale(cpu_khz);
+ preempt_disable();
+ set_cyc2ns_scale(cpu_khz, smp_processor_id());
+ preempt_enable();
/*
* TSC based sched_clock turns
* to junk w/ cpufreq
@@ -333,6 +351,11 @@ __cpuinit int unsynchronized_tsc(void)
{
if (!cpu_has_tsc || tsc_unstable)
return 1;
+
+ /* Anything with constant TSC should be synchronized */
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return 0;
+
/*
* Intel systems are normally all synchronized.
* Exceptions must mark TSC as unstable:
@@ -367,7 +390,9 @@ static inline void check_geode_tsc_reliable(void) { }
void __init tsc_init(void)
{
- if (!cpu_has_tsc || tsc_disable)
+ int cpu;
+
+ if (!cpu_has_tsc)
goto out_no_tsc;
cpu_khz = calculate_cpu_khz();
@@ -380,7 +405,15 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
- set_cyc2ns_scale(cpu_khz);
+ /*
+ * Secondary CPUs do not run through tsc_init(), so set up
+ * all the scale factors for all CPUs, assuming the same
+ * speed as the bootup CPU. (cpufreq notifiers will fix this
+ * up if their speed diverges)
+ */
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(cpu_khz, cpu);
+
use_tsc_delay();
/* Check and install the TSC clocksource */
@@ -403,10 +436,5 @@ void __init tsc_init(void)
return;
out_no_tsc:
- /*
- * Set the tsc_disable flag if there's no TSC support, this
- * makes it a fast flag for the kernel to see whether it
- * should be using the TSC.
- */
- tsc_disable = 1;
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9c70af45b42..947554ddabb 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -10,6 +10,7 @@
#include <asm/hpet.h>
#include <asm/timex.h>
+#include <asm/timer.h>
static int notsc __initdata = 0;
@@ -18,19 +19,51 @@ EXPORT_SYMBOL(cpu_khz);
unsigned int tsc_khz;
EXPORT_SYMBOL(tsc_khz);
-static unsigned int cyc2ns_scale __read_mostly;
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better precision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
-static inline void set_cyc2ns_scale(unsigned long khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
-}
+ unsigned long flags, prev_scale, *scale;
+ unsigned long long tsc_now, ns_now;
-static unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
+ local_irq_save(flags);
+ sched_clock_idle_sleep_event();
+
+ scale = &per_cpu(cyc2ns, cpu);
+
+ rdtscll(tsc_now);
+ ns_now = __cycles_2_ns(tsc_now);
+
+ prev_scale = *scale;
+ if (cpu_khz)
+ *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+ sched_clock_idle_wakeup_event(0);
+ local_irq_restore(flags);
}
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
{
unsigned long a = 0;
@@ -44,12 +77,27 @@ unsigned long long sched_clock(void)
return cycles_2_ns(a);
}
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long
+sched_clock(void) __attribute__((alias("native_sched_clock")));
+#endif
+
+
static int tsc_unstable;
-inline int check_tsc_unstable(void)
+int check_tsc_unstable(void)
{
return tsc_unstable;
}
+EXPORT_SYMBOL_GPL(check_tsc_unstable);
+
#ifdef CONFIG_CPU_FREQ
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
@@ -100,7 +148,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
mark_tsc_unstable("cpufreq changes");
}
- set_cyc2ns_scale(tsc_khz_ref);
+ preempt_disable();
+ set_cyc2ns_scale(tsc_khz_ref, smp_processor_id());
+ preempt_enable();
return 0;
}
@@ -133,12 +183,12 @@ static unsigned long __init tsc_read_refs(unsigned long *pm,
int i;
for (i = 0; i < MAX_RETRIES; i++) {
- t1 = get_cycles_sync();
+ t1 = get_cycles();
if (hpet)
*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
else
*pm = acpi_pm_read_early();
- t2 = get_cycles_sync();
+ t2 = get_cycles();
if ((t2 - t1) < SMI_TRESHOLD)
return t2;
}
@@ -151,7 +201,7 @@ static unsigned long __init tsc_read_refs(unsigned long *pm,
void __init tsc_calibrate(void)
{
unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled();
+ int hpet = is_hpet_enabled(), cpu;
local_irq_save(flags);
@@ -162,9 +212,9 @@ void __init tsc_calibrate(void)
outb(0xb0, 0x43);
outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- tr1 = get_cycles_sync();
+ tr1 = get_cycles();
while ((inb(0x61) & 0x20) == 0);
- tr2 = get_cycles_sync();
+ tr2 = get_cycles();
tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
@@ -206,7 +256,9 @@ void __init tsc_calibrate(void)
}
tsc_khz = tsc2 / tsc1;
- set_cyc2ns_scale(tsc_khz);
+
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(tsc_khz, cpu);
}
/*
@@ -222,17 +274,9 @@ __cpuinit int unsynchronized_tsc(void)
if (apic_is_clustered_box())
return 1;
#endif
- /* Most intel systems have synchronized TSCs except for
- multi node systems */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-#ifdef CONFIG_ACPI
- /* But TSC doesn't tick in C3 so don't use it there */
- if (acpi_gbl_FADT.header.length > 0 &&
- acpi_gbl_FADT.C3latency < 1000)
- return 1;
-#endif
+
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
- }
/* Assume multi socket systems are not synchronized */
return num_present_cpus() > 1;
@@ -250,13 +294,13 @@ __setup("notsc", notsc_setup);
/* clock source code: */
static cycle_t read_tsc(void)
{
- cycle_t ret = (cycle_t)get_cycles_sync();
+ cycle_t ret = (cycle_t)get_cycles();
return ret;
}
static cycle_t __vsyscall_fn vread_tsc(void)
{
- cycle_t ret = (cycle_t)get_cycles_sync();
+ cycle_t ret = (cycle_t)vget_cycles();
return ret;
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9125efe66a0..0577825cf89 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,7 @@ static __cpuinit void check_tsc_warp(void)
cycles_t start, now, prev, end;
int i;
- start = get_cycles_sync();
+ start = get_cycles();
/*
* The measurement runs for 20 msecs:
*/
@@ -61,18 +61,18 @@ static __cpuinit void check_tsc_warp(void)
*/
__raw_spin_lock(&sync_lock);
prev = last_tsc;
- now = get_cycles_sync();
+ now = get_cycles();
last_tsc = now;
__raw_spin_unlock(&sync_lock);
/*
* Be nice every now and then (and also check whether
- * measurement is done [we also insert a 100 million
+ * measurement is done [we also insert a 10 million
* loops safety exit, so we dont lock up in case the
* TSC readout is totally broken]):
*/
if (unlikely(!(i & 7))) {
- if (now > end || i > 100000000)
+ if (now > end || i > 10000000)
break;
cpu_relax();
touch_nmi_watchdog();
@@ -87,7 +87,11 @@ static __cpuinit void check_tsc_warp(void)
nr_warps++;
__raw_spin_unlock(&sync_lock);
}
-
+ }
+ if (!(now-start)) {
+ printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
+ now-start, end-start);
+ WARN_ON(1);
}
}
@@ -129,24 +133,24 @@ void __cpuinit check_tsc_sync_source(int cpu)
while (atomic_read(&stop_count) != cpus-1)
cpu_relax();
- /*
- * Reset it - just in case we boot another CPU later:
- */
- atomic_set(&start_count, 0);
-
if (nr_warps) {
printk("\n");
printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
" turning off TSC clock.\n", max_warp);
mark_tsc_unstable("check_tsc_sync_source failed");
- nr_warps = 0;
- max_warp = 0;
- last_tsc = 0;
} else {
printk(" passed.\n");
}
/*
+ * Reset it - just in case we boot another CPU later:
+ */
+ atomic_set(&start_count, 0);
+ nr_warps = 0;
+ max_warp = 0;
+ last_tsc = 0;
+
+ /*
* Let the target continue with the bootup:
*/
atomic_inc(&stop_count);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 157e4bedd3c..738c2104df3 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -70,10 +70,10 @@
/*
* 8- and 16-bit register defines..
*/
-#define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0])
-#define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1])
-#define IP(regs) (*(unsigned short *)&((regs)->pt.eip))
-#define SP(regs) (*(unsigned short *)&((regs)->pt.esp))
+#define AL(regs) (((unsigned char *)&((regs)->pt.ax))[0])
+#define AH(regs) (((unsigned char *)&((regs)->pt.ax))[1])
+#define IP(regs) (*(unsigned short *)&((regs)->pt.ip))
+#define SP(regs) (*(unsigned short *)&((regs)->pt.sp))
/*
* virtual flags (16 and 32-bit versions)
@@ -93,12 +93,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
{
int ret = 0;
- /* kernel_vm86_regs is missing xgs, so copy everything up to
+ /* kernel_vm86_regs is missing gs, so copy everything up to
(but not including) orig_eax, and then rest including orig_eax. */
- ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax));
- ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax,
+ ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
+ ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
sizeof(struct kernel_vm86_regs) -
- offsetof(struct kernel_vm86_regs, pt.orig_eax));
+ offsetof(struct kernel_vm86_regs, pt.orig_ax));
return ret;
}
@@ -110,18 +110,17 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
{
int ret = 0;
- /* copy eax-xfs inclusive */
- ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax));
- /* copy orig_eax-__gsh+extra */
- ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax,
+ /* copy ax-fs inclusive */
+ ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
+ /* copy orig_ax-__gsh+extra */
+ ret += copy_from_user(&regs->pt.orig_ax, &user->orig_eax,
sizeof(struct kernel_vm86_regs) -
- offsetof(struct kernel_vm86_regs, pt.orig_eax) +
+ offsetof(struct kernel_vm86_regs, pt.orig_ax) +
extra);
return ret;
}
-struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
-struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
+struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
{
struct tss_struct *tss;
struct pt_regs *ret;
@@ -138,7 +137,7 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
printk("no vm86_info: BAD\n");
do_exit(SIGSEGV);
}
- set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+ set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask);
tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs);
tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
if (tmp) {
@@ -147,15 +146,15 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
}
tss = &per_cpu(init_tss, get_cpu());
- current->thread.esp0 = current->thread.saved_esp0;
+ current->thread.sp0 = current->thread.saved_sp0;
current->thread.sysenter_cs = __KERNEL_CS;
- load_esp0(tss, &current->thread);
- current->thread.saved_esp0 = 0;
+ load_sp0(tss, &current->thread);
+ current->thread.saved_sp0 = 0;
put_cpu();
ret = KVM86->regs32;
- ret->xfs = current->thread.saved_fs;
+ ret->fs = current->thread.saved_fs;
loadsegment(gs, current->thread.saved_gs);
return ret;
@@ -197,7 +196,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
asmlinkage int sys_vm86old(struct pt_regs regs)
{
- struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
+ struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx;
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
@@ -207,7 +206,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
int tmp, ret = -EPERM;
tsk = current;
- if (tsk->thread.saved_esp0)
+ if (tsk->thread.saved_sp0)
goto out;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, vm86plus) -
@@ -237,12 +236,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
struct vm86plus_struct __user *v86;
tsk = current;
- switch (regs.ebx) {
+ switch (regs.bx) {
case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ:
case VM86_GET_IRQ_BITS:
case VM86_GET_AND_RESET_IRQ:
- ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
+ ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
goto out;
case VM86_PLUS_INSTALL_CHECK:
/* NOTE: on old vm86 stuff this will return the error
@@ -256,9 +255,9 @@ asmlinkage int sys_vm86(struct pt_regs regs)
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
ret = -EPERM;
- if (tsk->thread.saved_esp0)
+ if (tsk->thread.saved_sp0)
goto out;
- v86 = (struct vm86plus_struct __user *)regs.ecx;
+ v86 = (struct vm86plus_struct __user *)regs.cx;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs));
@@ -281,23 +280,23 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
/*
* make sure the vm86() system call doesn't try to do anything silly
*/
- info->regs.pt.xds = 0;
- info->regs.pt.xes = 0;
- info->regs.pt.xfs = 0;
+ info->regs.pt.ds = 0;
+ info->regs.pt.es = 0;
+ info->regs.pt.fs = 0;
/* we are clearing gs later just before "jmp resume_userspace",
* because it is not saved/restored.
*/
/*
- * The eflags register is also special: we cannot trust that the user
+ * The flags register is also special: we cannot trust that the user
* has set it up safely, so this makes sure interrupt etc flags are
* inherited from protected mode.
*/
- VEFLAGS = info->regs.pt.eflags;
- info->regs.pt.eflags &= SAFE_MASK;
- info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK;
- info->regs.pt.eflags |= VM_MASK;
+ VEFLAGS = info->regs.pt.flags;
+ info->regs.pt.flags &= SAFE_MASK;
+ info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
+ info->regs.pt.flags |= VM_MASK;
switch (info->cpu_type) {
case CPU_286:
@@ -315,18 +314,18 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
}
/*
- * Save old state, set default return value (%eax) to 0
+ * Save old state, set default return value (%ax) to 0
*/
- info->regs32->eax = 0;
- tsk->thread.saved_esp0 = tsk->thread.esp0;
- tsk->thread.saved_fs = info->regs32->xfs;
+ info->regs32->ax = 0;
+ tsk->thread.saved_sp0 = tsk->thread.sp0;
+ tsk->thread.saved_fs = info->regs32->fs;
savesegment(gs, tsk->thread.saved_gs);
tss = &per_cpu(init_tss, get_cpu());
- tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+ tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
if (cpu_has_sep)
tsk->thread.sysenter_cs = 0;
- load_esp0(tss, &tsk->thread);
+ load_sp0(tss, &tsk->thread);
put_cpu();
tsk->thread.screen_bitmap = info->screen_bitmap;
@@ -352,7 +351,7 @@ static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
struct pt_regs * regs32;
regs32 = save_v86_state(regs16);
- regs32->eax = retval;
+ regs32->ax = retval;
__asm__ __volatile__("movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
"jmp resume_userspace"
@@ -373,30 +372,30 @@ static inline void clear_IF(struct kernel_vm86_regs * regs)
static inline void clear_TF(struct kernel_vm86_regs * regs)
{
- regs->pt.eflags &= ~TF_MASK;
+ regs->pt.flags &= ~TF_MASK;
}
static inline void clear_AC(struct kernel_vm86_regs * regs)
{
- regs->pt.eflags &= ~AC_MASK;
+ regs->pt.flags &= ~AC_MASK;
}
/* It is correct to call set_IF(regs) from the set_vflags_*
* functions. However someone forgot to call clear_IF(regs)
* in the opposite case.
* After the command sequence CLI PUSHF STI POPF you should
- * end up with interrups disabled, but you ended up with
+ * end up with interrupts disabled, but you ended up with
* interrupts enabled.
* ( I was testing my own changes, but the only bug I
* could find was in a function I had not changed. )
* [KD]
*/
-static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
+static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs * regs)
{
- set_flags(VEFLAGS, eflags, current->thread.v86mask);
- set_flags(regs->pt.eflags, eflags, SAFE_MASK);
- if (eflags & IF_MASK)
+ set_flags(VEFLAGS, flags, current->thread.v86mask);
+ set_flags(regs->pt.flags, flags, SAFE_MASK);
+ if (flags & IF_MASK)
set_IF(regs);
else
clear_IF(regs);
@@ -405,7 +404,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs
static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
{
set_flags(VFLAGS, flags, current->thread.v86mask);
- set_flags(regs->pt.eflags, flags, SAFE_MASK);
+ set_flags(regs->pt.flags, flags, SAFE_MASK);
if (flags & IF_MASK)
set_IF(regs);
else
@@ -414,7 +413,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg
static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
{
- unsigned long flags = regs->pt.eflags & RETURN_MASK;
+ unsigned long flags = regs->pt.flags & RETURN_MASK;
if (VEFLAGS & VIF_MASK)
flags |= IF_MASK;
@@ -518,7 +517,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
unsigned long __user *intr_ptr;
unsigned long segoffs;
- if (regs->pt.xcs == BIOSSEG)
+ if (regs->pt.cs == BIOSSEG)
goto cannot_handle;
if (is_revectored(i, &KVM86->int_revectored))
goto cannot_handle;
@@ -530,9 +529,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
if ((segoffs >> 16) == BIOSSEG)
goto cannot_handle;
pushw(ssp, sp, get_vflags(regs), cannot_handle);
- pushw(ssp, sp, regs->pt.xcs, cannot_handle);
+ pushw(ssp, sp, regs->pt.cs, cannot_handle);
pushw(ssp, sp, IP(regs), cannot_handle);
- regs->pt.xcs = segoffs >> 16;
+ regs->pt.cs = segoffs >> 16;
SP(regs) -= 6;
IP(regs) = segoffs & 0xffff;
clear_TF(regs);
@@ -549,7 +548,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno
if (VMPI.is_vm86pus) {
if ( (trapno==3) || (trapno==1) )
return_to_32bit(regs, VM86_TRAP + (trapno << 8));
- do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs));
+ do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
return 0;
}
if (trapno !=1)
@@ -585,10 +584,10 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
handle_vm86_trap(regs, 0, 1); \
return; } while (0)
- orig_flags = *(unsigned short *)&regs->pt.eflags;
+ orig_flags = *(unsigned short *)&regs->pt.flags;
- csp = (unsigned char __user *) (regs->pt.xcs << 4);
- ssp = (unsigned char __user *) (regs->pt.xss << 4);
+ csp = (unsigned char __user *) (regs->pt.cs << 4);
+ ssp = (unsigned char __user *) (regs->pt.ss << 4);
sp = SP(regs);
ip = IP(regs);
@@ -675,7 +674,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
SP(regs) += 6;
}
IP(regs) = newip;
- regs->pt.xcs = newcs;
+ regs->pt.cs = newcs;
CHECK_IF_IN_TRAP;
if (data32) {
set_vflags_long(newflags, regs);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index f02bad68aba..4525bc2c2e1 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -62,7 +62,10 @@ static struct {
void (*cpuid)(void /* non-c */);
void (*_set_ldt)(u32 selector);
void (*set_tr)(u32 selector);
- void (*set_kernel_stack)(u32 selector, u32 esp0);
+ void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
+ void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
+ void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
+ void (*set_kernel_stack)(u32 selector, u32 sp0);
void (*allocate_page)(u32, u32, u32, u32, u32);
void (*release_page)(u32, u32);
void (*set_pte)(pte_t, pte_t *, unsigned);
@@ -88,13 +91,13 @@ struct vmi_timer_ops vmi_timer_ops;
#define IRQ_PATCH_DISABLE 5
static inline void patch_offset(void *insnbuf,
- unsigned long eip, unsigned long dest)
+ unsigned long ip, unsigned long dest)
{
- *(unsigned long *)(insnbuf+1) = dest-eip-5;
+ *(unsigned long *)(insnbuf+1) = dest-ip-5;
}
static unsigned patch_internal(int call, unsigned len, void *insnbuf,
- unsigned long eip)
+ unsigned long ip)
{
u64 reloc;
struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
@@ -103,13 +106,13 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
case VMI_RELOCATION_CALL_REL:
BUG_ON(len < 5);
*(char *)insnbuf = MNEM_CALL;
- patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+ patch_offset(insnbuf, ip, (unsigned long)rel->eip);
return 5;
case VMI_RELOCATION_JUMP_REL:
BUG_ON(len < 5);
*(char *)insnbuf = MNEM_JMP;
- patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+ patch_offset(insnbuf, ip, (unsigned long)rel->eip);
return 5;
case VMI_RELOCATION_NOP:
@@ -131,25 +134,25 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
* sequence. The callee does nop padding for us.
*/
static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
- unsigned long eip, unsigned len)
+ unsigned long ip, unsigned len)
{
switch (type) {
case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
return patch_internal(VMI_CALL_DisableInterrupts, len,
- insns, eip);
+ insns, ip);
case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
return patch_internal(VMI_CALL_EnableInterrupts, len,
- insns, eip);
+ insns, ip);
case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
return patch_internal(VMI_CALL_SetInterruptMask, len,
- insns, eip);
+ insns, ip);
case PARAVIRT_PATCH(pv_irq_ops.save_fl):
return patch_internal(VMI_CALL_GetInterruptMask, len,
- insns, eip);
+ insns, ip);
case PARAVIRT_PATCH(pv_cpu_ops.iret):
- return patch_internal(VMI_CALL_IRET, len, insns, eip);
- case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
- return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
+ return patch_internal(VMI_CALL_IRET, len, insns, ip);
+ case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+ return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
default:
break;
}
@@ -157,36 +160,36 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
}
/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
-static void vmi_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
+ unsigned int *cx, unsigned int *dx)
{
int override = 0;
- if (*eax == 1)
+ if (*ax == 1)
override = 1;
asm volatile ("call *%6"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid));
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
if (override) {
if (disable_pse)
- *edx &= ~X86_FEATURE_PSE;
+ *dx &= ~X86_FEATURE_PSE;
if (disable_pge)
- *edx &= ~X86_FEATURE_PGE;
+ *dx &= ~X86_FEATURE_PGE;
if (disable_sep)
- *edx &= ~X86_FEATURE_SEP;
+ *dx &= ~X86_FEATURE_SEP;
if (disable_tsc)
- *edx &= ~X86_FEATURE_TSC;
+ *dx &= ~X86_FEATURE_TSC;
if (disable_mtrr)
- *edx &= ~X86_FEATURE_MTRR;
+ *dx &= ~X86_FEATURE_MTRR;
}
}
static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
{
if (gdt[nr].a != new->a || gdt[nr].b != new->b)
- write_gdt_entry(gdt, nr, new->a, new->b);
+ write_gdt_entry(gdt, nr, new, 0);
}
static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
@@ -200,12 +203,12 @@ static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
static void vmi_set_ldt(const void *addr, unsigned entries)
{
unsigned cpu = smp_processor_id();
- u32 low, high;
+ struct desc_struct desc;
- pack_descriptor(&low, &high, (unsigned long)addr,
+ pack_descriptor(&desc, (unsigned long)addr,
entries * sizeof(struct desc_struct) - 1,
- DESCTYPE_LDT, 0);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
+ DESC_LDT, 0);
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
}
@@ -214,17 +217,37 @@ static void vmi_set_tr(void)
vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
}
-static void vmi_load_esp0(struct tss_struct *tss,
+static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
+{
+ u32 *idt_entry = (u32 *)g;
+ vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+}
+
+static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
+ const void *desc, int type)
+{
+ u32 *gdt_entry = (u32 *)desc;
+ vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+}
+
+static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
+ const void *desc)
+{
+ u32 *ldt_entry = (u32 *)desc;
+ vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+}
+
+static void vmi_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
- tss->x86_tss.esp0 = thread->esp0;
+ tss->x86_tss.sp0 = thread->sp0;
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
tss->x86_tss.ss1 = thread->sysenter_cs;
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
- vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
+ vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
}
static void vmi_flush_tlb_user(void)
@@ -375,7 +398,7 @@ static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pd(u32 pfn)
+static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -452,7 +475,7 @@ static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep
static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
#ifdef CONFIG_X86_PAE
- const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
+ const pte_t pte = { .pte = pmdval.pmd };
vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
#else
const pte_t pte = { pmdval.pud.pgd.pgd };
@@ -485,21 +508,21 @@ static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t
static void vmi_set_pud(pud_t *pudp, pud_t pudval)
{
/* Um, eww */
- const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
+ const pte_t pte = { .pte = pudval.pgd.pgd };
vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
}
static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- const pte_t pte = { 0 };
+ const pte_t pte = { .pte = 0 };
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
static void vmi_pmd_clear(pmd_t *pmd)
{
- const pte_t pte = { 0 };
+ const pte_t pte = { .pte = 0 };
vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
}
@@ -790,10 +813,13 @@ static inline int __init activate_vmi(void)
para_fill(pv_cpu_ops.store_idt, GetIDT);
para_fill(pv_cpu_ops.store_tr, GetTR);
pv_cpu_ops.load_tls = vmi_load_tls;
- para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
- para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
- para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
- para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
+ para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
+ write_ldt_entry, WriteLDTEntry);
+ para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
+ write_gdt_entry, WriteGDTEntry);
+ para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
+ write_idt_entry, WriteIDTEntry);
+ para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
para_fill(pv_cpu_ops.io_delay, IODelay);
@@ -870,7 +896,7 @@ static inline int __init activate_vmi(void)
* the backend. They are performance critical anyway, so requiring
* a patch is not a big problem.
*/
- pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+ pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
pv_cpu_ops.iret = (void *)0xbadbab0;
#ifdef CONFIG_SMP
@@ -963,19 +989,19 @@ static int __init parse_vmi(char *arg)
return -EINVAL;
if (!strcmp(arg, "disable_pge")) {
- clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
disable_pge = 1;
} else if (!strcmp(arg, "disable_pse")) {
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
disable_pse = 1;
} else if (!strcmp(arg, "disable_sep")) {
- clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
disable_sep = 1;
} else if (!strcmp(arg, "disable_tsc")) {
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
disable_tsc = 1;
} else if (!strcmp(arg, "disable_mtrr")) {
- clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
disable_mtrr = 1;
} else if (!strcmp(arg, "disable_timer")) {
disable_vmi_timer = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index b1b5ab08b26..a2b030780aa 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -35,7 +35,6 @@
#include <asm/i8253.h>
#include <irq_vectors.h>
-#include "io_ports.h"
#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
@@ -238,7 +237,7 @@ static void __devinit vmi_time_init_clockevent(void)
void __init vmi_time_init(void)
{
/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
- outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
+ outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
vmi_time_init_clockevent();
setup_irq(0, &vmi_clock_action);
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 84c913f38f9..f1148ac8abe 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -8,12 +8,6 @@
* put it inside the section definition.
*/
-/* Don't define absolute symbols until and unless you know that symbol
- * value is should remain constant even if kernel image is relocated
- * at run time. Absolute symbols are not relocated. If symbol value should
- * change if kernel is relocated, make the symbol section relative and
- * put it inside the section definition.
- */
#define LOAD_OFFSET __PAGE_OFFSET
#include <asm-generic/vmlinux.lds.h>
@@ -44,6 +38,8 @@ SECTIONS
/* read-only */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
+ . = ALIGN(4096); /* not really needed, already page aligned */
+ *(.text.page_aligned)
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index ea5386944e6..0992b9946c6 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -37,16 +37,15 @@ SECTIONS
KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
- } :text = 0x9090
- /* out-of-line lock text */
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
-
- _etext = .; /* End of text section */
+ _etext = .; /* End of text section */
+ } :text = 0x9090
. = ALIGN(16); /* Exception table */
- __start___ex_table = .;
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
- __stop___ex_table = .;
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+ __start___ex_table = .;
+ *(__ex_table)
+ __stop___ex_table = .;
+ }
NOTES :text :note
@@ -179,6 +178,14 @@ SECTIONS
}
__con_initcall_end = .;
SECURITY_INIT
+
+ . = ALIGN(8);
+ .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+ __parainstructions = .;
+ *(.parainstructions)
+ __parainstructions_end = .;
+ }
+
. = ALIGN(8);
__alt_instructions = .;
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 414caf0c5f9..d971210a6d3 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -25,21 +25,24 @@ static int __init vsmp_init(void)
return 0;
/* Check if we are running on a ScaleMP vSMP box */
- if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) ||
- (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
+ if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) !=
+ PCI_VENDOR_ID_SCALEMP) ||
+ (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) !=
+ PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
return 0;
/* set vSMP magic bits to indicate vSMP capable kernel */
address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8);
cap = readl(address);
ctl = readl(address + 4);
- printk("vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl);
+ printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n",
+ cap, ctl);
if (cap & ctl & (1 << 4)) {
/* Turn on vSMP IRQ fastpath handling (see system.h) */
ctl &= ~(1 << 4);
writel(ctl, address + 4);
ctl = readl(address + 4);
- printk("vSMP CTL: control set to:0x%08x\n", ctl);
+ printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
}
iounmap(address);
diff --git a/arch/x86/kernel/vsyscall_32.S b/arch/x86/kernel/vsyscall_32.S
deleted file mode 100644
index a5ab3dc4fd2..00000000000
--- a/arch/x86/kernel/vsyscall_32.S
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <linux/init.h>
-
-__INITDATA
-
- .globl vsyscall_int80_start, vsyscall_int80_end
-vsyscall_int80_start:
- .incbin "arch/x86/kernel/vsyscall-int80_32.so"
-vsyscall_int80_end:
-
- .globl vsyscall_sysenter_start, vsyscall_sysenter_end
-vsyscall_sysenter_start:
- .incbin "arch/x86/kernel/vsyscall-sysenter_32.so"
-vsyscall_sysenter_end:
-
-__FINIT
diff --git a/arch/x86/kernel/vsyscall_32.lds.S b/arch/x86/kernel/vsyscall_32.lds.S
deleted file mode 100644
index 4a8b0ed9b8f..00000000000
--- a/arch/x86/kernel/vsyscall_32.lds.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page). This script controls its layout.
- */
-#include <asm/asm-offsets.h>
-
-SECTIONS
-{
- . = VDSO_PRELINK_asm + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VDSO_PRELINK_asm + 0x400;
-
- .text : { *(.text) } :text =0x90909090
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- } :text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.5 {
- global:
- __kernel_vsyscall;
- __kernel_sigreturn;
- __kernel_rt_sigreturn;
-
- local: *;
- };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value. */
-ENTRY(__kernel_vsyscall);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ad4005c6d4a..3f824277458 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -43,7 +43,7 @@
#include <asm/vgtod.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
+#define __syscall_clobber "r11","cx","memory"
#define __pa_vsymbol(x) \
({unsigned long v; \
extern char __vsyscall_0; \
@@ -190,7 +190,7 @@ time_t __vsyscall(1) vtime(time_t *t)
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
- unsigned int dummy, p;
+ unsigned int p;
unsigned long j = 0;
/* Fast cache - only recompute value once per jiffies and avoid
@@ -205,7 +205,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
p = tcache->blob[1];
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
+ native_read_tscp(&p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
@@ -297,7 +297,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
/* Store cpu number in limit so that it can be loaded quickly
in user space in vgetcpu.
12 bits for the CPU and 8 bits for the node. */
- d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+ d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU);
*d = 0x0f40000000000ULL;
*d |= cpu;
*d |= (node & 0xf) << 12;
@@ -319,7 +319,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
return NOTIFY_DONE;
}
-static void __init map_vsyscall(void)
+void __init map_vsyscall(void)
{
extern char __vsyscall_0;
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
@@ -335,7 +335,6 @@ static int __init vsyscall_init(void)
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
- map_vsyscall();
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2);
#endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 77c25b30763..a66e9c1a053 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -8,6 +8,7 @@
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/desc.h>
EXPORT_SYMBOL(kernel_thread);
@@ -34,13 +35,6 @@ EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
-#ifdef CONFIG_SMP
-extern void __write_lock_failed(rwlock_t *rw);
-extern void __read_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
/* Export string functions. We normally rely on gcc builtin for most of these,
but gcc sometimes decides not to inline them. */
#undef memcpy
@@ -60,3 +54,8 @@ EXPORT_SYMBOL(init_level4_pgt);
EXPORT_SYMBOL(load_gs_index);
EXPORT_SYMBOL(_proxy_pda);
+
+#ifdef CONFIG_PARAVIRT
+/* Virtualized guests may want to use it */
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
+#endif
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 19626ace0f5..964dfa36d36 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -1,6 +1,7 @@
config LGUEST_GUEST
bool "Lguest guest support"
select PARAVIRT
+ depends on X86_32
depends on !X86_PAE
depends on !(X86_VISWS || X86_VOYAGER)
select VIRTIO
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 92c56117eae..a63373759f0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -175,8 +175,8 @@ static void lguest_leave_lazy_mode(void)
* check there when it wants to deliver an interrupt.
*/
-/* save_flags() is expected to return the processor state (ie. "eflags"). The
- * eflags word contains all kind of stuff, but in practice Linux only cares
+/* save_flags() is expected to return the processor state (ie. "flags"). The
+ * flags word contains all kind of stuff, but in practice Linux only cares
* about the interrupt flag. Our "save_flags()" just returns that. */
static unsigned long save_fl(void)
{
@@ -217,19 +217,20 @@ static void irq_enable(void)
* address of the handler, and... well, who cares? The Guest just asks the
* Host to make the change anyway, because the Host controls the real IDT.
*/
-static void lguest_write_idt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
+static void lguest_write_idt_entry(gate_desc *dt,
+ int entrynum, const gate_desc *g)
{
+ u32 *desc = (u32 *)g;
/* Keep the local copy up to date. */
- write_dt_entry(dt, entrynum, low, high);
+ native_write_idt_entry(dt, entrynum, g);
/* Tell Host about this new entry. */
- hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
+ hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
}
/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
* time it is written, so we can simply loop through all entries and tell the
* Host about them. */
-static void lguest_load_idt(const struct Xgt_desc_struct *desc)
+static void lguest_load_idt(const struct desc_ptr *desc)
{
unsigned int i;
struct desc_struct *idt = (void *)desc->address;
@@ -252,7 +253,7 @@ static void lguest_load_idt(const struct Xgt_desc_struct *desc)
* hypercall and use that repeatedly to load a new IDT. I don't think it
* really matters, but wouldn't it be nice if they were the same?
*/
-static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
+static void lguest_load_gdt(const struct desc_ptr *desc)
{
BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
@@ -261,10 +262,10 @@ static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
* then tell the Host to reload the entire thing. This operation is so rare
* that this naive implementation is reasonable. */
-static void lguest_write_gdt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
+static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
+ const void *desc, int type)
{
- write_dt_entry(dt, entrynum, low, high);
+ native_write_gdt_entry(dt, entrynum, desc, type);
hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
}
@@ -323,30 +324,30 @@ static void lguest_load_tr_desc(void)
* anyone (including userspace) can just use the raw "cpuid" instruction and
* the Host won't even notice since it isn't privileged. So we try not to get
* too worked up about it. */
-static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
+ unsigned int *cx, unsigned int *dx)
{
- int function = *eax;
+ int function = *ax;
- native_cpuid(eax, ebx, ecx, edx);
+ native_cpuid(ax, bx, cx, dx);
switch (function) {
case 1: /* Basic feature request. */
/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
- *ecx &= 0x00002201;
+ *cx &= 0x00002201;
/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
- *edx &= 0x07808101;
+ *dx &= 0x07808101;
/* The Host can do a nice optimization if it knows that the
* kernel mappings (addresses above 0xC0000000 or whatever
* PAGE_OFFSET is set to) haven't changed. But Linux calls
* flush_tlb_user() for both user and kernel mappings unless
* the Page Global Enable (PGE) feature bit is set. */
- *edx |= 0x00002000;
+ *dx |= 0x00002000;
break;
case 0x80000000:
/* Futureproof this a little: if they ask how much extended
* processor information there is, limit it to known fields. */
- if (*eax > 0x80000008)
- *eax = 0x80000008;
+ if (*ax > 0x80000008)
+ *ax = 0x80000008;
break;
}
}
@@ -755,10 +756,10 @@ static void lguest_time_init(void)
* segment), the privilege level (we're privilege level 1, the Host is 0 and
* will not tolerate us trying to use that), the stack pointer, and the number
* of pages in the stack. */
-static void lguest_load_esp0(struct tss_struct *tss,
+static void lguest_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
- lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,
+ lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0,
THREAD_SIZE/PAGE_SIZE);
}
@@ -788,11 +789,11 @@ static void lguest_wbinvd(void)
* code qualifies for Advanced. It will also never interrupt anything. It
* does, however, allow us to get through the Linux boot code. */
#ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, unsigned long v)
+static void lguest_apic_write(unsigned long reg, u32 v)
{
}
-static unsigned long lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(unsigned long reg)
{
return 0;
}
@@ -957,7 +958,7 @@ __init void lguest_init(void)
pv_cpu_ops.cpuid = lguest_cpuid;
pv_cpu_ops.load_idt = lguest_load_idt;
pv_cpu_ops.iret = lguest_iret;
- pv_cpu_ops.load_esp0 = lguest_load_esp0;
+ pv_cpu_ops.load_sp0 = lguest_load_sp0;
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
pv_cpu_ops.set_ldt = lguest_set_ldt;
pv_cpu_ops.load_tls = lguest_load_tls;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 329da276c6f..4876182daf8 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -1,5 +1,27 @@
+#
+# Makefile for x86 specific library files.
+#
+
+obj-$(CONFIG_SMP) := msr-on-cpu.o
+
+lib-y := delay_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
+lib-y += memcpy_$(BITS).o
+
ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/lib/Makefile_32
+ lib-y += checksum_32.o
+ lib-y += strstr_32.o
+ lib-y += bitops_32.o semaphore_32.o string_32.o
+
+ lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
-include ${srctree}/arch/x86/lib/Makefile_64
+ obj-y += io_64.o iomap_copy_64.o
+
+ CFLAGS_csum-partial_64.o := -funroll-loops
+
+ lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+ lib-y += thunk_64.o clear_page_64.o copy_page_64.o
+ lib-y += bitstr_64.o bitops_64.o
+ lib-y += memmove_64.o memset_64.o
+ lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
endif
diff --git a/arch/x86/lib/Makefile_32 b/arch/x86/lib/Makefile_32
deleted file mode 100644
index 98d1f1e2e2e..00000000000
--- a/arch/x86/lib/Makefile_32
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for i386-specific library files..
-#
-
-
-lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \
- bitops_32.o semaphore_32.o string_32.o
-
-lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
-
-obj-$(CONFIG_SMP) += msr-on-cpu.o
diff --git a/arch/x86/lib/Makefile_64 b/arch/x86/lib/Makefile_64
deleted file mode 100644
index bbabad3c933..00000000000
--- a/arch/x86/lib/Makefile_64
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# Makefile for x86_64-specific library files.
-#
-
-CFLAGS_csum-partial_64.o := -funroll-loops
-
-obj-y := io_64.o iomap_copy_64.o
-obj-$(CONFIG_SMP) += msr-on-cpu.o
-
-lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \
- usercopy_64.o getuser_64.o putuser_64.o \
- thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o
-lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 8ac51b82a63..37756b6fb32 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -34,8 +34,8 @@ void *memmove(void *dest, const void *src, size_t n)
"cld"
: "=&c" (d0), "=&S" (d1), "=&D" (d2)
:"0" (n),
- "1" (n-1+(const char *)src),
- "2" (n-1+(char *)dest)
+ "1" (n-1+src),
+ "2" (n-1+dest)
:"memory");
}
return dest;
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 751ebae8ec4..80175e47b19 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -11,8 +11,8 @@ void *memmove(void * dest,const void *src,size_t count)
if (dest < src) {
return memcpy(dest,src,count);
} else {
- char *p = (char *) dest + count;
- char *s = (char *) src + count;
+ char *p = dest + count;
+ const char *s = src + count;
while (count--)
*--p = *--s;
}
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 444fba40098..3899bd37fdf 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -29,7 +29,7 @@
* registers (%eax, %edx and %ecx) except %eax whish is either a return
* value or just clobbered..
*/
- .section .sched.text
+ .section .sched.text, "ax"
ENTRY(__down_failed)
CFI_STARTPROC
FRAME
@@ -49,7 +49,7 @@ ENTRY(__down_failed)
ENDFRAME
ret
CFI_ENDPROC
- END(__down_failed)
+ ENDPROC(__down_failed)
ENTRY(__down_failed_interruptible)
CFI_STARTPROC
@@ -70,7 +70,7 @@ ENTRY(__down_failed_interruptible)
ENDFRAME
ret
CFI_ENDPROC
- END(__down_failed_interruptible)
+ ENDPROC(__down_failed_interruptible)
ENTRY(__down_failed_trylock)
CFI_STARTPROC
@@ -91,7 +91,7 @@ ENTRY(__down_failed_trylock)
ENDFRAME
ret
CFI_ENDPROC
- END(__down_failed_trylock)
+ ENDPROC(__down_failed_trylock)
ENTRY(__up_wakeup)
CFI_STARTPROC
@@ -112,7 +112,7 @@ ENTRY(__up_wakeup)
ENDFRAME
ret
CFI_ENDPROC
- END(__up_wakeup)
+ ENDPROC(__up_wakeup)
/*
* rw spinlock fallbacks
@@ -132,7 +132,7 @@ ENTRY(__write_lock_failed)
ENDFRAME
ret
CFI_ENDPROC
- END(__write_lock_failed)
+ ENDPROC(__write_lock_failed)
ENTRY(__read_lock_failed)
CFI_STARTPROC
@@ -148,7 +148,7 @@ ENTRY(__read_lock_failed)
ENDFRAME
ret
CFI_ENDPROC
- END(__read_lock_failed)
+ ENDPROC(__read_lock_failed)
#endif
@@ -170,7 +170,7 @@ ENTRY(call_rwsem_down_read_failed)
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
- END(call_rwsem_down_read_failed)
+ ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
@@ -182,7 +182,7 @@ ENTRY(call_rwsem_down_write_failed)
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
- END(call_rwsem_down_write_failed)
+ ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
CFI_STARTPROC
@@ -196,7 +196,7 @@ ENTRY(call_rwsem_wake)
CFI_ADJUST_CFA_OFFSET -4
1: ret
CFI_ENDPROC
- END(call_rwsem_wake)
+ ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
@@ -214,6 +214,6 @@ ENTRY(call_rwsem_downgrade_wake)
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
- END(call_rwsem_downgrade_wake)
+ ENDPROC(call_rwsem_downgrade_wake)
#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 6ea73f3de56..8b92d428ab0 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -33,7 +33,7 @@
.endm
- .section .sched.text
+ .section .sched.text, "ax"
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
new file mode 100644
index 00000000000..1faac8125e3
--- /dev/null
+++ b/arch/x86/mach-rdc321x/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the RDC321x specific parts of the kernel
+#
+obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o wdt.o
+
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
new file mode 100644
index 00000000000..031269163bd
--- /dev/null
+++ b/arch/x86/mach-rdc321x/gpio.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2007, OpenWrt.org, Florian Fainelli <florian@openwrt.org>
+ * RDC321x architecture specific GPIO support
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/autoconf.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+#include <asm/mach-rdc321x/rdc321x_defs.h>
+
+static inline int rdc_gpio_is_valid(unsigned gpio)
+{
+ return (gpio <= RDC_MAX_GPIO);
+}
+
+static unsigned int rdc_gpio_read(unsigned gpio)
+{
+ unsigned int val;
+
+ val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48));
+ outl(val, RDC3210_CFGREG_ADDR);
+ udelay(10);
+ val = inl(RDC3210_CFGREG_DATA);
+ val |= (0x1 << (gpio & 0x1F));
+ outl(val, RDC3210_CFGREG_DATA);
+ udelay(10);
+ val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C));
+ outl(val, RDC3210_CFGREG_ADDR);
+ udelay(10);
+ val = inl(RDC3210_CFGREG_DATA);
+
+ return val;
+}
+
+static void rdc_gpio_write(unsigned int val)
+{
+ if (val) {
+ outl(val, RDC3210_CFGREG_DATA);
+ udelay(10);
+ }
+}
+
+int rdc_gpio_get_value(unsigned gpio)
+{
+ if (rdc_gpio_is_valid(gpio))
+ return (int)rdc_gpio_read(gpio);
+ else
+ return -EINVAL;
+}
+EXPORT_SYMBOL(rdc_gpio_get_value);
+
+void rdc_gpio_set_value(unsigned gpio, int value)
+{
+ unsigned int val;
+
+ if (!rdc_gpio_is_valid(gpio))
+ return;
+
+ val = rdc_gpio_read(gpio);
+
+ if (value)
+ val &= ~(0x1 << (gpio & 0x1F));
+ else
+ val |= (0x1 << (gpio & 0x1F));
+
+ rdc_gpio_write(val);
+}
+EXPORT_SYMBOL(rdc_gpio_set_value);
+
+int rdc_gpio_direction_input(unsigned gpio)
+{
+ return 0;
+}
+EXPORT_SYMBOL(rdc_gpio_direction_input);
+
+int rdc_gpio_direction_output(unsigned gpio, int value)
+{
+ return 0;
+}
+EXPORT_SYMBOL(rdc_gpio_direction_output);
+
+
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
new file mode 100644
index 00000000000..dda6024a586
--- /dev/null
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -0,0 +1,68 @@
+/*
+ * Generic RDC321x platform devices
+ *
+ * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/leds.h>
+
+#include <asm/gpio.h>
+
+/* LEDS */
+static struct gpio_led default_leds[] = {
+ { .name = "rdc:dmz", .gpio = 1, },
+};
+
+static struct gpio_led_platform_data rdc321x_led_data = {
+ .num_leds = ARRAY_SIZE(default_leds),
+ .leds = default_leds,
+};
+
+static struct platform_device rdc321x_leds = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev = {
+ .platform_data = &rdc321x_led_data,
+ }
+};
+
+/* Watchdog */
+static struct platform_device rdc321x_wdt = {
+ .name = "rdc321x-wdt",
+ .id = -1,
+ .num_resources = 0,
+};
+
+static struct platform_device *rdc321x_devs[] = {
+ &rdc321x_leds,
+ &rdc321x_wdt
+};
+
+static int __init rdc_board_setup(void)
+{
+ return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
+}
+
+arch_initcall(rdc_board_setup);
diff --git a/arch/x86/mach-rdc321x/wdt.c b/arch/x86/mach-rdc321x/wdt.c
new file mode 100644
index 00000000000..ec5625ae706
--- /dev/null
+++ b/arch/x86/mach-rdc321x/wdt.c
@@ -0,0 +1,275 @@
+/*
+ * RDC321x watchdog driver
+ *
+ * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ * This driver is highly inspired from the cpu5_wdt driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/timer.h>
+#include <linux/completion.h>
+#include <linux/jiffies.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/mach-rdc321x/rdc321x_defs.h>
+
+#define RDC_WDT_MASK 0x80000000 /* Mask */
+#define RDC_WDT_EN 0x00800000 /* Enable bit */
+#define RDC_WDT_WTI 0x00200000 /* Generate CPU reset/NMI/WDT on timeout */
+#define RDC_WDT_RST 0x00100000 /* Reset bit */
+#define RDC_WDT_WIF 0x00040000 /* WDT IRQ Flag */
+#define RDC_WDT_IRT 0x00000100 /* IRQ Routing table */
+#define RDC_WDT_CNT 0x00000001 /* WDT count */
+
+#define RDC_CLS_TMR 0x80003844 /* Clear timer */
+
+#define RDC_WDT_INTERVAL (HZ/10+1)
+
+int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int ticks = 1000;
+
+/* some device data */
+
+static struct {
+ struct completion stop;
+ volatile int running;
+ struct timer_list timer;
+ volatile int queue;
+ int default_ticks;
+ unsigned long inuse;
+} rdc321x_wdt_device;
+
+/* generic helper functions */
+
+static void rdc321x_wdt_trigger(unsigned long unused)
+{
+ if (rdc321x_wdt_device.running)
+ ticks--;
+
+ /* keep watchdog alive */
+ outl(RDC_WDT_EN|inl(RDC3210_CFGREG_DATA), RDC3210_CFGREG_DATA);
+
+ /* requeue?? */
+ if (rdc321x_wdt_device.queue && ticks)
+ mod_timer(&rdc321x_wdt_device.timer,
+ jiffies + RDC_WDT_INTERVAL);
+ else {
+ /* ticks doesn't matter anyway */
+ complete(&rdc321x_wdt_device.stop);
+ }
+
+}
+
+static void rdc321x_wdt_reset(void)
+{
+ ticks = rdc321x_wdt_device.default_ticks;
+}
+
+static void rdc321x_wdt_start(void)
+{
+ if (!rdc321x_wdt_device.queue) {
+ rdc321x_wdt_device.queue = 1;
+
+ /* Clear the timer */
+ outl(RDC_CLS_TMR, RDC3210_CFGREG_ADDR);
+
+ /* Enable watchdog and set the timeout to 81.92 us */
+ outl(RDC_WDT_EN|RDC_WDT_CNT, RDC3210_CFGREG_DATA);
+
+ mod_timer(&rdc321x_wdt_device.timer,
+ jiffies + RDC_WDT_INTERVAL);
+ }
+
+ /* if process dies, counter is not decremented */
+ rdc321x_wdt_device.running++;
+}
+
+static int rdc321x_wdt_stop(void)
+{
+ if (rdc321x_wdt_device.running)
+ rdc321x_wdt_device.running = 0;
+
+ ticks = rdc321x_wdt_device.default_ticks;
+
+ return -EIO;
+}
+
+/* filesystem operations */
+
+static int rdc321x_wdt_open(struct inode *inode, struct file *file)
+{
+ if (test_and_set_bit(0, &rdc321x_wdt_device.inuse))
+ return -EBUSY;
+
+ return nonseekable_open(inode, file);
+}
+
+static int rdc321x_wdt_release(struct inode *inode, struct file *file)
+{
+ clear_bit(0, &rdc321x_wdt_device.inuse);
+ return 0;
+}
+
+static int rdc321x_wdt_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ unsigned int value;
+ static struct watchdog_info ident = {
+ .options = WDIOF_CARDRESET,
+ .identity = "RDC321x WDT",
+ };
+
+ switch (cmd) {
+ case WDIOC_KEEPALIVE:
+ rdc321x_wdt_reset();
+ break;
+ case WDIOC_GETSTATUS:
+ /* Read the value from the DATA register */
+ value = inl(RDC3210_CFGREG_DATA);
+ if (copy_to_user(argp, &value, sizeof(int)))
+ return -EFAULT;
+ break;
+ case WDIOC_GETSUPPORT:
+ if (copy_to_user(argp, &ident, sizeof(ident)))
+ return -EFAULT;
+ break;
+ case WDIOC_SETOPTIONS:
+ if (copy_from_user(&value, argp, sizeof(int)))
+ return -EFAULT;
+ switch (value) {
+ case WDIOS_ENABLECARD:
+ rdc321x_wdt_start();
+ break;
+ case WDIOS_DISABLECARD:
+ return rdc321x_wdt_stop();
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -ENOTTY;
+ }
+ return 0;
+}
+
+static ssize_t rdc321x_wdt_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ if (!count)
+ return -EIO;
+
+ rdc321x_wdt_reset();
+
+ return count;
+}
+
+static const struct file_operations rdc321x_wdt_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .ioctl = rdc321x_wdt_ioctl,
+ .open = rdc321x_wdt_open,
+ .write = rdc321x_wdt_write,
+ .release = rdc321x_wdt_release,
+};
+
+static struct miscdevice rdc321x_wdt_misc = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &rdc321x_wdt_fops,
+};
+
+static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
+{
+ int err;
+
+ err = misc_register(&rdc321x_wdt_misc);
+ if (err < 0) {
+ printk(KERN_ERR PFX "watchdog misc_register failed\n");
+ return err;
+ }
+
+ /* Reset the watchdog */
+ outl(RDC_WDT_RST, RDC3210_CFGREG_DATA);
+
+ init_completion(&rdc321x_wdt_device.stop);
+ rdc321x_wdt_device.queue = 0;
+
+ clear_bit(0, &rdc321x_wdt_device.inuse);
+
+ setup_timer(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0);
+
+ rdc321x_wdt_device.default_ticks = ticks;
+
+ printk(KERN_INFO PFX "watchdog init success\n");
+
+ return 0;
+}
+
+static int rdc321x_wdt_remove(struct platform_device *pdev)
+{
+ if (rdc321x_wdt_device.queue) {
+ rdc321x_wdt_device.queue = 0;
+ wait_for_completion(&rdc321x_wdt_device.stop);
+ }
+
+ misc_deregister(&rdc321x_wdt_misc);
+
+ return 0;
+}
+
+static struct platform_driver rdc321x_wdt_driver = {
+ .probe = rdc321x_wdt_probe,
+ .remove = rdc321x_wdt_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "rdc321x-wdt",
+ },
+};
+
+static int __init rdc321x_wdt_init(void)
+{
+ return platform_driver_register(&rdc321x_wdt_driver);
+}
+
+static void __exit rdc321x_wdt_exit(void)
+{
+ platform_driver_unregister(&rdc321x_wdt_driver);
+}
+
+module_init(rdc321x_wdt_init);
+module_exit(rdc321x_wdt_exit);
+
+MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
+MODULE_DESCRIPTION("RDC321x watchdog driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
index f3c74fab8b9..2a8456a1f44 100644
--- a/arch/x86/mach-visws/mpparse.c
+++ b/arch/x86/mach-visws/mpparse.c
@@ -36,19 +36,19 @@ unsigned int __initdata maxcpus = NR_CPUS;
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, logical_apicid;
+ int ver, logical_apicid;
physid_mask_t apic_cpus;
-
+
if (!(m->mpc_cpuflag & CPU_ENABLED))
return;
logical_apicid = m->mpc_apicid;
- printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n",
- m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver);
+ printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+ m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
boot_cpu_physical_apicid = m->mpc_apicid;
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 3bef977cb29..5ae5466b9eb 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -37,14 +37,14 @@ void __init pre_setup_arch_hook(void)
{
/* Voyagers run their CPUs from independent clocks, so disable
* the TSC code because we can't sync them */
- tsc_disable = 1;
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
}
void __init trap_init_hook(void)
{
}
-static struct irqaction irq0 = {
+static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL,
.mask = CPU_MASK_NONE,
@@ -59,44 +59,47 @@ void __init time_init_hook(void)
/* Hook for machine specific memory setup. */
-char * __init machine_specific_memory_setup(void)
+char *__init machine_specific_memory_setup(void)
{
char *who;
who = "NOT VOYAGER";
- if(voyager_level == 5) {
+ if (voyager_level == 5) {
__u32 addr, length;
int i;
who = "Voyager-SUS";
e820.nr_map = 0;
- for(i=0; voyager_memory_detect(i, &addr, &length); i++) {
+ for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
add_memory_region(addr, length, E820_RAM);
}
return who;
- } else if(voyager_level == 4) {
+ } else if (voyager_level == 4) {
__u32 tom;
- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+ __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
/* select the DINO config space */
outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
/* Read DINO top of memory register */
tom = ((inb(catbase + 0x4) & 0xf0) << 16)
- + ((inb(catbase + 0x5) & 0x7f) << 24);
+ + ((inb(catbase + 0x5) & 0x7f) << 24);
- if(inb(catbase) != VOYAGER_DINO) {
- printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
- tom = (boot_params.screen_info.ext_mem_k)<<10;
+ if (inb(catbase) != VOYAGER_DINO) {
+ printk(KERN_ERR
+ "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
+ tom = (boot_params.screen_info.ext_mem_k) << 10;
}
who = "Voyager-TOM";
add_memory_region(0, 0x9f000, E820_RAM);
/* map from 1M to top of memory */
- add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM);
+ add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
+ E820_RAM);
/* FIXME: Should check the ASICs to see if I need to
* take out the 8M window. Just do it at the moment
* */
- add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED);
+ add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024,
+ E820_RESERVED);
return who;
}
@@ -114,8 +117,7 @@ char * __init machine_specific_memory_setup(void)
unsigned long mem_size;
/* compare results from other methods and take the greater */
- if (boot_params.alt_mem_k
- < boot_params.screen_info.ext_mem_k) {
+ if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
mem_size = boot_params.screen_info.ext_mem_k;
who = "BIOS-88";
} else {
@@ -126,6 +128,6 @@ char * __init machine_specific_memory_setup(void)
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
- }
+ }
return who;
}
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
index 9b77b39b71a..6a949e4edde 100644
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ b/arch/x86/mach-voyager/voyager_basic.c
@@ -35,7 +35,7 @@
/*
* Power off function, if any
*/
-void (*pm_power_off)(void);
+void (*pm_power_off) (void);
EXPORT_SYMBOL(pm_power_off);
int voyager_level = 0;
@@ -43,39 +43,38 @@ int voyager_level = 0;
struct voyager_SUS *voyager_SUS = NULL;
#ifdef CONFIG_SMP
-static void
-voyager_dump(int dummy1, struct tty_struct *dummy3)
+static void voyager_dump(int dummy1, struct tty_struct *dummy3)
{
/* get here via a sysrq */
voyager_smp_dump();
}
static struct sysrq_key_op sysrq_voyager_dump_op = {
- .handler = voyager_dump,
- .help_msg = "Voyager",
- .action_msg = "Dump Voyager Status",
+ .handler = voyager_dump,
+ .help_msg = "Voyager",
+ .action_msg = "Dump Voyager Status",
};
#endif
-void
-voyager_detect(struct voyager_bios_info *bios)
+void voyager_detect(struct voyager_bios_info *bios)
{
- if(bios->len != 0xff) {
- int class = (bios->class_1 << 8)
- | (bios->class_2 & 0xff);
+ if (bios->len != 0xff) {
+ int class = (bios->class_1 << 8)
+ | (bios->class_2 & 0xff);
printk("Voyager System detected.\n"
" Class %x, Revision %d.%d\n",
class, bios->major, bios->minor);
- if(class == VOYAGER_LEVEL4)
+ if (class == VOYAGER_LEVEL4)
voyager_level = 4;
- else if(class < VOYAGER_LEVEL5_AND_ABOVE)
+ else if (class < VOYAGER_LEVEL5_AND_ABOVE)
voyager_level = 3;
else
voyager_level = 5;
printk(" Architecture Level %d\n", voyager_level);
- if(voyager_level < 4)
- printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
+ if (voyager_level < 4)
+ printk
+ ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
/* install the power off handler */
pm_power_off = voyager_power_off;
#ifdef CONFIG_SMP
@@ -86,15 +85,13 @@ voyager_detect(struct voyager_bios_info *bios)
}
}
-void
-voyager_system_interrupt(int cpl, void *dev_id)
+void voyager_system_interrupt(int cpl, void *dev_id)
{
printk("Voyager: detected system interrupt\n");
}
/* Routine to read information from the extended CMOS area */
-__u8
-voyager_extended_cmos_read(__u16 addr)
+__u8 voyager_extended_cmos_read(__u16 addr)
{
outb(addr & 0xff, 0x74);
outb((addr >> 8) & 0xff, 0x75);
@@ -108,12 +105,11 @@ voyager_extended_cmos_read(__u16 addr)
typedef struct ClickMap {
struct Entry {
- __u32 Address;
- __u32 Length;
+ __u32 Address;
+ __u32 Length;
} Entry[CLICK_ENTRIES];
} ClickMap_t;
-
/* This routine is pretty much an awful hack to read the bios clickmap by
* mapping it into page 0. There are usually three regions in the map:
* Base Memory
@@ -122,8 +118,7 @@ typedef struct ClickMap {
*
* Returns are 0 for failure and 1 for success on extracting region.
*/
-int __init
-voyager_memory_detect(int region, __u32 *start, __u32 *length)
+int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
{
int i;
int retval = 0;
@@ -132,13 +127,14 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
unsigned long map_addr;
unsigned long old;
- if(region >= CLICK_ENTRIES) {
+ if (region >= CLICK_ENTRIES) {
printk("Voyager: Illegal ClickMap region %d\n", region);
return 0;
}
- for(i = 0; i < sizeof(cmos); i++)
- cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
+ for (i = 0; i < sizeof(cmos); i++)
+ cmos[i] =
+ voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
map_addr = *(unsigned long *)cmos;
@@ -147,10 +143,10 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
local_flush_tlb();
/* now clear everything out but page 0 */
- map = (ClickMap_t *)(map_addr & (~PAGE_MASK));
+ map = (ClickMap_t *) (map_addr & (~PAGE_MASK));
/* zero length is the end of the clickmap */
- if(map->Entry[region].Length != 0) {
+ if (map->Entry[region].Length != 0) {
*length = map->Entry[region].Length * CLICK_SIZE;
*start = map->Entry[region].Address;
retval = 1;
@@ -165,10 +161,9 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length)
/* voyager specific handling code for timer interrupts. Used to hand
* off the timer tick to the SMP code, since the VIC doesn't have an
* internal timer (The QIC does, but that's another story). */
-void
-voyager_timer_interrupt(void)
+void voyager_timer_interrupt(void)
{
- if((jiffies & 0x3ff) == 0) {
+ if ((jiffies & 0x3ff) == 0) {
/* There seems to be something flaky in either
* hardware or software that is resetting the timer 0
@@ -186,18 +181,20 @@ voyager_timer_interrupt(void)
__u16 val;
spin_lock(&i8253_lock);
-
+
outb_p(0x00, 0x43);
val = inb_p(0x40);
val |= inb(0x40) << 8;
spin_unlock(&i8253_lock);
- if(val > LATCH) {
- printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
+ if (val > LATCH) {
+ printk
+ ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n",
+ val);
spin_lock(&i8253_lock);
- outb(0x34,0x43);
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ outb(0x34, 0x43);
+ outb_p(LATCH & 0xff, 0x40); /* LSB */
+ outb(LATCH >> 8, 0x40); /* MSB */
spin_unlock(&i8253_lock);
}
}
@@ -206,14 +203,13 @@ voyager_timer_interrupt(void)
#endif
}
-void
-voyager_power_off(void)
+void voyager_power_off(void)
{
printk("VOYAGER Power Off\n");
- if(voyager_level == 5) {
+ if (voyager_level == 5) {
voyager_cat_power_off();
- } else if(voyager_level == 4) {
+ } else if (voyager_level == 4) {
/* This doesn't apparently work on most L4 machines,
* but the specs say to do this to get automatic power
* off. Unfortunately, if it doesn't power off the
@@ -222,10 +218,8 @@ voyager_power_off(void)
#if 0
int port;
-
/* enable the voyager Configuration Space */
- outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8,
- VOYAGER_MC_SETUP);
+ outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP);
/* the port for the power off flag is an offset from the
floating base */
port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
@@ -235,62 +229,57 @@ voyager_power_off(void)
}
/* and wait for it to happen */
local_irq_disable();
- for(;;)
+ for (;;)
halt();
}
/* copied from process.c */
-static inline void
-kb_wait(void)
+static inline void kb_wait(void)
{
int i;
- for (i=0; i<0x10000; i++)
+ for (i = 0; i < 0x10000; i++)
if ((inb_p(0x64) & 0x02) == 0)
break;
}
-void
-machine_shutdown(void)
+void machine_shutdown(void)
{
/* Architecture specific shutdown needed before a kexec */
}
-void
-machine_restart(char *cmd)
+void machine_restart(char *cmd)
{
printk("Voyager Warm Restart\n");
kb_wait();
- if(voyager_level == 5) {
+ if (voyager_level == 5) {
/* write magic values to the RTC to inform system that
* shutdown is beginning */
outb(0x8f, 0x70);
- outb(0x5 , 0x71);
-
+ outb(0x5, 0x71);
+
udelay(50);
- outb(0xfe,0x64); /* pull reset low */
- } else if(voyager_level == 4) {
- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+ outb(0xfe, 0x64); /* pull reset low */
+ } else if (voyager_level == 4) {
+ __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
__u8 basebd = inb(VOYAGER_MC_SETUP);
-
+
outb(basebd | 0x08, VOYAGER_MC_SETUP);
outb(0x02, catbase + 0x21);
}
local_irq_disable();
- for(;;)
+ for (;;)
halt();
}
-void
-machine_emergency_restart(void)
+void machine_emergency_restart(void)
{
/*for now, just hook this to a warm restart */
machine_restart(NULL);
}
-void
-mca_nmi_hook(void)
+void mca_nmi_hook(void)
{
__u8 dumpval __maybe_unused = inb(0xf823);
__u8 swnmi __maybe_unused = inb(0xf813);
@@ -301,8 +290,8 @@ mca_nmi_hook(void)
/* clear swnmi */
outb(0xff, 0xf813);
/* tell SUS to ignore dump */
- if(voyager_level == 5 && voyager_SUS != NULL) {
- if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
+ if (voyager_level == 5 && voyager_SUS != NULL) {
+ if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
udelay(1000);
@@ -310,15 +299,14 @@ mca_nmi_hook(void)
voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
}
}
- printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id());
+ printk(KERN_ERR
+ "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n",
+ smp_processor_id());
show_stack(NULL, NULL);
show_state();
}
-
-
-void
-machine_halt(void)
+void machine_halt(void)
{
/* treat a halt like a power off */
machine_power_off();
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
index 2132ca652df..17a7904f75b 100644
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ b/arch/x86/mach-voyager/voyager_cat.c
@@ -39,34 +39,32 @@
#define CAT_DATA (sspb + 0xd)
/* the internal cat functions */
-static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data,
- __u16 num_bits);
-static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data,
+static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits);
+static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data,
__u16 num_bits);
-static void cat_build_header(__u8 *header, const __u16 len,
+static void cat_build_header(__u8 * header, const __u16 len,
const __u16 smallest_reg_bits,
const __u16 longest_reg_bits);
-static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp,
__u8 reg, __u8 op);
-static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp,
- __u8 reg, __u8 *value);
-static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes,
+static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp,
+ __u8 reg, __u8 * value);
+static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes,
__u8 pad_bits);
-static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
+static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
__u8 value);
-static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
- __u8 *value);
-static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+ __u8 * value);
+static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp,
__u16 offset, __u16 len, void *buf);
-static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
+static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
__u8 reg, __u8 value);
-static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp);
-static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp);
+static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp);
+static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp);
-static inline const char *
-cat_module_name(int module_id)
+static inline const char *cat_module_name(int module_id)
{
- switch(module_id) {
+ switch (module_id) {
case 0x10:
return "Processor Slot 0";
case 0x11:
@@ -105,14 +103,14 @@ voyager_module_t *voyager_cat_list;
/* the I/O port assignments for the VIC and QIC */
static struct resource vic_res = {
- .name = "Voyager Interrupt Controller",
- .start = 0xFC00,
- .end = 0xFC6F
+ .name = "Voyager Interrupt Controller",
+ .start = 0xFC00,
+ .end = 0xFC6F
};
static struct resource qic_res = {
- .name = "Quad Interrupt Controller",
- .start = 0xFC70,
- .end = 0xFCFF
+ .name = "Quad Interrupt Controller",
+ .start = 0xFC70,
+ .end = 0xFCFF
};
/* This function is used to pack a data bit stream inside a message.
@@ -120,7 +118,7 @@ static struct resource qic_res = {
* Note: This function assumes that any unused bit in the data stream
* is set to zero so that the ors will work correctly */
static void
-cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
{
/* compute initial shift needed */
const __u16 offset = start_bit % BITS_PER_BYTE;
@@ -130,7 +128,7 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
int i;
/* adjust if we have more than a byte of residue */
- if(residue >= BITS_PER_BYTE) {
+ if (residue >= BITS_PER_BYTE) {
residue -= BITS_PER_BYTE;
len++;
}
@@ -138,24 +136,25 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
/* clear out the bits. We assume here that if len==0 then
* residue >= offset. This is always true for the catbus
* operations */
- msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
+ msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
msg[byte++] |= data[0] >> offset;
- if(len == 0)
+ if (len == 0)
return;
- for(i = 1; i < len; i++)
- msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset))
- | (data[i] >> offset);
- if(residue != 0) {
+ for (i = 1; i < len; i++)
+ msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset))
+ | (data[i] >> offset);
+ if (residue != 0) {
__u8 mask = 0xff >> residue;
- __u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset)
- | (data[i] >> offset);
-
+ __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset)
+ | (data[i] >> offset);
+
last_byte &= ~mask;
msg[byte] &= mask;
msg[byte] |= last_byte;
}
return;
}
+
/* unpack the data again (same arguments as cat_pack()). data buffer
* must be zero populated.
*
@@ -163,7 +162,7 @@ cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
* data (starting at bit 0 in data).
*/
static void
-cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
+cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
{
/* compute initial shift needed */
const __u16 offset = start_bit % BITS_PER_BYTE;
@@ -172,97 +171,97 @@ cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits)
__u16 byte = start_bit / BITS_PER_BYTE;
int i;
- if(last_bits != 0)
+ if (last_bits != 0)
len++;
/* special case: want < 8 bits from msg and we can get it from
* a single byte of the msg */
- if(len == 0 && BITS_PER_BYTE - offset >= num_bits) {
+ if (len == 0 && BITS_PER_BYTE - offset >= num_bits) {
data[0] = msg[byte] << offset;
data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
return;
}
- for(i = 0; i < len; i++) {
+ for (i = 0; i < len; i++) {
/* this annoying if has to be done just in case a read of
* msg one beyond the array causes a panic */
- if(offset != 0) {
+ if (offset != 0) {
data[i] = msg[byte++] << offset;
data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
- }
- else {
+ } else {
data[i] = msg[byte++];
}
}
/* do we need to truncate the final byte */
- if(last_bits != 0) {
- data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits);
+ if (last_bits != 0) {
+ data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits);
}
return;
}
static void
-cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits,
+cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits,
const __u16 longest_reg_bits)
{
int i;
__u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
__u8 *last_byte = &header[len - 1];
- if(start_bit == 0)
+ if (start_bit == 0)
start_bit = 1; /* must have at least one bit in the hdr */
-
- for(i=0; i < len; i++)
+
+ for (i = 0; i < len; i++)
header[i] = 0;
- for(i = start_bit; i > 0; i--)
+ for (i = start_bit; i > 0; i--)
*last_byte = ((*last_byte) << 1) + 1;
}
static int
-cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
+cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op)
{
__u8 parity, inst, inst_buf[4] = { 0 };
__u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
__u16 ibytes, hbytes, padbits;
int i;
-
+
/*
* Parity is the parity of the register number + 1 (READ_REGISTER
* and WRITE_REGISTER always add '1' to the number of bits == 1)
*/
- parity = (__u8)(1 + (reg & 0x01) +
- ((__u8)(reg & 0x02) >> 1) +
- ((__u8)(reg & 0x04) >> 2) +
- ((__u8)(reg & 0x08) >> 3)) % 2;
+ parity = (__u8) (1 + (reg & 0x01) +
+ ((__u8) (reg & 0x02) >> 1) +
+ ((__u8) (reg & 0x04) >> 2) +
+ ((__u8) (reg & 0x08) >> 3)) % 2;
inst = ((parity << 7) | (reg << 2) | op);
outb(VOYAGER_CAT_IRCYC, CAT_CMD);
- if(!modp->scan_path_connected) {
- if(asicp->asic_id != VOYAGER_CAT_ID) {
- printk("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
+ if (!modp->scan_path_connected) {
+ if (asicp->asic_id != VOYAGER_CAT_ID) {
+ printk
+ ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
return 1;
}
outb(VOYAGER_CAT_HEADER, CAT_DATA);
outb(inst, CAT_DATA);
- if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+ if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
return 1;
}
return 0;
}
ibytes = modp->inst_bits / BITS_PER_BYTE;
- if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
+ if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
padbits = BITS_PER_BYTE - padbits;
ibytes++;
}
hbytes = modp->largest_reg / BITS_PER_BYTE;
- if(modp->largest_reg % BITS_PER_BYTE)
+ if (modp->largest_reg % BITS_PER_BYTE)
hbytes++;
CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
/* initialise the instruction sequence to 0xff */
- for(i=0; i < ibytes + hbytes; i++)
+ for (i = 0; i < ibytes + hbytes; i++)
iseq[i] = 0xff;
cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
@@ -271,11 +270,11 @@ cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
#ifdef VOYAGER_CAT_DEBUG
printk("ins = 0x%x, iseq: ", inst);
- for(i=0; i< ibytes + hbytes; i++)
+ for (i = 0; i < ibytes + hbytes; i++)
printk("0x%x ", iseq[i]);
printk("\n");
#endif
- if(cat_shiftout(iseq, ibytes, hbytes, padbits)) {
+ if (cat_shiftout(iseq, ibytes, hbytes, padbits)) {
CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
return 1;
}
@@ -284,72 +283,74 @@ cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op)
}
static int
-cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
- __u8 *value)
+cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+ __u8 * value)
{
- if(!modp->scan_path_connected) {
- if(asicp->asic_id != VOYAGER_CAT_ID) {
+ if (!modp->scan_path_connected) {
+ if (asicp->asic_id != VOYAGER_CAT_ID) {
CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n"));
return 1;
}
- if(reg > VOYAGER_SUBADDRHI)
+ if (reg > VOYAGER_SUBADDRHI)
outb(VOYAGER_CAT_RUN, CAT_CMD);
outb(VOYAGER_CAT_DRCYC, CAT_CMD);
outb(VOYAGER_CAT_HEADER, CAT_DATA);
*value = inb(CAT_DATA);
outb(0xAA, CAT_DATA);
- if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+ if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
return 1;
}
return 0;
- }
- else {
- __u16 sbits = modp->num_asics -1 + asicp->ireg_length;
+ } else {
+ __u16 sbits = modp->num_asics - 1 + asicp->ireg_length;
__u16 sbytes = sbits / BITS_PER_BYTE;
__u16 tbytes;
- __u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE];
+ __u8 string[VOYAGER_MAX_SCAN_PATH],
+ trailer[VOYAGER_MAX_REG_SIZE];
__u8 padbits;
int i;
-
+
outb(VOYAGER_CAT_DRCYC, CAT_CMD);
- if((padbits = sbits % BITS_PER_BYTE) != 0) {
+ if ((padbits = sbits % BITS_PER_BYTE) != 0) {
padbits = BITS_PER_BYTE - padbits;
sbytes++;
}
tbytes = asicp->ireg_length / BITS_PER_BYTE;
- if(asicp->ireg_length % BITS_PER_BYTE)
+ if (asicp->ireg_length % BITS_PER_BYTE)
tbytes++;
CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
- tbytes, sbytes, padbits));
+ tbytes, sbytes, padbits));
cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
-
- for(i = tbytes - 1; i >= 0; i--) {
+ for (i = tbytes - 1; i >= 0; i--) {
outb(trailer[i], CAT_DATA);
string[sbytes + i] = inb(CAT_DATA);
}
- for(i = sbytes - 1; i >= 0; i--) {
+ for (i = sbytes - 1; i >= 0; i--) {
outb(0xaa, CAT_DATA);
string[i] = inb(CAT_DATA);
}
*value = 0;
- cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length);
+ cat_unpack(string,
+ padbits + (tbytes * BITS_PER_BYTE) +
+ asicp->asic_location, value, asicp->ireg_length);
#ifdef VOYAGER_CAT_DEBUG
printk("value=0x%x, string: ", *value);
- for(i=0; i< tbytes+sbytes; i++)
+ for (i = 0; i < tbytes + sbytes; i++)
printk("0x%x ", string[i]);
printk("\n");
#endif
-
+
/* sanity check the rest of the return */
- for(i=0; i < tbytes; i++) {
+ for (i = 0; i < tbytes; i++) {
__u8 input = 0;
- cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE);
- if(trailer[i] != input) {
+ cat_unpack(string, padbits + (i * BITS_PER_BYTE),
+ &input, BITS_PER_BYTE);
+ if (trailer[i] != input) {
CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
return 1;
}
@@ -360,14 +361,14 @@ cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
}
static int
-cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
+cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
{
int i;
-
- for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
+
+ for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
outb(data[i], CAT_DATA);
- for(i = header_bytes - 1; i >= 0; i--) {
+ for (i = header_bytes - 1; i >= 0; i--) {
__u8 header = 0;
__u8 input;
@@ -376,7 +377,7 @@ cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
CDEBUG(("cat_shiftout: returned 0x%x\n", input));
cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
&header, BITS_PER_BYTE);
- if(input != header) {
+ if (input != header) {
CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
return 1;
}
@@ -385,57 +386,57 @@ cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
}
static int
-cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
+cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
__u8 reg, __u8 value)
{
outb(VOYAGER_CAT_DRCYC, CAT_CMD);
- if(!modp->scan_path_connected) {
- if(asicp->asic_id != VOYAGER_CAT_ID) {
+ if (!modp->scan_path_connected) {
+ if (asicp->asic_id != VOYAGER_CAT_ID) {
CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
return 1;
}
outb(VOYAGER_CAT_HEADER, CAT_DATA);
outb(value, CAT_DATA);
- if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
+ if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
return 1;
}
- if(reg > VOYAGER_SUBADDRHI) {
+ if (reg > VOYAGER_SUBADDRHI) {
outb(VOYAGER_CAT_RUN, CAT_CMD);
outb(VOYAGER_CAT_END, CAT_CMD);
outb(VOYAGER_CAT_RUN, CAT_CMD);
}
-
+
return 0;
- }
- else {
+ } else {
__u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
- __u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE;
- __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
- hseq[VOYAGER_MAX_REG_SIZE];
+ __u16 dbytes =
+ (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE;
+ __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
+ hseq[VOYAGER_MAX_REG_SIZE];
int i;
- if((padbits = (modp->num_asics - 1
- + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
+ if ((padbits = (modp->num_asics - 1
+ + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
padbits = BITS_PER_BYTE - padbits;
dbytes++;
}
- if(asicp->ireg_length % BITS_PER_BYTE)
+ if (asicp->ireg_length % BITS_PER_BYTE)
hbytes++;
-
+
cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
-
- for(i = 0; i < dbytes + hbytes; i++)
+
+ for (i = 0; i < dbytes + hbytes; i++)
dseq[i] = 0xff;
CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
dbytes, hbytes, padbits));
cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
hseq, hbytes * BITS_PER_BYTE);
- cat_pack(dseq, asicp->asic_location, &value,
+ cat_pack(dseq, asicp->asic_location, &value,
asicp->ireg_length);
#ifdef VOYAGER_CAT_DEBUG
printk("dseq ");
- for(i=0; i<hbytes+dbytes; i++) {
+ for (i = 0; i < hbytes + dbytes; i++) {
printk("0x%x ", dseq[i]);
}
printk("\n");
@@ -445,121 +446,125 @@ cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp,
}
static int
-cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
- __u8 value)
+cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value)
{
- if(cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
+ if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
return 1;
return cat_senddata(modp, asicp, reg, value);
}
static int
-cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg,
- __u8 *value)
+cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
+ __u8 * value)
{
- if(cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
+ if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
return 1;
return cat_getdata(modp, asicp, reg, value);
}
static int
-cat_subaddrsetup(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
__u16 len)
{
__u8 val;
- if(len > 1) {
+ if (len > 1) {
/* set auto increment */
__u8 newval;
-
- if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
+
+ if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
return 1;
}
- CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val));
+ CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n",
+ val));
newval = val | VOYAGER_AUTO_INC;
- if(newval != val) {
- if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
+ if (newval != val) {
+ if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
return 1;
}
}
}
- if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) {
+ if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) {
CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
return 1;
}
- if(asicp->subaddr > VOYAGER_SUBADDR_LO) {
- if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) {
+ if (asicp->subaddr > VOYAGER_SUBADDR_LO) {
+ if (cat_write
+ (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) {
CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
return 1;
}
cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
- CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val));
+ CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset,
+ val));
}
cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
return 0;
}
-
+
static int
-cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
- __u16 len, void *buf)
+cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
+ __u16 len, void *buf)
{
int i, retval;
/* FIXME: need special actions for VOYAGER_CAT_ID here */
- if(asicp->asic_id == VOYAGER_CAT_ID) {
+ if (asicp->asic_id == VOYAGER_CAT_ID) {
CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
/* FIXME -- This is supposed to be handled better
* There is a problem writing to the cat asic in the
* PSI. The 30us delay seems to work, though */
udelay(30);
}
-
- if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+
+ if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
printk("cat_subwrite: cat_subaddrsetup FAILED\n");
return retval;
}
-
- if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
+
+ if (cat_sendinst
+ (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
printk("cat_subwrite: cat_sendinst FAILED\n");
return 1;
}
- for(i = 0; i < len; i++) {
- if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) {
- printk("cat_subwrite: cat_sendata element at %d FAILED\n", i);
+ for (i = 0; i < len; i++) {
+ if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) {
+ printk
+ ("cat_subwrite: cat_sendata element at %d FAILED\n",
+ i);
return 1;
}
}
return 0;
}
static int
-cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset,
+cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
__u16 len, void *buf)
{
int i, retval;
- if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
+ if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
return retval;
}
- if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
+ if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
CDEBUG(("cat_subread: cat_sendinst failed\n"));
return 1;
}
- for(i = 0; i < len; i++) {
- if(cat_getdata(modp, asicp, 0xFF,
- &((__u8 *)buf)[i])) {
- CDEBUG(("cat_subread: cat_getdata element %d failed\n", i));
+ for (i = 0; i < len; i++) {
+ if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) {
+ CDEBUG(("cat_subread: cat_getdata element %d failed\n",
+ i));
return 1;
}
}
return 0;
}
-
/* buffer for storing EPROM data read in during initialisation */
static __initdata __u8 eprom_buf[0xFFFF];
static voyager_module_t *voyager_initial_module;
@@ -568,8 +573,7 @@ static voyager_module_t *voyager_initial_module;
* boot cpu *after* all memory initialisation has been done (so we can
* use kmalloc) but before smp initialisation, so we can probe the SMP
* configuration and pick up necessary information. */
-void __init
-voyager_cat_init(void)
+void __init voyager_cat_init(void)
{
voyager_module_t **modpp = &voyager_initial_module;
voyager_asic_t **asicpp;
@@ -578,27 +582,29 @@ voyager_cat_init(void)
unsigned long qic_addr = 0;
__u8 qabc_data[0x20];
__u8 num_submodules, val;
- voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0];
-
+ voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0];
+
__u8 cmos[4];
unsigned long addr;
-
+
/* initiallise the SUS mailbox */
- for(i=0; i<sizeof(cmos); i++)
+ for (i = 0; i < sizeof(cmos); i++)
cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
addr = *(unsigned long *)cmos;
- if((addr & 0xff000000) != 0xff000000) {
- printk(KERN_ERR "Voyager failed to get SUS mailbox (addr = 0x%lx\n", addr);
+ if ((addr & 0xff000000) != 0xff000000) {
+ printk(KERN_ERR
+ "Voyager failed to get SUS mailbox (addr = 0x%lx\n",
+ addr);
} else {
static struct resource res;
-
+
res.name = "voyager SUS";
res.start = addr;
- res.end = addr+0x3ff;
-
+ res.end = addr + 0x3ff;
+
request_resource(&iomem_resource, &res);
voyager_SUS = (struct voyager_SUS *)
- ioremap(addr, 0x400);
+ ioremap(addr, 0x400);
printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
voyager_SUS->SUS_version);
voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
@@ -609,8 +615,6 @@ voyager_cat_init(void)
voyager_extended_vic_processors = 0;
voyager_quad_processors = 0;
-
-
printk("VOYAGER: beginning CAT bus probe\n");
/* set up the SuperSet Port Block which tells us where the
* CAT communication port is */
@@ -618,14 +622,14 @@ voyager_cat_init(void)
VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
/* now find out if were 8 slot or normal */
- if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
- == EIGHT_SLOT_IDENTIFIER) {
+ if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
+ == EIGHT_SLOT_IDENTIFIER) {
voyager_8slot = 1;
- printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n");
+ printk(KERN_NOTICE
+ "Voyager: Eight slot 51xx configuration detected\n");
}
- for(i = VOYAGER_MIN_MODULE;
- i <= VOYAGER_MAX_MODULE; i++) {
+ for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) {
__u8 input;
int asic;
__u16 eprom_size;
@@ -643,21 +647,21 @@ voyager_cat_init(void)
outb(0xAA, CAT_DATA);
input = inb(CAT_DATA);
outb(VOYAGER_CAT_END, CAT_CMD);
- if(input != VOYAGER_CAT_HEADER) {
+ if (input != VOYAGER_CAT_HEADER) {
continue;
}
CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
cat_module_name(i)));
- *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/
- if(*modpp == NULL) {
+ *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */
+ if (*modpp == NULL) {
printk("**WARNING** kmalloc failure in cat_init\n");
continue;
}
memset(*modpp, 0, sizeof(voyager_module_t));
/* need temporary asic for cat_subread. It will be
* filled in correctly later */
- (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/
- if((*modpp)->asic == NULL) {
+ (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */
+ if ((*modpp)->asic == NULL) {
printk("**WARNING** kmalloc failure in cat_init\n");
continue;
}
@@ -666,47 +670,52 @@ voyager_cat_init(void)
(*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
(*modpp)->module_addr = i;
(*modpp)->scan_path_connected = 0;
- if(i == VOYAGER_PSI) {
+ if (i == VOYAGER_PSI) {
/* Exception leg for modules with no EEPROM */
printk("Module \"%s\"\n", cat_module_name(i));
continue;
}
-
+
CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
outb(VOYAGER_CAT_RUN, CAT_CMD);
cat_disconnect(*modpp, (*modpp)->asic);
- if(cat_subread(*modpp, (*modpp)->asic,
- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
- &eprom_size)) {
- printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+ if (cat_subread(*modpp, (*modpp)->asic,
+ VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+ &eprom_size)) {
+ printk
+ ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
+ i);
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
- if(eprom_size > sizeof(eprom_buf)) {
- printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size);
+ if (eprom_size > sizeof(eprom_buf)) {
+ printk
+ ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
+ i, eprom_size);
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
outb(VOYAGER_CAT_END, CAT_CMD);
outb(VOYAGER_CAT_RUN, CAT_CMD);
- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
- if(cat_subread(*modpp, (*modpp)->asic, 0,
- eprom_size, eprom_buf)) {
+ CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
+ eprom_size));
+ if (cat_subread
+ (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
outb(VOYAGER_CAT_END, CAT_CMD);
printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
cat_module_name(i), eprom_hdr->version_id,
- *((__u32 *)eprom_hdr->tracer), eprom_hdr->num_asics);
+ *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics);
(*modpp)->ee_size = eprom_hdr->ee_size;
(*modpp)->num_asics = eprom_hdr->num_asics;
asicpp = &((*modpp)->asic);
sp_offset = eprom_hdr->scan_path_offset;
/* All we really care about are the Quad cards. We
- * identify them because they are in a processor slot
- * and have only four asics */
- if((i < 0x10 || (i>=0x14 && i < 0x1c) || i>0x1f)) {
+ * identify them because they are in a processor slot
+ * and have only four asics */
+ if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) {
modpp = &((*modpp)->next);
continue;
}
@@ -717,16 +726,17 @@ voyager_cat_init(void)
&num_submodules);
/* lowest two bits, active low */
num_submodules = ~(0xfc | num_submodules);
- CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules));
- if(num_submodules == 0) {
+ CDEBUG(("VOYAGER CAT: %d submodules present\n",
+ num_submodules));
+ if (num_submodules == 0) {
/* fill in the dyadic extended processors */
__u8 cpu = i & 0x07;
printk("Module \"%s\": Dyadic Processor Card\n",
cat_module_name(i));
- voyager_extended_vic_processors |= (1<<cpu);
+ voyager_extended_vic_processors |= (1 << cpu);
cpu += 4;
- voyager_extended_vic_processors |= (1<<cpu);
+ voyager_extended_vic_processors |= (1 << cpu);
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
@@ -740,28 +750,32 @@ voyager_cat_init(void)
cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
outb(VOYAGER_CAT_END, CAT_CMD);
-
CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
outb(VOYAGER_CAT_RUN, CAT_CMD);
cat_disconnect(*modpp, (*modpp)->asic);
- if(cat_subread(*modpp, (*modpp)->asic,
- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
- &eprom_size)) {
- printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i);
+ if (cat_subread(*modpp, (*modpp)->asic,
+ VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
+ &eprom_size)) {
+ printk
+ ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
+ i);
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
- if(eprom_size > sizeof(eprom_buf)) {
- printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size);
+ if (eprom_size > sizeof(eprom_buf)) {
+ printk
+ ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
+ i, eprom_size);
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
outb(VOYAGER_CAT_END, CAT_CMD);
outb(VOYAGER_CAT_RUN, CAT_CMD);
- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size));
- if(cat_subread(*modpp, (*modpp)->asic, 0,
- eprom_size, eprom_buf)) {
+ CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
+ eprom_size));
+ if (cat_subread
+ (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
outb(VOYAGER_CAT_END, CAT_CMD);
continue;
}
@@ -773,30 +787,35 @@ voyager_cat_init(void)
sp_offset = eprom_hdr->scan_path_offset;
/* get rid of the dummy CAT asic and read the real one */
kfree((*modpp)->asic);
- for(asic=0; asic < (*modpp)->num_asics; asic++) {
+ for (asic = 0; asic < (*modpp)->num_asics; asic++) {
int j;
- voyager_asic_t *asicp = *asicpp
- = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
+ voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */
voyager_sp_table_t *sp_table;
voyager_at_t *asic_table;
voyager_jtt_t *jtag_table;
- if(asicp == NULL) {
- printk("**WARNING** kmalloc failure in cat_init\n");
+ if (asicp == NULL) {
+ printk
+ ("**WARNING** kmalloc failure in cat_init\n");
continue;
}
asicpp = &(asicp->next);
asicp->asic_location = asic;
- sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset);
+ sp_table =
+ (voyager_sp_table_t *) (eprom_buf + sp_offset);
asicp->asic_id = sp_table->asic_id;
- asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset);
- for(j=0; j<4; j++)
+ asic_table =
+ (voyager_at_t *) (eprom_buf +
+ sp_table->asic_data_offset);
+ for (j = 0; j < 4; j++)
asicp->jtag_id[j] = asic_table->jtag_id[j];
- jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset);
+ jtag_table =
+ (voyager_jtt_t *) (eprom_buf +
+ asic_table->jtag_offset);
asicp->ireg_length = jtag_table->ireg_len;
asicp->bit_location = (*modpp)->inst_bits;
(*modpp)->inst_bits += asicp->ireg_length;
- if(asicp->ireg_length > (*modpp)->largest_reg)
+ if (asicp->ireg_length > (*modpp)->largest_reg)
(*modpp)->largest_reg = asicp->ireg_length;
if (asicp->ireg_length < (*modpp)->smallest_reg ||
(*modpp)->smallest_reg == 0)
@@ -804,15 +823,13 @@ voyager_cat_init(void)
CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
asicp->asic_id, asicp->ireg_length,
asicp->bit_location));
- if(asicp->asic_id == VOYAGER_QUAD_QABC) {
+ if (asicp->asic_id == VOYAGER_QUAD_QABC) {
CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
qabc_asic = asicp;
}
sp_offset += sizeof(voyager_sp_table_t);
}
- CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n",
- (*modpp)->inst_bits, (*modpp)->largest_reg,
- (*modpp)->smallest_reg));
+ CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg));
/* OK, now we have the QUAD ASICs set up, use them.
* we need to:
*
@@ -828,10 +845,11 @@ voyager_cat_init(void)
qic_addr = qabc_data[5] << 8;
qic_addr = (qic_addr | qabc_data[6]) << 8;
qic_addr = (qic_addr | qabc_data[7]) << 8;
- printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
- cat_module_name(i), qic_addr, qabc_data[8]);
+ printk
+ ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
+ cat_module_name(i), qic_addr, qabc_data[8]);
#if 0 /* plumbing fails---FIXME */
- if((qabc_data[8] & 0xf0) == 0) {
+ if ((qabc_data[8] & 0xf0) == 0) {
/* FIXME: 32 way 8 CPU slot monster cannot be
* plumbed this way---need to check for it */
@@ -842,94 +860,97 @@ voyager_cat_init(void)
#ifdef VOYAGER_CAT_DEBUG
/* verify plumbing */
cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
- if((qabc_data[8] & 0xf0) == 0) {
- CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8]));
+ if ((qabc_data[8] & 0xf0) == 0) {
+ CDEBUG(("PLUMBING FAILED: 0x%x\n",
+ qabc_data[8]));
}
#endif
}
#endif
{
- struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL);
+ struct resource *res =
+ kzalloc(sizeof(struct resource), GFP_KERNEL);
res->name = kmalloc(128, GFP_KERNEL);
- sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i));
+ sprintf((char *)res->name, "Voyager %s Quad CPI",
+ cat_module_name(i));
res->start = qic_addr;
res->end = qic_addr + 0x3ff;
request_resource(&iomem_resource, res);
}
qic_addr = (unsigned long)ioremap(qic_addr, 0x400);
-
- for(j = 0; j < 4; j++) {
+
+ for (j = 0; j < 4; j++) {
__u8 cpu;
- if(voyager_8slot) {
+ if (voyager_8slot) {
/* 8 slot has a different mapping,
* each slot has only one vic line, so
* 1 cpu in each slot must be < 8 */
- cpu = (i & 0x07) + j*8;
+ cpu = (i & 0x07) + j * 8;
} else {
- cpu = (i & 0x03) + j*4;
+ cpu = (i & 0x03) + j * 4;
}
- if( (qabc_data[8] & (1<<j))) {
- voyager_extended_vic_processors |= (1<<cpu);
+ if ((qabc_data[8] & (1 << j))) {
+ voyager_extended_vic_processors |= (1 << cpu);
}
- if(qabc_data[8] & (1<<(j+4)) ) {
+ if (qabc_data[8] & (1 << (j + 4))) {
/* Second SET register plumbed: Quad
* card has two VIC connected CPUs.
* Secondary cannot be booted as a VIC
* CPU */
- voyager_extended_vic_processors |= (1<<cpu);
- voyager_allowed_boot_processors &= (~(1<<cpu));
+ voyager_extended_vic_processors |= (1 << cpu);
+ voyager_allowed_boot_processors &=
+ (~(1 << cpu));
}
- voyager_quad_processors |= (1<<cpu);
+ voyager_quad_processors |= (1 << cpu);
voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
- (qic_addr+(j<<8));
+ (qic_addr + (j << 8));
CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
(unsigned long)voyager_quad_cpi_addr[cpu]));
}
outb(VOYAGER_CAT_END, CAT_CMD);
-
-
*asicpp = NULL;
modpp = &((*modpp)->next);
}
*modpp = NULL;
- printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors);
+ printk
+ ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n",
+ voyager_extended_vic_processors, voyager_quad_processors,
+ voyager_allowed_boot_processors);
request_resource(&ioport_resource, &vic_res);
- if(voyager_quad_processors)
+ if (voyager_quad_processors)
request_resource(&ioport_resource, &qic_res);
/* set up the front power switch */
}
-int
-voyager_cat_readb(__u8 module, __u8 asic, int reg)
+int voyager_cat_readb(__u8 module, __u8 asic, int reg)
{
return 0;
}
-static int
-cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp)
+static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp)
{
__u8 val;
int err = 0;
- if(!modp->scan_path_connected)
+ if (!modp->scan_path_connected)
return 0;
- if(asicp->asic_id != VOYAGER_CAT_ID) {
+ if (asicp->asic_id != VOYAGER_CAT_ID) {
CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
return 1;
}
err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
- if(err) {
+ if (err) {
CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
return err;
}
val &= VOYAGER_DISCONNECT_ASIC;
err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
- if(err) {
+ if (err) {
CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
return err;
}
@@ -940,27 +961,26 @@ cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp)
return 0;
}
-static int
-cat_connect(voyager_module_t *modp, voyager_asic_t *asicp)
+static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp)
{
__u8 val;
int err = 0;
- if(modp->scan_path_connected)
+ if (modp->scan_path_connected)
return 0;
- if(asicp->asic_id != VOYAGER_CAT_ID) {
+ if (asicp->asic_id != VOYAGER_CAT_ID) {
CDEBUG(("cat_connect: ASIC is not CAT\n"));
return 1;
}
err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
- if(err) {
+ if (err) {
CDEBUG(("cat_connect: failed to read SCANPATH\n"));
return err;
}
val |= VOYAGER_CONNECT_ASIC;
err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
- if(err) {
+ if (err) {
CDEBUG(("cat_connect: failed to write SCANPATH\n"));
return err;
}
@@ -971,11 +991,10 @@ cat_connect(voyager_module_t *modp, voyager_asic_t *asicp)
return 0;
}
-void
-voyager_cat_power_off(void)
+void voyager_cat_power_off(void)
{
/* Power the machine off by writing to the PSI over the CAT
- * bus */
+ * bus */
__u8 data;
voyager_module_t psi = { 0 };
voyager_asic_t psi_asic = { 0 };
@@ -1009,8 +1028,7 @@ voyager_cat_power_off(void)
struct voyager_status voyager_status = { 0 };
-void
-voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
+void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data)
{
voyager_module_t psi = { 0 };
voyager_asic_t psi_asic = { 0 };
@@ -1027,7 +1045,7 @@ voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
outb(VOYAGER_CAT_RUN, CAT_CMD);
cat_disconnect(&psi, &psi_asic);
- switch(cmd) {
+ switch (cmd) {
case VOYAGER_PSI_READ:
cat_read(&psi, &psi_asic, reg, data);
break;
@@ -1047,8 +1065,7 @@ voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data)
outb(VOYAGER_CAT_END, CAT_CMD);
}
-void
-voyager_cat_do_common_interrupt(void)
+void voyager_cat_do_common_interrupt(void)
{
/* This is caused either by a memory parity error or something
* in the PSI */
@@ -1057,7 +1074,7 @@ voyager_cat_do_common_interrupt(void)
voyager_asic_t psi_asic = { 0 };
struct voyager_psi psi_reg;
int i;
- re_read:
+ re_read:
psi.asic = &psi_asic;
psi.asic->asic_id = VOYAGER_CAT_ID;
psi.asic->subaddr = VOYAGER_SUBADDR_HI;
@@ -1072,43 +1089,45 @@ voyager_cat_do_common_interrupt(void)
cat_disconnect(&psi, &psi_asic);
/* Read the status. NOTE: Need to read *all* the PSI regs here
* otherwise the cmn int will be reasserted */
- for(i = 0; i < sizeof(psi_reg.regs); i++) {
- cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]);
+ for (i = 0; i < sizeof(psi_reg.regs); i++) {
+ cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]);
}
outb(VOYAGER_CAT_END, CAT_CMD);
- if((psi_reg.regs.checkbit & 0x02) == 0) {
+ if ((psi_reg.regs.checkbit & 0x02) == 0) {
psi_reg.regs.checkbit |= 0x02;
cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
printk("VOYAGER RE-READ PSI\n");
goto re_read;
}
outb(VOYAGER_CAT_RUN, CAT_CMD);
- for(i = 0; i < sizeof(psi_reg.subregs); i++) {
+ for (i = 0; i < sizeof(psi_reg.subregs); i++) {
/* This looks strange, but the PSI doesn't do auto increment
* correctly */
- cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
- 1, &((__u8 *)&psi_reg.subregs)[i]);
+ cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
+ 1, &((__u8 *) & psi_reg.subregs)[i]);
}
outb(VOYAGER_CAT_END, CAT_CMD);
#ifdef VOYAGER_CAT_DEBUG
printk("VOYAGER PSI: ");
- for(i=0; i<sizeof(psi_reg.regs); i++)
- printk("%02x ", ((__u8 *)&psi_reg.regs)[i]);
+ for (i = 0; i < sizeof(psi_reg.regs); i++)
+ printk("%02x ", ((__u8 *) & psi_reg.regs)[i]);
printk("\n ");
- for(i=0; i<sizeof(psi_reg.subregs); i++)
- printk("%02x ", ((__u8 *)&psi_reg.subregs)[i]);
+ for (i = 0; i < sizeof(psi_reg.subregs); i++)
+ printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]);
printk("\n");
#endif
- if(psi_reg.regs.intstatus & PSI_MON) {
+ if (psi_reg.regs.intstatus & PSI_MON) {
/* switch off or power fail */
- if(psi_reg.subregs.supply & PSI_SWITCH_OFF) {
- if(voyager_status.switch_off) {
- printk(KERN_ERR "Voyager front panel switch turned off again---Immediate power off!\n");
+ if (psi_reg.subregs.supply & PSI_SWITCH_OFF) {
+ if (voyager_status.switch_off) {
+ printk(KERN_ERR
+ "Voyager front panel switch turned off again---Immediate power off!\n");
voyager_cat_power_off();
/* not reached */
} else {
- printk(KERN_ERR "Voyager front panel switch turned off\n");
+ printk(KERN_ERR
+ "Voyager front panel switch turned off\n");
voyager_status.switch_off = 1;
voyager_status.request_from_kernel = 1;
wake_up_process(voyager_thread);
@@ -1127,7 +1146,7 @@ voyager_cat_do_common_interrupt(void)
VDEBUG(("Voyager ac fail reg 0x%x\n",
psi_reg.subregs.ACfail));
- if((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
+ if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
/* No further update */
return;
}
@@ -1135,20 +1154,20 @@ voyager_cat_do_common_interrupt(void)
/* Don't bother trying to find out who failed.
* FIXME: This probably makes the code incorrect on
* anything other than a 345x */
- for(i=0; i< 5; i++) {
- if( psi_reg.subregs.ACfail &(1<<i)) {
+ for (i = 0; i < 5; i++) {
+ if (psi_reg.subregs.ACfail & (1 << i)) {
break;
}
}
printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
#endif
/* DON'T do this: it shuts down the AC PSI
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- data = PSI_MASK_MASK | i;
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
- 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- */
+ outb(VOYAGER_CAT_RUN, CAT_CMD);
+ data = PSI_MASK_MASK | i;
+ cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
+ 1, &data);
+ outb(VOYAGER_CAT_END, CAT_CMD);
+ */
printk(KERN_ERR "Voyager AC power failure\n");
outb(VOYAGER_CAT_RUN, CAT_CMD);
data = PSI_COLD_START;
@@ -1159,16 +1178,16 @@ voyager_cat_do_common_interrupt(void)
voyager_status.request_from_kernel = 1;
wake_up_process(voyager_thread);
}
-
-
- } else if(psi_reg.regs.intstatus & PSI_FAULT) {
+
+ } else if (psi_reg.regs.intstatus & PSI_FAULT) {
/* Major fault! */
- printk(KERN_ERR "Voyager PSI Detected major fault, immediate power off!\n");
+ printk(KERN_ERR
+ "Voyager PSI Detected major fault, immediate power off!\n");
voyager_cat_power_off();
/* not reached */
- } else if(psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
- | PSI_CURRENT | PSI_DVM
- | PSI_PSCFAULT | PSI_STAT_CHG)) {
+ } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
+ | PSI_CURRENT | PSI_DVM
+ | PSI_PSCFAULT | PSI_STAT_CHG)) {
/* other psi fault */
printk(KERN_WARNING "Voyager PSI status 0x%x\n", data);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 88124dd3540..dffa786f61f 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -32,7 +32,8 @@
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 };
/* CPU IRQ affinity -- set to all ones initially */
-static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = ~0UL };
+static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned =
+ {[0 ... NR_CPUS-1] = ~0UL };
/* per CPU data structure (for /proc/cpuinfo et al), visible externally
* indexed physically */
@@ -76,7 +77,6 @@ EXPORT_SYMBOL(cpu_online_map);
* by scheduler but indexed physically */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-
/* The internal functions */
static void send_CPI(__u32 cpuset, __u8 cpi);
static void ack_CPI(__u8 cpi);
@@ -101,94 +101,86 @@ int hard_smp_processor_id(void);
int safe_smp_processor_id(void);
/* Inline functions */
-static inline void
-send_one_QIC_CPI(__u8 cpu, __u8 cpi)
+static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi)
{
voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
- (smp_processor_id() << 16) + cpi;
+ (smp_processor_id() << 16) + cpi;
}
-static inline void
-send_QIC_CPI(__u32 cpuset, __u8 cpi)
+static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
{
int cpu;
for_each_online_cpu(cpu) {
- if(cpuset & (1<<cpu)) {
+ if (cpuset & (1 << cpu)) {
#ifdef VOYAGER_DEBUG
- if(!cpu_isset(cpu, cpu_online_map))
- VDEBUG(("CPU%d sending cpi %d to CPU%d not in cpu_online_map\n", hard_smp_processor_id(), cpi, cpu));
+ if (!cpu_isset(cpu, cpu_online_map))
+ VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
+ "cpu_online_map\n",
+ hard_smp_processor_id(), cpi, cpu));
#endif
send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
}
}
}
-static inline void
-wrapper_smp_local_timer_interrupt(void)
+static inline void wrapper_smp_local_timer_interrupt(void)
{
irq_enter();
smp_local_timer_interrupt();
irq_exit();
}
-static inline void
-send_one_CPI(__u8 cpu, __u8 cpi)
+static inline void send_one_CPI(__u8 cpu, __u8 cpi)
{
- if(voyager_quad_processors & (1<<cpu))
+ if (voyager_quad_processors & (1 << cpu))
send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
else
- send_CPI(1<<cpu, cpi);
+ send_CPI(1 << cpu, cpi);
}
-static inline void
-send_CPI_allbutself(__u8 cpi)
+static inline void send_CPI_allbutself(__u8 cpi)
{
__u8 cpu = smp_processor_id();
__u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
send_CPI(mask, cpi);
}
-static inline int
-is_cpu_quad(void)
+static inline int is_cpu_quad(void)
{
__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
}
-static inline int
-is_cpu_extended(void)
+static inline int is_cpu_extended(void)
{
__u8 cpu = hard_smp_processor_id();
- return(voyager_extended_vic_processors & (1<<cpu));
+ return (voyager_extended_vic_processors & (1 << cpu));
}
-static inline int
-is_cpu_vic_boot(void)
+static inline int is_cpu_vic_boot(void)
{
__u8 cpu = hard_smp_processor_id();
- return(voyager_extended_vic_processors
- & voyager_allowed_boot_processors & (1<<cpu));
+ return (voyager_extended_vic_processors
+ & voyager_allowed_boot_processors & (1 << cpu));
}
-
-static inline void
-ack_CPI(__u8 cpi)
+static inline void ack_CPI(__u8 cpi)
{
- switch(cpi) {
+ switch (cpi) {
case VIC_CPU_BOOT_CPI:
- if(is_cpu_quad() && !is_cpu_vic_boot())
+ if (is_cpu_quad() && !is_cpu_vic_boot())
ack_QIC_CPI(cpi);
else
ack_VIC_CPI(cpi);
break;
case VIC_SYS_INT:
- case VIC_CMN_INT:
+ case VIC_CMN_INT:
/* These are slightly strange. Even on the Quad card,
* They are vectored as VIC CPIs */
- if(is_cpu_quad())
+ if (is_cpu_quad())
ack_special_QIC_CPI(cpi);
else
ack_VIC_CPI(cpi);
@@ -205,11 +197,11 @@ ack_CPI(__u8 cpi)
* 8259 IRQs except that masks and things must be kept per processor
*/
static struct irq_chip vic_chip = {
- .name = "VIC",
- .startup = startup_vic_irq,
- .mask = mask_vic_irq,
- .unmask = unmask_vic_irq,
- .set_affinity = set_vic_irq_affinity,
+ .name = "VIC",
+ .startup = startup_vic_irq,
+ .mask = mask_vic_irq,
+ .unmask = unmask_vic_irq,
+ .set_affinity = set_vic_irq_affinity,
};
/* used to count up as CPUs are brought on line (starts at 0) */
@@ -223,7 +215,7 @@ static __u32 trampoline_base;
/* The per cpu profile stuff - used in smp_local_timer_interrupt */
static DEFINE_PER_CPU(int, prof_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
+static DEFINE_PER_CPU(int, prof_counter) = 1;
/* the map used to check if a CPU has booted */
static __u32 cpu_booted_map;
@@ -235,7 +227,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
/* This is for the new dynamic CPU boot code */
cpumask_t cpu_callin_map = CPU_MASK_NONE;
cpumask_t cpu_callout_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_callout_map);
cpumask_t cpu_possible_map = CPU_MASK_NONE;
EXPORT_SYMBOL(cpu_possible_map);
@@ -246,9 +237,9 @@ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
/* Lock for enable/disable of VIC interrupts */
-static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
+static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
-/* The boot processor is correctly set up in PC mode when it
+/* The boot processor is correctly set up in PC mode when it
* comes up, but the secondaries need their master/slave 8259
* pairs initializing correctly */
@@ -262,8 +253,7 @@ static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
/* debugging routine to read the isr of the cpu's pic */
-static inline __u16
-vic_read_isr(void)
+static inline __u16 vic_read_isr(void)
{
__u16 isr;
@@ -275,17 +265,16 @@ vic_read_isr(void)
return isr;
}
-static __init void
-qic_setup(void)
+static __init void qic_setup(void)
{
- if(!is_cpu_quad()) {
+ if (!is_cpu_quad()) {
/* not a quad, no setup */
return;
}
outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
-
- if(is_cpu_extended()) {
+
+ if (is_cpu_extended()) {
/* the QIC duplicate of the VIC base register */
outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
@@ -295,8 +284,7 @@ qic_setup(void)
}
}
-static __init void
-vic_setup_pic(void)
+static __init void vic_setup_pic(void)
{
outb(1, VIC_REDIRECT_REGISTER_1);
/* clear the claim registers for dynamic routing */
@@ -333,7 +321,7 @@ vic_setup_pic(void)
/* ICW2: slave vector base */
outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
-
+
/* ICW3: slave ID */
outb(0x02, 0xA1);
@@ -341,19 +329,18 @@ vic_setup_pic(void)
outb(0x01, 0xA1);
}
-static void
-do_quad_bootstrap(void)
+static void do_quad_bootstrap(void)
{
- if(is_cpu_quad() && is_cpu_vic_boot()) {
+ if (is_cpu_quad() && is_cpu_vic_boot()) {
int i;
unsigned long flags;
__u8 cpuid = hard_smp_processor_id();
local_irq_save(flags);
- for(i = 0; i<4; i++) {
+ for (i = 0; i < 4; i++) {
/* FIXME: this would be >>3 &0x7 on the 32 way */
- if(((cpuid >> 2) & 0x03) == i)
+ if (((cpuid >> 2) & 0x03) == i)
/* don't lower our own mask! */
continue;
@@ -368,12 +355,10 @@ do_quad_bootstrap(void)
}
}
-
/* Set up all the basic stuff: read the SMP config and make all the
* SMP information reflect only the boot cpu. All others will be
* brought on-line later. */
-void __init
-find_smp_config(void)
+void __init find_smp_config(void)
{
int i;
@@ -382,24 +367,31 @@ find_smp_config(void)
printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
/* initialize the CPU structures (moved from smp_boot_cpus) */
- for(i=0; i<NR_CPUS; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
cpu_irq_affinity[i] = ~0;
}
cpu_online_map = cpumask_of_cpu(boot_cpu_id);
/* The boot CPU must be extended */
- voyager_extended_vic_processors = 1<<boot_cpu_id;
+ voyager_extended_vic_processors = 1 << boot_cpu_id;
/* initially, all of the first 8 CPUs can boot */
voyager_allowed_boot_processors = 0xff;
/* set up everything for just this CPU, we can alter
* this as we start the other CPUs later */
/* now get the CPU disposition from the extended CMOS */
- cpus_addr(phys_cpu_present_map)[0] = voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
- cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
- cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16;
- cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24;
+ cpus_addr(phys_cpu_present_map)[0] =
+ voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
+ cpus_addr(phys_cpu_present_map)[0] |=
+ voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
+ cpus_addr(phys_cpu_present_map)[0] |=
+ voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
+ 2) << 16;
+ cpus_addr(phys_cpu_present_map)[0] |=
+ voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
+ 3) << 24;
cpu_possible_map = phys_cpu_present_map;
- printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]);
+ printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n",
+ cpus_addr(phys_cpu_present_map)[0]);
/* Here we set up the VIC to enable SMP */
/* enable the CPIs by writing the base vector to their register */
outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
@@ -427,8 +419,7 @@ find_smp_config(void)
/*
* The bootstrap kernel entry code has set these up. Save them
* for a given CPU, id is physical */
-void __init
-smp_store_cpu_info(int id)
+void __init smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
@@ -438,21 +429,19 @@ smp_store_cpu_info(int id)
}
/* set up the trampoline and return the physical address of the code */
-static __u32 __init
-setup_trampoline(void)
+static __u32 __init setup_trampoline(void)
{
/* these two are global symbols in trampoline.S */
extern const __u8 trampoline_end[];
extern const __u8 trampoline_data[];
- memcpy((__u8 *)trampoline_base, trampoline_data,
+ memcpy((__u8 *) trampoline_base, trampoline_data,
trampoline_end - trampoline_data);
- return virt_to_phys((__u8 *)trampoline_base);
+ return virt_to_phys((__u8 *) trampoline_base);
}
/* Routine initially called when a non-boot CPU is brought online */
-static void __init
-start_secondary(void *unused)
+static void __init start_secondary(void *unused)
{
__u8 cpuid = hard_smp_processor_id();
/* external functions not defined in the headers */
@@ -464,17 +453,18 @@ start_secondary(void *unused)
ack_CPI(VIC_CPU_BOOT_CPI);
/* setup the 8259 master slave pair belonging to this CPU ---
- * we won't actually receive any until the boot CPU
- * relinquishes it's static routing mask */
+ * we won't actually receive any until the boot CPU
+ * relinquishes it's static routing mask */
vic_setup_pic();
qic_setup();
- if(is_cpu_quad() && !is_cpu_vic_boot()) {
+ if (is_cpu_quad() && !is_cpu_vic_boot()) {
/* clear the boot CPI */
__u8 dummy;
- dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
+ dummy =
+ voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
printk("read dummy %d\n", dummy);
}
@@ -516,7 +506,6 @@ start_secondary(void *unused)
cpu_idle();
}
-
/* Routine to kick start the given CPU and wait for it to report ready
* (or timeout in startup). When this routine returns, the requested
* CPU is either fully running and configured or known to be dead.
@@ -524,29 +513,28 @@ start_secondary(void *unused)
* We call this routine sequentially 1 CPU at a time, so no need for
* locking */
-static void __init
-do_boot_cpu(__u8 cpu)
+static void __init do_boot_cpu(__u8 cpu)
{
struct task_struct *idle;
int timeout;
unsigned long flags;
- int quad_boot = (1<<cpu) & voyager_quad_processors
- & ~( voyager_extended_vic_processors
- & voyager_allowed_boot_processors);
+ int quad_boot = (1 << cpu) & voyager_quad_processors
+ & ~(voyager_extended_vic_processors
+ & voyager_allowed_boot_processors);
/* This is an area in head.S which was used to set up the
* initial kernel stack. We need to alter this to give the
* booting CPU a new stack (taken from its idle process) */
extern struct {
- __u8 *esp;
+ __u8 *sp;
unsigned short ss;
} stack_start;
/* This is the format of the CPI IDT gate (in real mode) which
* we're hijacking to boot the CPU */
- union IDTFormat {
+ union IDTFormat {
struct seg {
- __u16 Offset;
- __u16 Segment;
+ __u16 Offset;
+ __u16 Segment;
} idt;
__u32 val;
} hijack_source;
@@ -565,37 +553,44 @@ do_boot_cpu(__u8 cpu)
alternatives_smp_switch(1);
idle = fork_idle(cpu);
- if(IS_ERR(idle))
+ if (IS_ERR(idle))
panic("failed fork for CPU%d", cpu);
- idle->thread.eip = (unsigned long) start_secondary;
+ idle->thread.ip = (unsigned long)start_secondary;
/* init_tasks (in sched.c) is indexed logically */
- stack_start.esp = (void *) idle->thread.esp;
+ stack_start.sp = (void *)idle->thread.sp;
init_gdt(cpu);
- per_cpu(current_task, cpu) = idle;
+ per_cpu(current_task, cpu) = idle;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
irq_ctx_init(cpu);
/* Note: Don't modify initial ss override */
- VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
+ VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
(unsigned long)hijack_source.val, hijack_source.idt.Segment,
- hijack_source.idt.Offset, stack_start.esp));
+ hijack_source.idt.Offset, stack_start.sp));
/* init lowmem identity mapping */
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
flush_tlb_all();
- if(quad_boot) {
+ if (quad_boot) {
printk("CPU %d: non extended Quad boot\n", cpu);
- hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4);
+ hijack_vector =
+ (__u32 *)
+ phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4);
*hijack_vector = hijack_source.val;
} else {
printk("CPU%d: extended VIC boot\n", cpu);
- hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4);
+ hijack_vector =
+ (__u32 *)
+ phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4);
*hijack_vector = hijack_source.val;
/* VIC errata, may also receive interrupt at this address */
- hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4);
+ hijack_vector =
+ (__u32 *)
+ phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI +
+ VIC_DEFAULT_CPI_BASE) * 4);
*hijack_vector = hijack_source.val;
}
/* All non-boot CPUs start with interrupts fully masked. Need
@@ -603,73 +598,76 @@ do_boot_cpu(__u8 cpu)
* this in the VIC by masquerading as the processor we're
* about to boot and lowering its interrupt mask */
local_irq_save(flags);
- if(quad_boot) {
+ if (quad_boot) {
send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
} else {
outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
/* here we're altering registers belonging to `cpu' */
-
+
outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
/* now go back to our original identity */
outb(boot_cpu_id, VIC_PROCESSOR_ID);
/* and boot the CPU */
- send_CPI((1<<cpu), VIC_CPU_BOOT_CPI);
+ send_CPI((1 << cpu), VIC_CPU_BOOT_CPI);
}
cpu_booted_map = 0;
local_irq_restore(flags);
/* now wait for it to become ready (or timeout) */
- for(timeout = 0; timeout < 50000; timeout++) {
- if(cpu_booted_map)
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_booted_map)
break;
udelay(100);
}
/* reset the page table */
zap_low_mappings();
-
+
if (cpu_booted_map) {
VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
cpu, smp_processor_id()));
-
+
printk("CPU%d: ", cpu);
print_cpu_info(&cpu_data(cpu));
wmb();
cpu_set(cpu, cpu_callout_map);
cpu_set(cpu, cpu_present_map);
- }
- else {
+ } else {
printk("CPU%d FAILED TO BOOT: ", cpu);
- if (*((volatile unsigned char *)phys_to_virt(start_phys_address))==0xA5)
+ if (*
+ ((volatile unsigned char *)phys_to_virt(start_phys_address))
+ == 0xA5)
printk("Stuck.\n");
else
printk("Not responding.\n");
-
+
cpucount--;
}
}
-void __init
-smp_boot_cpus(void)
+void __init smp_boot_cpus(void)
{
int i;
/* CAT BUS initialisation must be done after the memory */
/* FIXME: The L4 has a catbus too, it just needs to be
* accessed in a totally different way */
- if(voyager_level == 5) {
+ if (voyager_level == 5) {
voyager_cat_init();
/* now that the cat has probed the Voyager System Bus, sanity
* check the cpu map */
- if( ((voyager_quad_processors | voyager_extended_vic_processors)
- & cpus_addr(phys_cpu_present_map)[0]) != cpus_addr(phys_cpu_present_map)[0]) {
+ if (((voyager_quad_processors | voyager_extended_vic_processors)
+ & cpus_addr(phys_cpu_present_map)[0]) !=
+ cpus_addr(phys_cpu_present_map)[0]) {
/* should panic */
- printk("\n\n***WARNING*** Sanity check of CPU present map FAILED\n");
+ printk("\n\n***WARNING*** "
+ "Sanity check of CPU present map FAILED\n");
}
- } else if(voyager_level == 4)
- voyager_extended_vic_processors = cpus_addr(phys_cpu_present_map)[0];
+ } else if (voyager_level == 4)
+ voyager_extended_vic_processors =
+ cpus_addr(phys_cpu_present_map)[0];
/* this sets up the idle task to run on the current cpu */
voyager_extended_cpus = 1;
@@ -678,14 +676,14 @@ smp_boot_cpus(void)
//global_irq_holder = boot_cpu_id;
/* FIXME: Need to do something about this but currently only works
- * on CPUs with a tsc which none of mine have.
- smp_tune_scheduling();
+ * on CPUs with a tsc which none of mine have.
+ smp_tune_scheduling();
*/
smp_store_cpu_info(boot_cpu_id);
printk("CPU%d: ", boot_cpu_id);
print_cpu_info(&cpu_data(boot_cpu_id));
- if(is_cpu_quad()) {
+ if (is_cpu_quad()) {
/* booting on a Quad CPU */
printk("VOYAGER SMP: Boot CPU is Quad\n");
qic_setup();
@@ -697,11 +695,11 @@ smp_boot_cpus(void)
cpu_set(boot_cpu_id, cpu_online_map);
cpu_set(boot_cpu_id, cpu_callout_map);
-
- /* loop over all the extended VIC CPUs and boot them. The
+
+ /* loop over all the extended VIC CPUs and boot them. The
* Quad CPUs must be bootstrapped by their extended VIC cpu */
- for(i = 0; i < NR_CPUS; i++) {
- if(i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
+ for (i = 0; i < NR_CPUS; i++) {
+ if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
continue;
do_boot_cpu(i);
/* This udelay seems to be needed for the Quad boots
@@ -715,25 +713,26 @@ smp_boot_cpus(void)
for (i = 0; i < NR_CPUS; i++)
if (cpu_isset(i, cpu_online_map))
bogosum += cpu_data(i).loops_per_jiffy;
- printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
+ printk(KERN_INFO "Total of %d processors activated "
+ "(%lu.%02lu BogoMIPS).\n",
+ cpucount + 1, bogosum / (500000 / HZ),
+ (bogosum / (5000 / HZ)) % 100);
}
voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
- printk("VOYAGER: Extended (interrupt handling CPUs): %d, non-extended: %d\n", voyager_extended_cpus, num_booting_cpus() - voyager_extended_cpus);
+ printk("VOYAGER: Extended (interrupt handling CPUs): "
+ "%d, non-extended: %d\n", voyager_extended_cpus,
+ num_booting_cpus() - voyager_extended_cpus);
/* that's it, switch to symmetric mode */
outb(0, VIC_PRIORITY_REGISTER);
outb(0, VIC_CLAIM_REGISTER_0);
outb(0, VIC_CLAIM_REGISTER_1);
-
+
VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
}
/* Reload the secondary CPUs task structure (this function does not
* return ) */
-void __init
-initialize_secondary(void)
+void __init initialize_secondary(void)
{
#if 0
// AC kernels only
@@ -745,11 +744,9 @@ initialize_secondary(void)
* basically just the stack pointer and the eip.
*/
- asm volatile(
- "movl %0,%%esp\n\t"
- "jmp *%1"
- :
- :"r" (current->thread.esp),"r" (current->thread.eip));
+ asm volatile ("movl %0,%%esp\n\t"
+ "jmp *%1"::"r" (current->thread.sp),
+ "r"(current->thread.ip));
}
/* handle a Voyager SYS_INT -- If we don't, the base board will
@@ -758,25 +755,23 @@ initialize_secondary(void)
* System interrupts occur because some problem was detected on the
* various busses. To find out what you have to probe all the
* hardware via the CAT bus. FIXME: At the moment we do nothing. */
-fastcall void
-smp_vic_sys_interrupt(struct pt_regs *regs)
+void smp_vic_sys_interrupt(struct pt_regs *regs)
{
ack_CPI(VIC_SYS_INT);
- printk("Voyager SYSTEM INTERRUPT\n");
+ printk("Voyager SYSTEM INTERRUPT\n");
}
/* Handle a voyager CMN_INT; These interrupts occur either because of
* a system status change or because a single bit memory error
* occurred. FIXME: At the moment, ignore all this. */
-fastcall void
-smp_vic_cmn_interrupt(struct pt_regs *regs)
+void smp_vic_cmn_interrupt(struct pt_regs *regs)
{
static __u8 in_cmn_int = 0;
static DEFINE_SPINLOCK(cmn_int_lock);
/* common ints are broadcast, so make sure we only do this once */
_raw_spin_lock(&cmn_int_lock);
- if(in_cmn_int)
+ if (in_cmn_int)
goto unlock_end;
in_cmn_int++;
@@ -784,12 +779,12 @@ smp_vic_cmn_interrupt(struct pt_regs *regs)
VDEBUG(("Voyager COMMON INTERRUPT\n"));
- if(voyager_level == 5)
+ if (voyager_level == 5)
voyager_cat_do_common_interrupt();
_raw_spin_lock(&cmn_int_lock);
in_cmn_int = 0;
- unlock_end:
+ unlock_end:
_raw_spin_unlock(&cmn_int_lock);
ack_CPI(VIC_CMN_INT);
}
@@ -797,26 +792,23 @@ smp_vic_cmn_interrupt(struct pt_regs *regs)
/*
* Reschedule call back. Nothing to do, all the work is done
* automatically when we return from the interrupt. */
-static void
-smp_reschedule_interrupt(void)
+static void smp_reschedule_interrupt(void)
{
/* do nothing */
}
-static struct mm_struct * flush_mm;
+static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL 0xffffffff
/*
- * We cannot call mmdrop() because we are in interrupt context,
+ * We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
-static inline void
-leave_mm (unsigned long cpu)
+static inline void voyager_leave_mm(unsigned long cpu)
{
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
BUG();
@@ -824,12 +816,10 @@ leave_mm (unsigned long cpu)
load_cr3(swapper_pg_dir);
}
-
/*
* Invalidate call-back
*/
-static void
-smp_invalidate_interrupt(void)
+static void smp_invalidate_interrupt(void)
{
__u8 cpu = smp_processor_id();
@@ -837,18 +827,18 @@ smp_invalidate_interrupt(void)
return;
/* This will flood messages. Don't uncomment unless you see
* Problems with cross cpu invalidation
- VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
- smp_processor_id()));
- */
+ VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
+ smp_processor_id()));
+ */
if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
+ if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
} else
- leave_mm(cpu);
+ voyager_leave_mm(cpu);
}
smp_mb__before_clear_bit();
clear_bit(cpu, &smp_invalidate_needed);
@@ -857,11 +847,10 @@ smp_invalidate_interrupt(void)
/* All the new flush operations for 2.4 */
-
/* This routine is called with a physical cpu mask */
static void
-voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
- unsigned long va)
+voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm,
+ unsigned long va)
{
int stuck = 50000;
@@ -875,7 +864,7 @@ voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
BUG();
spin_lock(&tlbstate_lock);
-
+
flush_mm = mm;
flush_va = va;
atomic_set_mask(cpumask, &smp_invalidate_needed);
@@ -887,23 +876,23 @@ voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
while (smp_invalidate_needed) {
mb();
- if(--stuck == 0) {
- printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id());
+ if (--stuck == 0) {
+ printk("***WARNING*** Stuck doing invalidate CPI "
+ "(CPU%d)\n", smp_processor_id());
break;
}
}
/* Uncomment only to debug invalidation problems
- VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
- */
+ VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
+ */
flush_mm = NULL;
flush_va = 0;
spin_unlock(&tlbstate_lock);
}
-void
-flush_tlb_current_task(void)
+void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
unsigned long cpu_mask;
@@ -913,14 +902,12 @@ flush_tlb_current_task(void)
cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
local_flush_tlb();
if (cpu_mask)
- voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
-
-void
-flush_tlb_mm (struct mm_struct * mm)
+void flush_tlb_mm(struct mm_struct *mm)
{
unsigned long cpu_mask;
@@ -932,15 +919,15 @@ flush_tlb_mm (struct mm_struct * mm)
if (current->mm)
local_flush_tlb();
else
- leave_mm(smp_processor_id());
+ voyager_leave_mm(smp_processor_id());
}
if (cpu_mask)
- voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long cpu_mask;
@@ -949,10 +936,10 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
if (current->active_mm == mm) {
- if(current->mm)
+ if (current->mm)
__flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
+ else
+ voyager_leave_mm(smp_processor_id());
}
if (cpu_mask)
@@ -960,21 +947,21 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
preempt_enable();
}
+
EXPORT_SYMBOL(flush_tlb_page);
/* enable the requested IRQs */
-static void
-smp_enable_irq_interrupt(void)
+static void smp_enable_irq_interrupt(void)
{
__u8 irq;
__u8 cpu = get_cpu();
VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
- vic_irq_enable_mask[cpu]));
+ vic_irq_enable_mask[cpu]));
spin_lock(&vic_irq_lock);
- for(irq = 0; irq < 16; irq++) {
- if(vic_irq_enable_mask[cpu] & (1<<irq))
+ for (irq = 0; irq < 16; irq++) {
+ if (vic_irq_enable_mask[cpu] & (1 << irq))
enable_local_vic_irq(irq);
}
vic_irq_enable_mask[cpu] = 0;
@@ -982,17 +969,16 @@ smp_enable_irq_interrupt(void)
put_cpu_no_resched();
}
-
+
/*
* CPU halt call-back
*/
-static void
-smp_stop_cpu_function(void *dummy)
+static void smp_stop_cpu_function(void *dummy)
{
VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
cpu_clear(smp_processor_id(), cpu_online_map);
local_irq_disable();
- for(;;)
+ for (;;)
halt();
}
@@ -1006,14 +992,13 @@ struct call_data_struct {
int wait;
};
-static struct call_data_struct * call_data;
+static struct call_data_struct *call_data;
/* execute a thread on a new CPU. The function to be called must be
* previously set up. This is used to schedule a function for
* execution on all CPUs - set up the function then broadcast a
* function_interrupt CPI to come here on each CPU */
-static void
-smp_call_function_interrupt(void)
+static void smp_call_function_interrupt(void)
{
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
@@ -1027,16 +1012,17 @@ smp_call_function_interrupt(void)
* about to execute the function
*/
mb();
- if(!test_and_clear_bit(cpu, &call_data->started)) {
+ if (!test_and_clear_bit(cpu, &call_data->started)) {
/* If the bit wasn't set, this could be a replay */
- printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion with no call pending\n", cpu);
+ printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
+ " with no call pending\n", cpu);
return;
}
/*
* At this point the info structure may be out of scope unless wait==1
*/
irq_enter();
- (*func)(info);
+ (*func) (info);
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
if (wait) {
@@ -1046,14 +1032,13 @@ smp_call_function_interrupt(void)
}
static int
-voyager_smp_call_function_mask (cpumask_t cpumask,
- void (*func) (void *info), void *info,
- int wait)
+voyager_smp_call_function_mask(cpumask_t cpumask,
+ void (*func) (void *info), void *info, int wait)
{
struct call_data_struct data;
u32 mask = cpus_addr(cpumask)[0];
- mask &= ~(1<<smp_processor_id());
+ mask &= ~(1 << smp_processor_id());
if (!mask)
return 0;
@@ -1093,7 +1078,7 @@ voyager_smp_call_function_mask (cpumask_t cpumask,
* so we use the system clock to interrupt one processor, which in
* turn, broadcasts a timer CPI to all the others --- we receive that
* CPI here. We don't use this actually for counting so losing
- * ticks doesn't matter
+ * ticks doesn't matter
*
* FIXME: For those CPUs which actually have a local APIC, we could
* try to use it to trigger this interrupt instead of having to
@@ -1101,8 +1086,7 @@ voyager_smp_call_function_mask (cpumask_t cpumask,
* no local APIC, so I can't do this
*
* This function is currently a placeholder and is unused in the code */
-fastcall void
-smp_apic_timer_interrupt(struct pt_regs *regs)
+void smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
wrapper_smp_local_timer_interrupt();
@@ -1110,8 +1094,7 @@ smp_apic_timer_interrupt(struct pt_regs *regs)
}
/* All of the QUAD interrupt GATES */
-fastcall void
-smp_qic_timer_interrupt(struct pt_regs *regs)
+void smp_qic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
ack_QIC_CPI(QIC_TIMER_CPI);
@@ -1119,127 +1102,112 @@ smp_qic_timer_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
-fastcall void
-smp_qic_invalidate_interrupt(struct pt_regs *regs)
+void smp_qic_invalidate_interrupt(struct pt_regs *regs)
{
ack_QIC_CPI(QIC_INVALIDATE_CPI);
smp_invalidate_interrupt();
}
-fastcall void
-smp_qic_reschedule_interrupt(struct pt_regs *regs)
+void smp_qic_reschedule_interrupt(struct pt_regs *regs)
{
ack_QIC_CPI(QIC_RESCHEDULE_CPI);
smp_reschedule_interrupt();
}
-fastcall void
-smp_qic_enable_irq_interrupt(struct pt_regs *regs)
+void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
{
ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
smp_enable_irq_interrupt();
}
-fastcall void
-smp_qic_call_function_interrupt(struct pt_regs *regs)
+void smp_qic_call_function_interrupt(struct pt_regs *regs)
{
ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
smp_call_function_interrupt();
}
-fastcall void
-smp_vic_cpi_interrupt(struct pt_regs *regs)
+void smp_vic_cpi_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
__u8 cpu = smp_processor_id();
- if(is_cpu_quad())
+ if (is_cpu_quad())
ack_QIC_CPI(VIC_CPI_LEVEL0);
else
ack_VIC_CPI(VIC_CPI_LEVEL0);
- if(test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
+ if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
wrapper_smp_local_timer_interrupt();
- if(test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
+ if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
smp_invalidate_interrupt();
- if(test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
+ if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
smp_reschedule_interrupt();
- if(test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
+ if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
smp_enable_irq_interrupt();
- if(test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
+ if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
smp_call_function_interrupt();
set_irq_regs(old_regs);
}
-static void
-do_flush_tlb_all(void* info)
+static void do_flush_tlb_all(void *info)
{
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
- leave_mm(cpu);
+ voyager_leave_mm(cpu);
}
-
/* flush the TLB of every active CPU in the system */
-void
-flush_tlb_all(void)
+void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, 0, 1, 1);
}
/* used to set up the trampoline for other CPUs when the memory manager
* is sorted out */
-void __init
-smp_alloc_memory(void)
+void __init smp_alloc_memory(void)
{
- trampoline_base = (__u32)alloc_bootmem_low_pages(PAGE_SIZE);
- if(__pa(trampoline_base) >= 0x93000)
+ trampoline_base = (__u32) alloc_bootmem_low_pages(PAGE_SIZE);
+ if (__pa(trampoline_base) >= 0x93000)
BUG();
}
/* send a reschedule CPI to one CPU by physical CPU number*/
-static void
-voyager_smp_send_reschedule(int cpu)
+static void voyager_smp_send_reschedule(int cpu)
{
send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
}
-
-int
-hard_smp_processor_id(void)
+int hard_smp_processor_id(void)
{
__u8 i;
__u8 cpumask = inb(VIC_PROC_WHO_AM_I);
- if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
+ if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
return cpumask & 0x1F;
- for(i = 0; i < 8; i++) {
- if(cpumask & (1<<i))
+ for (i = 0; i < 8; i++) {
+ if (cpumask & (1 << i))
return i;
}
printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
return 0;
}
-int
-safe_smp_processor_id(void)
+int safe_smp_processor_id(void)
{
return hard_smp_processor_id();
}
/* broadcast a halt to all other CPUs */
-static void
-voyager_smp_send_stop(void)
+static void voyager_smp_send_stop(void)
{
smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
}
/* this function is triggered in time.c when a clock tick fires
* we need to re-broadcast the tick to all CPUs */
-void
-smp_vic_timer_interrupt(void)
+void smp_vic_timer_interrupt(void)
{
send_CPI_allbutself(VIC_TIMER_CPI);
smp_local_timer_interrupt();
@@ -1253,8 +1221,7 @@ smp_vic_timer_interrupt(void)
* multiplier is 1 and it can be changed by writing the new multiplier
* value into /proc/profile.
*/
-void
-smp_local_timer_interrupt(void)
+void smp_local_timer_interrupt(void)
{
int cpu = smp_processor_id();
long weight;
@@ -1269,18 +1236,18 @@ smp_local_timer_interrupt(void)
*
* Interrupts are already masked off at this point.
*/
- per_cpu(prof_counter,cpu) = per_cpu(prof_multiplier, cpu);
+ per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
+ per_cpu(prof_old_multiplier, cpu)) {
/* FIXME: need to update the vic timer tick here */
per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
+ per_cpu(prof_counter, cpu);
}
update_process_times(user_mode_vm(get_irq_regs()));
}
- if( ((1<<cpu) & voyager_extended_vic_processors) == 0)
+ if (((1 << cpu) & voyager_extended_vic_processors) == 0)
/* only extended VIC processors participate in
* interrupt distribution */
return;
@@ -1296,12 +1263,12 @@ smp_local_timer_interrupt(void)
* we can take more than 100K local irqs per second on a 100 MHz P5.
*/
- if((++vic_tick[cpu] & 0x7) != 0)
+ if ((++vic_tick[cpu] & 0x7) != 0)
return;
/* get here every 16 ticks (about every 1/6 of a second) */
/* Change our priority to give someone else a chance at getting
- * the IRQ. The algorithm goes like this:
+ * the IRQ. The algorithm goes like this:
*
* In the VIC, the dynamically routed interrupt is always
* handled by the lowest priority eligible (i.e. receiving
@@ -1325,18 +1292,18 @@ smp_local_timer_interrupt(void)
* affinity code since we now try to even up the interrupt
* counts when an affinity binding is keeping them on a
* particular CPU*/
- weight = (vic_intr_count[cpu]*voyager_extended_cpus
+ weight = (vic_intr_count[cpu] * voyager_extended_cpus
- vic_intr_total) >> 4;
weight += 4;
- if(weight > 7)
+ if (weight > 7)
weight = 7;
- if(weight < 0)
+ if (weight < 0)
weight = 0;
-
- outb((__u8)weight, VIC_PRIORITY_REGISTER);
+
+ outb((__u8) weight, VIC_PRIORITY_REGISTER);
#ifdef VOYAGER_DEBUG
- if((vic_tick[cpu] & 0xFFF) == 0) {
+ if ((vic_tick[cpu] & 0xFFF) == 0) {
/* print this message roughly every 25 secs */
printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
cpu, vic_tick[cpu], weight);
@@ -1345,15 +1312,14 @@ smp_local_timer_interrupt(void)
}
/* setup the profiling timer */
-int
-setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
{
int i;
- if ( (!multiplier))
+ if ((!multiplier))
return -EINVAL;
- /*
+ /*
* Set the new multiplier for each CPU. CPUs don't start using the
* new values until the next timer interrupt in which they do process
* accounting.
@@ -1367,15 +1333,13 @@ setup_profiling_timer(unsigned int multiplier)
/* This is a bit of a mess, but forced on us by the genirq changes
* there's no genirq handler that really does what voyager wants
* so hack it up with the simple IRQ handler */
-static void fastcall
-handle_vic_irq(unsigned int irq, struct irq_desc *desc)
+static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
{
before_handle_vic_irq(irq);
handle_simple_irq(irq, desc);
after_handle_vic_irq(irq);
}
-
/* The CPIs are handled in the per cpu 8259s, so they must be
* enabled to be received: FIX: enabling the CPIs in the early
* boot sequence interferes with bug checking; enable them later
@@ -1385,13 +1349,12 @@ handle_vic_irq(unsigned int irq, struct irq_desc *desc)
#define QIC_SET_GATE(cpi, vector) \
set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
-void __init
-smp_intr_init(void)
+void __init smp_intr_init(void)
{
int i;
/* initialize the per cpu irq mask to all disabled */
- for(i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < NR_CPUS; i++)
vic_irq_mask[i] = 0xFFFF;
VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
@@ -1404,42 +1367,40 @@ smp_intr_init(void)
QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
-
- /* now put the VIC descriptor into the first 48 IRQs
+ /* now put the VIC descriptor into the first 48 IRQs
*
* This is for later: first 16 correspond to PC IRQs; next 16
* are Primary MC IRQs and final 16 are Secondary MC IRQs */
- for(i = 0; i < 48; i++)
+ for (i = 0; i < 48; i++)
set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq);
}
/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
* processor to receive CPI */
-static void
-send_CPI(__u32 cpuset, __u8 cpi)
+static void send_CPI(__u32 cpuset, __u8 cpi)
{
int cpu;
__u32 quad_cpuset = (cpuset & voyager_quad_processors);
- if(cpi < VIC_START_FAKE_CPI) {
- /* fake CPI are only used for booting, so send to the
+ if (cpi < VIC_START_FAKE_CPI) {
+ /* fake CPI are only used for booting, so send to the
* extended quads as well---Quads must be VIC booted */
- outb((__u8)(cpuset), VIC_CPI_Registers[cpi]);
+ outb((__u8) (cpuset), VIC_CPI_Registers[cpi]);
return;
}
- if(quad_cpuset)
+ if (quad_cpuset)
send_QIC_CPI(quad_cpuset, cpi);
cpuset &= ~quad_cpuset;
cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */
- if(cpuset == 0)
+ if (cpuset == 0)
return;
for_each_online_cpu(cpu) {
- if(cpuset & (1<<cpu))
+ if (cpuset & (1 << cpu))
set_bit(cpi, &vic_cpi_mailbox[cpu]);
}
- if(cpuset)
- outb((__u8)cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
+ if (cpuset)
+ outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
}
/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
@@ -1448,20 +1409,19 @@ send_CPI(__u32 cpuset, __u8 cpi)
* DON'T make this inline otherwise the cache line read will be
* optimised away
* */
-static int
-ack_QIC_CPI(__u8 cpi) {
+static int ack_QIC_CPI(__u8 cpi)
+{
__u8 cpu = hard_smp_processor_id();
cpi &= 7;
- outb(1<<cpi, QIC_INTERRUPT_CLEAR1);
+ outb(1 << cpi, QIC_INTERRUPT_CLEAR1);
return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
}
-static void
-ack_special_QIC_CPI(__u8 cpi)
+static void ack_special_QIC_CPI(__u8 cpi)
{
- switch(cpi) {
+ switch (cpi) {
case VIC_CMN_INT:
outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
break;
@@ -1474,8 +1434,7 @@ ack_special_QIC_CPI(__u8 cpi)
}
/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
-static void
-ack_VIC_CPI(__u8 cpi)
+static void ack_VIC_CPI(__u8 cpi)
{
#ifdef VOYAGER_DEBUG
unsigned long flags;
@@ -1484,17 +1443,17 @@ ack_VIC_CPI(__u8 cpi)
local_irq_save(flags);
isr = vic_read_isr();
- if((isr & (1<<(cpi &7))) == 0) {
+ if ((isr & (1 << (cpi & 7))) == 0) {
printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
}
#endif
/* send specific EOI; the two system interrupts have
* bit 4 set for a separate vector but behave as the
* corresponding 3 bit intr */
- outb_p(0x60|(cpi & 7),0x20);
+ outb_p(0x60 | (cpi & 7), 0x20);
#ifdef VOYAGER_DEBUG
- if((vic_read_isr() & (1<<(cpi &7))) != 0) {
+ if ((vic_read_isr() & (1 << (cpi & 7))) != 0) {
printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
}
local_irq_restore(flags);
@@ -1502,12 +1461,11 @@ ack_VIC_CPI(__u8 cpi)
}
/* cribbed with thanks from irq.c */
-#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define __byte(x,y) (((unsigned char *)&(y))[x])
#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu]))
#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu]))
-static unsigned int
-startup_vic_irq(unsigned int irq)
+static unsigned int startup_vic_irq(unsigned int irq)
{
unmask_vic_irq(irq);
@@ -1535,13 +1493,12 @@ startup_vic_irq(unsigned int irq)
* broadcast an Interrupt enable CPI which causes all other CPUs to
* adjust their masks accordingly. */
-static void
-unmask_vic_irq(unsigned int irq)
+static void unmask_vic_irq(unsigned int irq)
{
/* linux doesn't to processor-irq affinity, so enable on
* all CPUs we know about */
int cpu = smp_processor_id(), real_cpu;
- __u16 mask = (1<<irq);
+ __u16 mask = (1 << irq);
__u32 processorList = 0;
unsigned long flags;
@@ -1549,78 +1506,72 @@ unmask_vic_irq(unsigned int irq)
irq, cpu, cpu_irq_affinity[cpu]));
spin_lock_irqsave(&vic_irq_lock, flags);
for_each_online_cpu(real_cpu) {
- if(!(voyager_extended_vic_processors & (1<<real_cpu)))
+ if (!(voyager_extended_vic_processors & (1 << real_cpu)))
continue;
- if(!(cpu_irq_affinity[real_cpu] & mask)) {
+ if (!(cpu_irq_affinity[real_cpu] & mask)) {
/* irq has no affinity for this CPU, ignore */
continue;
}
- if(real_cpu == cpu) {
+ if (real_cpu == cpu) {
enable_local_vic_irq(irq);
- }
- else if(vic_irq_mask[real_cpu] & mask) {
+ } else if (vic_irq_mask[real_cpu] & mask) {
vic_irq_enable_mask[real_cpu] |= mask;
- processorList |= (1<<real_cpu);
+ processorList |= (1 << real_cpu);
}
}
spin_unlock_irqrestore(&vic_irq_lock, flags);
- if(processorList)
+ if (processorList)
send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
}
-static void
-mask_vic_irq(unsigned int irq)
+static void mask_vic_irq(unsigned int irq)
{
/* lazy disable, do nothing */
}
-static void
-enable_local_vic_irq(unsigned int irq)
+static void enable_local_vic_irq(unsigned int irq)
{
__u8 cpu = smp_processor_id();
__u16 mask = ~(1 << irq);
__u16 old_mask = vic_irq_mask[cpu];
vic_irq_mask[cpu] &= mask;
- if(vic_irq_mask[cpu] == old_mask)
+ if (vic_irq_mask[cpu] == old_mask)
return;
VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
irq, cpu));
if (irq & 8) {
- outb_p(cached_A1(cpu),0xA1);
+ outb_p(cached_A1(cpu), 0xA1);
(void)inb_p(0xA1);
- }
- else {
- outb_p(cached_21(cpu),0x21);
+ } else {
+ outb_p(cached_21(cpu), 0x21);
(void)inb_p(0x21);
}
}
-static void
-disable_local_vic_irq(unsigned int irq)
+static void disable_local_vic_irq(unsigned int irq)
{
__u8 cpu = smp_processor_id();
__u16 mask = (1 << irq);
__u16 old_mask = vic_irq_mask[cpu];
- if(irq == 7)
+ if (irq == 7)
return;
vic_irq_mask[cpu] |= mask;
- if(old_mask == vic_irq_mask[cpu])
+ if (old_mask == vic_irq_mask[cpu])
return;
VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
irq, cpu));
if (irq & 8) {
- outb_p(cached_A1(cpu),0xA1);
+ outb_p(cached_A1(cpu), 0xA1);
(void)inb_p(0xA1);
- }
- else {
- outb_p(cached_21(cpu),0x21);
+ } else {
+ outb_p(cached_21(cpu), 0x21);
(void)inb_p(0x21);
}
}
@@ -1631,8 +1582,7 @@ disable_local_vic_irq(unsigned int irq)
* interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If
* this interrupt actually comes in, then we mask and ack here to push
* the interrupt off to another CPU */
-static void
-before_handle_vic_irq(unsigned int irq)
+static void before_handle_vic_irq(unsigned int irq)
{
irq_desc_t *desc = irq_desc + irq;
__u8 cpu = smp_processor_id();
@@ -1641,16 +1591,16 @@ before_handle_vic_irq(unsigned int irq)
vic_intr_total++;
vic_intr_count[cpu]++;
- if(!(cpu_irq_affinity[cpu] & (1<<irq))) {
+ if (!(cpu_irq_affinity[cpu] & (1 << irq))) {
/* The irq is not in our affinity mask, push it off
* onto another CPU */
- VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d on cpu %d\n",
- irq, cpu));
+ VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d "
+ "on cpu %d\n", irq, cpu));
disable_local_vic_irq(irq);
/* set IRQ_INPROGRESS to prevent the handler in irq.c from
* actually calling the interrupt routine */
desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
- } else if(desc->status & IRQ_DISABLED) {
+ } else if (desc->status & IRQ_DISABLED) {
/* Damn, the interrupt actually arrived, do the lazy
* disable thing. The interrupt routine in irq.c will
* not handle a IRQ_DISABLED interrupt, so nothing more
@@ -1667,8 +1617,7 @@ before_handle_vic_irq(unsigned int irq)
}
/* Finish the VIC interrupt: basically mask */
-static void
-after_handle_vic_irq(unsigned int irq)
+static void after_handle_vic_irq(unsigned int irq)
{
irq_desc_t *desc = irq_desc + irq;
@@ -1685,11 +1634,11 @@ after_handle_vic_irq(unsigned int irq)
#ifdef VOYAGER_DEBUG
/* DEBUG: before we ack, check what's in progress */
isr = vic_read_isr();
- if((isr & (1<<irq) && !(status & IRQ_REPLAY)) == 0) {
+ if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) {
int i;
__u8 cpu = smp_processor_id();
__u8 real_cpu;
- int mask; /* Um... initialize me??? --RR */
+ int mask; /* Um... initialize me??? --RR */
printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
cpu, irq);
@@ -1698,9 +1647,10 @@ after_handle_vic_irq(unsigned int irq)
outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
VIC_PROCESSOR_ID);
isr = vic_read_isr();
- if(isr & (1<<irq)) {
- printk("VOYAGER SMP: CPU%d ack irq %d\n",
- real_cpu, irq);
+ if (isr & (1 << irq)) {
+ printk
+ ("VOYAGER SMP: CPU%d ack irq %d\n",
+ real_cpu, irq);
ack_vic_irq(irq);
}
outb(cpu, VIC_PROCESSOR_ID);
@@ -1711,7 +1661,7 @@ after_handle_vic_irq(unsigned int irq)
* receipt by another CPU so everything must be in
* order here */
ack_vic_irq(irq);
- if(status & IRQ_REPLAY) {
+ if (status & IRQ_REPLAY) {
/* replay is set if we disable the interrupt
* in the before_handle_vic_irq() routine, so
* clear the in progress bit here to allow the
@@ -1720,9 +1670,9 @@ after_handle_vic_irq(unsigned int irq)
}
#ifdef VOYAGER_DEBUG
isr = vic_read_isr();
- if((isr & (1<<irq)) != 0)
- printk("VOYAGER SMP: after_handle_vic_irq() after ack irq=%d, isr=0x%x\n",
- irq, isr);
+ if ((isr & (1 << irq)) != 0)
+ printk("VOYAGER SMP: after_handle_vic_irq() after "
+ "ack irq=%d, isr=0x%x\n", irq, isr);
#endif /* VOYAGER_DEBUG */
}
_raw_spin_unlock(&vic_irq_lock);
@@ -1731,7 +1681,6 @@ after_handle_vic_irq(unsigned int irq)
* may be intercepted by another CPU if reasserted */
}
-
/* Linux processor - interrupt affinity manipulations.
*
* For each processor, we maintain a 32 bit irq affinity mask.
@@ -1748,8 +1697,7 @@ after_handle_vic_irq(unsigned int irq)
* change the mask and then do an interrupt enable CPI to re-enable on
* the selected processors */
-void
-set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
+void set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
{
/* Only extended processors handle interrupts */
unsigned long real_mask;
@@ -1757,13 +1705,13 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
int cpu;
real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors;
-
- if(cpus_addr(mask)[0] == 0)
+
+ if (cpus_addr(mask)[0] == 0)
/* can't have no CPUs to accept the interrupt -- extremely
* bad things will happen */
return;
- if(irq == 0)
+ if (irq == 0)
/* can't change the affinity of the timer IRQ. This
* is due to the constraint in the voyager
* architecture that the CPI also comes in on and IRQ
@@ -1772,7 +1720,7 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
* will no-longer be able to accept VIC CPIs */
return;
- if(irq >= 32)
+ if (irq >= 32)
/* You can only have 32 interrupts in a voyager system
* (and 32 only if you have a secondary microchannel
* bus) */
@@ -1780,8 +1728,8 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
for_each_online_cpu(cpu) {
unsigned long cpu_mask = 1 << cpu;
-
- if(cpu_mask & real_mask) {
+
+ if (cpu_mask & real_mask) {
/* enable the interrupt for this cpu */
cpu_irq_affinity[cpu] |= irq_mask;
} else {
@@ -1800,25 +1748,23 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
unmask_vic_irq(irq);
}
-static void
-ack_vic_irq(unsigned int irq)
+static void ack_vic_irq(unsigned int irq)
{
if (irq & 8) {
- outb(0x62,0x20); /* Specific EOI to cascade */
- outb(0x60|(irq & 7),0xA0);
+ outb(0x62, 0x20); /* Specific EOI to cascade */
+ outb(0x60 | (irq & 7), 0xA0);
} else {
- outb(0x60 | (irq & 7),0x20);
+ outb(0x60 | (irq & 7), 0x20);
}
}
/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259
* but are not vectored by it. This means that the 8259 mask must be
* lowered to receive them */
-static __init void
-vic_enable_cpi(void)
+static __init void vic_enable_cpi(void)
{
__u8 cpu = smp_processor_id();
-
+
/* just take a copy of the current mask (nop for boot cpu) */
vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
@@ -1827,7 +1773,7 @@ vic_enable_cpi(void)
/* for sys int and cmn int */
enable_local_vic_irq(7);
- if(is_cpu_quad()) {
+ if (is_cpu_quad()) {
outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
@@ -1838,8 +1784,7 @@ vic_enable_cpi(void)
cpu, vic_irq_mask[cpu]));
}
-void
-voyager_smp_dump()
+void voyager_smp_dump()
{
int old_cpu = smp_processor_id(), cpu;
@@ -1865,10 +1810,10 @@ voyager_smp_dump()
cpu, vic_irq_mask[cpu], imr, irr, isr);
#if 0
/* These lines are put in to try to unstick an un ack'd irq */
- if(isr != 0) {
+ if (isr != 0) {
int irq;
- for(irq=0; irq<16; irq++) {
- if(isr & (1<<irq)) {
+ for (irq = 0; irq < 16; irq++) {
+ if (isr & (1 << irq)) {
printk("\tCPU%d: ack irq %d\n",
cpu, irq);
local_irq_save(flags);
@@ -1884,17 +1829,15 @@ voyager_smp_dump()
}
}
-void
-smp_voyager_power_off(void *dummy)
+void smp_voyager_power_off(void *dummy)
{
- if(smp_processor_id() == boot_cpu_id)
+ if (smp_processor_id() == boot_cpu_id)
voyager_power_off();
else
smp_stop_cpu_function(NULL);
}
-static void __init
-voyager_smp_prepare_cpus(unsigned int max_cpus)
+static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
{
/* FIXME: ignore max_cpus for now */
smp_boot_cpus();
@@ -1911,8 +1854,7 @@ static void __cpuinit voyager_smp_prepare_boot_cpu(void)
cpu_set(smp_processor_id(), cpu_present_map);
}
-static int __cpuinit
-voyager_cpu_up(unsigned int cpu)
+static int __cpuinit voyager_cpu_up(unsigned int cpu)
{
/* This only works at boot for x86. See "rewrite" above. */
if (cpu_isset(cpu, smp_commenced_mask))
@@ -1928,14 +1870,12 @@ voyager_cpu_up(unsigned int cpu)
return 0;
}
-static void __init
-voyager_smp_cpus_done(unsigned int max_cpus)
+static void __init voyager_smp_cpus_done(unsigned int max_cpus)
{
zap_low_mappings();
}
-void __init
-smp_setup_processor_id(void)
+void __init smp_setup_processor_id(void)
{
current_thread_info()->cpu = hard_smp_processor_id();
x86_write_percpu(cpu_number, hard_smp_processor_id());
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
index 50f9366c411..c69c931818e 100644
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ b/arch/x86/mach-voyager/voyager_thread.c
@@ -30,12 +30,10 @@
#include <asm/mtrr.h>
#include <asm/msr.h>
-
struct task_struct *voyager_thread;
static __u8 set_timeout;
-static int
-execute(const char *string)
+static int execute(const char *string)
{
int ret;
@@ -52,48 +50,48 @@ execute(const char *string)
NULL,
};
- if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
- printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
- string, ret);
+ if ((ret =
+ call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
+ printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string,
+ ret);
}
return ret;
}
-static void
-check_from_kernel(void)
+static void check_from_kernel(void)
{
- if(voyager_status.switch_off) {
-
+ if (voyager_status.switch_off) {
+
/* FIXME: This should be configurable via proc */
execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
- } else if(voyager_status.power_fail) {
+ } else if (voyager_status.power_fail) {
VDEBUG(("Voyager daemon detected AC power failure\n"));
-
+
/* FIXME: This should be configureable via proc */
execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
set_timeout = 1;
}
}
-static void
-check_continuing_condition(void)
+static void check_continuing_condition(void)
{
- if(voyager_status.power_fail) {
+ if (voyager_status.power_fail) {
__u8 data;
- voyager_cat_psi(VOYAGER_PSI_SUBREAD,
+ voyager_cat_psi(VOYAGER_PSI_SUBREAD,
VOYAGER_PSI_AC_FAIL_REG, &data);
- if((data & 0x1f) == 0) {
+ if ((data & 0x1f) == 0) {
/* all power restored */
- printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n");
+ printk(KERN_NOTICE
+ "VOYAGER AC power restored, cancelling shutdown\n");
/* FIXME: should be user configureable */
- execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
+ execute
+ ("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
set_timeout = 0;
}
}
}
-static int
-thread(void *unused)
+static int thread(void *unused)
{
printk(KERN_NOTICE "Voyager starting monitor thread\n");
@@ -102,7 +100,7 @@ thread(void *unused)
schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT);
VDEBUG(("Voyager Daemon awoken\n"));
- if(voyager_status.request_from_kernel == 0) {
+ if (voyager_status.request_from_kernel == 0) {
/* probably awoken from timeout */
check_continuing_condition();
} else {
@@ -112,20 +110,18 @@ thread(void *unused)
}
}
-static int __init
-voyager_thread_start(void)
+static int __init voyager_thread_start(void)
{
voyager_thread = kthread_run(thread, NULL, "kvoyagerd");
if (IS_ERR(voyager_thread)) {
- printk(KERN_ERR "Voyager: Failed to create system monitor thread.\n");
+ printk(KERN_ERR
+ "Voyager: Failed to create system monitor thread.\n");
return PTR_ERR(voyager_thread);
}
return 0;
}
-
-static void __exit
-voyager_thread_stop(void)
+static void __exit voyager_thread_stop(void)
{
kthread_stop(voyager_thread);
}
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index a1b0d22f697..59d353d2c59 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -33,45 +33,41 @@
#undef PRINT_MESSAGES
/* */
-
#if 0
void Un_impl(void)
{
- u_char byte1, FPU_modrm;
- unsigned long address = FPU_ORIG_EIP;
-
- RE_ENTRANT_CHECK_OFF;
- /* No need to check access_ok(), we have previously fetched these bytes. */
- printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
- if ( FPU_CS == __USER_CS )
- {
- while ( 1 )
- {
- FPU_get_user(byte1, (u_char __user *) address);
- if ( (byte1 & 0xf8) == 0xd8 ) break;
- printk("[%02x]", byte1);
- address++;
+ u_char byte1, FPU_modrm;
+ unsigned long address = FPU_ORIG_EIP;
+
+ RE_ENTRANT_CHECK_OFF;
+ /* No need to check access_ok(), we have previously fetched these bytes. */
+ printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *)address);
+ if (FPU_CS == __USER_CS) {
+ while (1) {
+ FPU_get_user(byte1, (u_char __user *) address);
+ if ((byte1 & 0xf8) == 0xd8)
+ break;
+ printk("[%02x]", byte1);
+ address++;
+ }
+ printk("%02x ", byte1);
+ FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
+
+ if (FPU_modrm >= 0300)
+ printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8,
+ FPU_modrm & 7);
+ else
+ printk("/%d\n", (FPU_modrm >> 3) & 7);
+ } else {
+ printk("cs selector = %04x\n", FPU_CS);
}
- printk("%02x ", byte1);
- FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
-
- if (FPU_modrm >= 0300)
- printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
- else
- printk("/%d\n", (FPU_modrm >> 3) & 7);
- }
- else
- {
- printk("cs selector = %04x\n", FPU_CS);
- }
-
- RE_ENTRANT_CHECK_ON;
-
- EXCEPTION(EX_Invalid);
-}
-#endif /* 0 */
+ RE_ENTRANT_CHECK_ON;
+ EXCEPTION(EX_Invalid);
+
+}
+#endif /* 0 */
/*
Called for opcodes which are illegal and which are known to result in a
@@ -79,139 +75,152 @@ void Un_impl(void)
*/
void FPU_illegal(void)
{
- math_abort(FPU_info,SIGILL);
+ math_abort(FPU_info, SIGILL);
}
-
-
void FPU_printall(void)
{
- int i;
- static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
- "DeNorm", "Inf", "NaN" };
- u_char byte1, FPU_modrm;
- unsigned long address = FPU_ORIG_EIP;
-
- RE_ENTRANT_CHECK_OFF;
- /* No need to check access_ok(), we have previously fetched these bytes. */
- printk("At %p:", (void *) address);
- if ( FPU_CS == __USER_CS )
- {
+ int i;
+ static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
+ "DeNorm", "Inf", "NaN"
+ };
+ u_char byte1, FPU_modrm;
+ unsigned long address = FPU_ORIG_EIP;
+
+ RE_ENTRANT_CHECK_OFF;
+ /* No need to check access_ok(), we have previously fetched these bytes. */
+ printk("At %p:", (void *)address);
+ if (FPU_CS == __USER_CS) {
#define MAX_PRINTED_BYTES 20
- for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
- {
- FPU_get_user(byte1, (u_char __user *) address);
- if ( (byte1 & 0xf8) == 0xd8 )
- {
- printk(" %02x", byte1);
- break;
- }
- printk(" [%02x]", byte1);
- address++;
- }
- if ( i == MAX_PRINTED_BYTES )
- printk(" [more..]\n");
- else
- {
- FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
-
- if (FPU_modrm >= 0300)
- printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
- else
- printk(" /%d, mod=%d rm=%d\n",
- (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
+ for (i = 0; i < MAX_PRINTED_BYTES; i++) {
+ FPU_get_user(byte1, (u_char __user *) address);
+ if ((byte1 & 0xf8) == 0xd8) {
+ printk(" %02x", byte1);
+ break;
+ }
+ printk(" [%02x]", byte1);
+ address++;
+ }
+ if (i == MAX_PRINTED_BYTES)
+ printk(" [more..]\n");
+ else {
+ FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
+
+ if (FPU_modrm >= 0300)
+ printk(" %02x (%02x+%d)\n", FPU_modrm,
+ FPU_modrm & 0xf8, FPU_modrm & 7);
+ else
+ printk(" /%d, mod=%d rm=%d\n",
+ (FPU_modrm >> 3) & 7,
+ (FPU_modrm >> 6) & 3, FPU_modrm & 7);
+ }
+ } else {
+ printk("%04x\n", FPU_CS);
}
- }
- else
- {
- printk("%04x\n", FPU_CS);
- }
- partial_status = status_word();
+ partial_status = status_word();
#ifdef DEBUGGING
-if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n");
-if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n");
-if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n");
-if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n");
-if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n");
-if ( partial_status & SW_Summary ) printk("SW: exception summary\n");
-if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
-if ( partial_status & SW_Precision ) printk("SW: loss of precision\n");
-if ( partial_status & SW_Underflow ) printk("SW: underflow\n");
-if ( partial_status & SW_Overflow ) printk("SW: overflow\n");
-if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n");
-if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n");
-if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n");
+ if (partial_status & SW_Backward)
+ printk("SW: backward compatibility\n");
+ if (partial_status & SW_C3)
+ printk("SW: condition bit 3\n");
+ if (partial_status & SW_C2)
+ printk("SW: condition bit 2\n");
+ if (partial_status & SW_C1)
+ printk("SW: condition bit 1\n");
+ if (partial_status & SW_C0)
+ printk("SW: condition bit 0\n");
+ if (partial_status & SW_Summary)
+ printk("SW: exception summary\n");
+ if (partial_status & SW_Stack_Fault)
+ printk("SW: stack fault\n");
+ if (partial_status & SW_Precision)
+ printk("SW: loss of precision\n");
+ if (partial_status & SW_Underflow)
+ printk("SW: underflow\n");
+ if (partial_status & SW_Overflow)
+ printk("SW: overflow\n");
+ if (partial_status & SW_Zero_Div)
+ printk("SW: divide by zero\n");
+ if (partial_status & SW_Denorm_Op)
+ printk("SW: denormalized operand\n");
+ if (partial_status & SW_Invalid)
+ printk("SW: invalid operation\n");
#endif /* DEBUGGING */
- printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
- partial_status & 0x8000 ? 1 : 0, /* busy */
- (partial_status & 0x3800) >> 11, /* stack top pointer */
- partial_status & 0x80 ? 1 : 0, /* Error summary status */
- partial_status & 0x40 ? 1 : 0, /* Stack flag */
- partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
- partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
- partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
- partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
- partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
-
-printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n",
- control_word & 0x1000 ? 1 : 0,
- (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
- (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
- control_word & 0x80 ? 1 : 0,
- control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
- control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
- control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
-
- for ( i = 0; i < 8; i++ )
- {
- FPU_REG *r = &st(i);
- u_char tagi = FPU_gettagi(i);
- switch (tagi)
- {
- case TAG_Empty:
- continue;
- break;
- case TAG_Zero:
- case TAG_Special:
- tagi = FPU_Special(r);
- case TAG_Valid:
- printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
- getsign(r) ? '-' : '+',
- (long)(r->sigh >> 16),
- (long)(r->sigh & 0xFFFF),
- (long)(r->sigl >> 16),
- (long)(r->sigl & 0xFFFF),
- exponent(r) - EXP_BIAS + 1);
- break;
- default:
- printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
- continue;
- break;
+ printk(" SW: b=%d st=%d es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", partial_status & 0x8000 ? 1 : 0, /* busy */
+ (partial_status & 0x3800) >> 11, /* stack top pointer */
+ partial_status & 0x80 ? 1 : 0, /* Error summary status */
+ partial_status & 0x40 ? 1 : 0, /* Stack flag */
+ partial_status & SW_C3 ? 1 : 0, partial_status & SW_C2 ? 1 : 0, /* cc */
+ partial_status & SW_C1 ? 1 : 0, partial_status & SW_C0 ? 1 : 0, /* cc */
+ partial_status & SW_Precision ? 1 : 0,
+ partial_status & SW_Underflow ? 1 : 0,
+ partial_status & SW_Overflow ? 1 : 0,
+ partial_status & SW_Zero_Div ? 1 : 0,
+ partial_status & SW_Denorm_Op ? 1 : 0,
+ partial_status & SW_Invalid ? 1 : 0);
+
+ printk(" CW: ic=%d rc=%d%d pc=%d%d iem=%d ef=%d%d%d%d%d%d\n",
+ control_word & 0x1000 ? 1 : 0,
+ (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
+ (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
+ control_word & 0x80 ? 1 : 0,
+ control_word & SW_Precision ? 1 : 0,
+ control_word & SW_Underflow ? 1 : 0,
+ control_word & SW_Overflow ? 1 : 0,
+ control_word & SW_Zero_Div ? 1 : 0,
+ control_word & SW_Denorm_Op ? 1 : 0,
+ control_word & SW_Invalid ? 1 : 0);
+
+ for (i = 0; i < 8; i++) {
+ FPU_REG *r = &st(i);
+ u_char tagi = FPU_gettagi(i);
+ switch (tagi) {
+ case TAG_Empty:
+ continue;
+ break;
+ case TAG_Zero:
+ case TAG_Special:
+ tagi = FPU_Special(r);
+ case TAG_Valid:
+ printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
+ getsign(r) ? '-' : '+',
+ (long)(r->sigh >> 16),
+ (long)(r->sigh & 0xFFFF),
+ (long)(r->sigl >> 16),
+ (long)(r->sigl & 0xFFFF),
+ exponent(r) - EXP_BIAS + 1);
+ break;
+ default:
+ printk("Whoops! Error in errors.c: tag%d is %d ", i,
+ tagi);
+ continue;
+ break;
+ }
+ printk("%s\n", tag_desc[(int)(unsigned)tagi]);
}
- printk("%s\n", tag_desc[(int) (unsigned) tagi]);
- }
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_ON;
}
static struct {
- int type;
- const char *name;
+ int type;
+ const char *name;
} exception_names[] = {
- { EX_StackOver, "stack overflow" },
- { EX_StackUnder, "stack underflow" },
- { EX_Precision, "loss of precision" },
- { EX_Underflow, "underflow" },
- { EX_Overflow, "overflow" },
- { EX_ZeroDiv, "divide by zero" },
- { EX_Denormal, "denormalized operand" },
- { EX_Invalid, "invalid operation" },
- { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
- { 0, NULL }
+ {
+ EX_StackOver, "stack overflow"}, {
+ EX_StackUnder, "stack underflow"}, {
+ EX_Precision, "loss of precision"}, {
+ EX_Underflow, "underflow"}, {
+ EX_Overflow, "overflow"}, {
+ EX_ZeroDiv, "divide by zero"}, {
+ EX_Denormal, "denormalized operand"}, {
+ EX_Invalid, "invalid operation"}, {
+ EX_INTERNAL, "INTERNAL BUG in " FPU_VERSION}, {
+ 0, NULL}
};
/*
@@ -295,445 +304,386 @@ static struct {
asmlinkage void FPU_exception(int n)
{
- int i, int_type;
-
- int_type = 0; /* Needed only to stop compiler warnings */
- if ( n & EX_INTERNAL )
- {
- int_type = n - EX_INTERNAL;
- n = EX_INTERNAL;
- /* Set lots of exception bits! */
- partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
- }
- else
- {
- /* Extract only the bits which we use to set the status word */
- n &= (SW_Exc_Mask);
- /* Set the corresponding exception bit */
- partial_status |= n;
- /* Set summary bits iff exception isn't masked */
- if ( partial_status & ~control_word & CW_Exceptions )
- partial_status |= (SW_Summary | SW_Backward);
- if ( n & (SW_Stack_Fault | EX_Precision) )
- {
- if ( !(n & SW_C1) )
- /* This bit distinguishes over- from underflow for a stack fault,
- and roundup from round-down for precision loss. */
- partial_status &= ~SW_C1;
+ int i, int_type;
+
+ int_type = 0; /* Needed only to stop compiler warnings */
+ if (n & EX_INTERNAL) {
+ int_type = n - EX_INTERNAL;
+ n = EX_INTERNAL;
+ /* Set lots of exception bits! */
+ partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
+ } else {
+ /* Extract only the bits which we use to set the status word */
+ n &= (SW_Exc_Mask);
+ /* Set the corresponding exception bit */
+ partial_status |= n;
+ /* Set summary bits iff exception isn't masked */
+ if (partial_status & ~control_word & CW_Exceptions)
+ partial_status |= (SW_Summary | SW_Backward);
+ if (n & (SW_Stack_Fault | EX_Precision)) {
+ if (!(n & SW_C1))
+ /* This bit distinguishes over- from underflow for a stack fault,
+ and roundup from round-down for precision loss. */
+ partial_status &= ~SW_C1;
+ }
}
- }
- RE_ENTRANT_CHECK_OFF;
- if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
- {
+ RE_ENTRANT_CHECK_OFF;
+ if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) {
#ifdef PRINT_MESSAGES
- /* My message from the sponsor */
- printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
+ /* My message from the sponsor */
+ printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n");
#endif /* PRINT_MESSAGES */
-
- /* Get a name string for error reporting */
- for (i=0; exception_names[i].type; i++)
- if ( (exception_names[i].type & n) == exception_names[i].type )
- break;
-
- if (exception_names[i].type)
- {
+
+ /* Get a name string for error reporting */
+ for (i = 0; exception_names[i].type; i++)
+ if ((exception_names[i].type & n) ==
+ exception_names[i].type)
+ break;
+
+ if (exception_names[i].type) {
#ifdef PRINT_MESSAGES
- printk("FP Exception: %s!\n", exception_names[i].name);
+ printk("FP Exception: %s!\n", exception_names[i].name);
#endif /* PRINT_MESSAGES */
- }
- else
- printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
-
- if ( n == EX_INTERNAL )
- {
- printk("FPU emulator: Internal error type 0x%04x\n", int_type);
- FPU_printall();
- }
+ } else
+ printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
+
+ if (n == EX_INTERNAL) {
+ printk("FPU emulator: Internal error type 0x%04x\n",
+ int_type);
+ FPU_printall();
+ }
#ifdef PRINT_MESSAGES
- else
- FPU_printall();
+ else
+ FPU_printall();
#endif /* PRINT_MESSAGES */
- /*
- * The 80486 generates an interrupt on the next non-control FPU
- * instruction. So we need some means of flagging it.
- * We use the ES (Error Summary) bit for this.
- */
- }
- RE_ENTRANT_CHECK_ON;
+ /*
+ * The 80486 generates an interrupt on the next non-control FPU
+ * instruction. So we need some means of flagging it.
+ * We use the ES (Error Summary) bit for this.
+ */
+ }
+ RE_ENTRANT_CHECK_ON;
#ifdef __DEBUG__
- math_abort(FPU_info,SIGFPE);
+ math_abort(FPU_info, SIGFPE);
#endif /* __DEBUG__ */
}
-
/* Real operation attempted on a NaN. */
/* Returns < 0 if the exception is unmasked */
int real_1op_NaN(FPU_REG *a)
{
- int signalling, isNaN;
-
- isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
-
- /* The default result for the case of two "equal" NaNs (signs may
- differ) is chosen to reproduce 80486 behaviour */
- signalling = isNaN && !(a->sigh & 0x40000000);
-
- if ( !signalling )
- {
- if ( !isNaN ) /* pseudo-NaN, or other unsupported? */
- {
- if ( control_word & CW_Invalid )
- {
- /* Masked response */
- reg_copy(&CONST_QNaN, a);
- }
- EXCEPTION(EX_Invalid);
- return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+ int signalling, isNaN;
+
+ isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
+
+ /* The default result for the case of two "equal" NaNs (signs may
+ differ) is chosen to reproduce 80486 behaviour */
+ signalling = isNaN && !(a->sigh & 0x40000000);
+
+ if (!signalling) {
+ if (!isNaN) { /* pseudo-NaN, or other unsupported? */
+ if (control_word & CW_Invalid) {
+ /* Masked response */
+ reg_copy(&CONST_QNaN, a);
+ }
+ EXCEPTION(EX_Invalid);
+ return (!(control_word & CW_Invalid) ? FPU_Exception :
+ 0) | TAG_Special;
+ }
+ return TAG_Special;
}
- return TAG_Special;
- }
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */
- {
- reg_copy(&CONST_QNaN, a);
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ if (!(a->sigh & 0x80000000)) { /* pseudo-NaN ? */
+ reg_copy(&CONST_QNaN, a);
+ }
+ /* ensure a Quiet NaN */
+ a->sigh |= 0x40000000;
}
- /* ensure a Quiet NaN */
- a->sigh |= 0x40000000;
- }
- EXCEPTION(EX_Invalid);
+ EXCEPTION(EX_Invalid);
- return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+ return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
}
-
/* Real operation attempted on two operands, one a NaN. */
/* Returns < 0 if the exception is unmasked */
int real_2op_NaN(FPU_REG const *b, u_char tagb,
- int deststnr,
- FPU_REG const *defaultNaN)
+ int deststnr, FPU_REG const *defaultNaN)
{
- FPU_REG *dest = &st(deststnr);
- FPU_REG const *a = dest;
- u_char taga = FPU_gettagi(deststnr);
- FPU_REG const *x;
- int signalling, unsupported;
-
- if ( taga == TAG_Special )
- taga = FPU_Special(a);
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
-
- /* TW_NaN is also used for unsupported data types. */
- unsupported = ((taga == TW_NaN)
- && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
- || ((tagb == TW_NaN)
- && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
- if ( unsupported )
- {
- if ( control_word & CW_Invalid )
- {
- /* Masked response */
- FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
- }
- EXCEPTION(EX_Invalid);
- return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
- }
-
- if (taga == TW_NaN)
- {
- x = a;
- if (tagb == TW_NaN)
- {
- signalling = !(a->sigh & b->sigh & 0x40000000);
- if ( significand(b) > significand(a) )
- x = b;
- else if ( significand(b) == significand(a) )
- {
- /* The default result for the case of two "equal" NaNs (signs may
- differ) is chosen to reproduce 80486 behaviour */
- x = defaultNaN;
- }
- }
- else
- {
- /* return the quiet version of the NaN in a */
- signalling = !(a->sigh & 0x40000000);
+ FPU_REG *dest = &st(deststnr);
+ FPU_REG const *a = dest;
+ u_char taga = FPU_gettagi(deststnr);
+ FPU_REG const *x;
+ int signalling, unsupported;
+
+ if (taga == TAG_Special)
+ taga = FPU_Special(a);
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
+
+ /* TW_NaN is also used for unsupported data types. */
+ unsupported = ((taga == TW_NaN)
+ && !((exponent(a) == EXP_OVER)
+ && (a->sigh & 0x80000000)))
+ || ((tagb == TW_NaN)
+ && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
+ if (unsupported) {
+ if (control_word & CW_Invalid) {
+ /* Masked response */
+ FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+ }
+ EXCEPTION(EX_Invalid);
+ return (!(control_word & CW_Invalid) ? FPU_Exception : 0) |
+ TAG_Special;
}
- }
- else
+
+ if (taga == TW_NaN) {
+ x = a;
+ if (tagb == TW_NaN) {
+ signalling = !(a->sigh & b->sigh & 0x40000000);
+ if (significand(b) > significand(a))
+ x = b;
+ else if (significand(b) == significand(a)) {
+ /* The default result for the case of two "equal" NaNs (signs may
+ differ) is chosen to reproduce 80486 behaviour */
+ x = defaultNaN;
+ }
+ } else {
+ /* return the quiet version of the NaN in a */
+ signalling = !(a->sigh & 0x40000000);
+ }
+ } else
#ifdef PARANOID
- if (tagb == TW_NaN)
+ if (tagb == TW_NaN)
#endif /* PARANOID */
- {
- signalling = !(b->sigh & 0x40000000);
- x = b;
- }
+ {
+ signalling = !(b->sigh & 0x40000000);
+ x = b;
+ }
#ifdef PARANOID
- else
- {
- signalling = 0;
- EXCEPTION(EX_INTERNAL|0x113);
- x = &CONST_QNaN;
- }
+ else {
+ signalling = 0;
+ EXCEPTION(EX_INTERNAL | 0x113);
+ x = &CONST_QNaN;
+ }
#endif /* PARANOID */
- if ( (!signalling) || (control_word & CW_Invalid) )
- {
- if ( ! x )
- x = b;
+ if ((!signalling) || (control_word & CW_Invalid)) {
+ if (!x)
+ x = b;
- if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */
- x = &CONST_QNaN;
+ if (!(x->sigh & 0x80000000)) /* pseudo-NaN ? */
+ x = &CONST_QNaN;
- FPU_copy_to_regi(x, TAG_Special, deststnr);
+ FPU_copy_to_regi(x, TAG_Special, deststnr);
- if ( !signalling )
- return TAG_Special;
+ if (!signalling)
+ return TAG_Special;
- /* ensure a Quiet NaN */
- dest->sigh |= 0x40000000;
- }
+ /* ensure a Quiet NaN */
+ dest->sigh |= 0x40000000;
+ }
- EXCEPTION(EX_Invalid);
+ EXCEPTION(EX_Invalid);
- return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
+ return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
}
-
/* Invalid arith operation on Valid registers */
/* Returns < 0 if the exception is unmasked */
asmlinkage int arith_invalid(int deststnr)
{
- EXCEPTION(EX_Invalid);
-
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
- }
-
- return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
+ EXCEPTION(EX_Invalid);
-}
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
+ }
+ return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
+
+}
/* Divide a finite number by zero */
asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
{
- FPU_REG *dest = &st(deststnr);
- int tag = TAG_Valid;
+ FPU_REG *dest = &st(deststnr);
+ int tag = TAG_Valid;
+
+ if (control_word & CW_ZeroDiv) {
+ /* The masked response */
+ FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
+ setsign(dest, sign);
+ tag = TAG_Special;
+ }
- if ( control_word & CW_ZeroDiv )
- {
- /* The masked response */
- FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
- setsign(dest, sign);
- tag = TAG_Special;
- }
-
- EXCEPTION(EX_ZeroDiv);
+ EXCEPTION(EX_ZeroDiv);
- return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
+ return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
}
-
/* This may be called often, so keep it lean */
int set_precision_flag(int flags)
{
- if ( control_word & CW_Precision )
- {
- partial_status &= ~(SW_C1 & flags);
- partial_status |= flags; /* The masked response */
- return 0;
- }
- else
- {
- EXCEPTION(flags);
- return 1;
- }
+ if (control_word & CW_Precision) {
+ partial_status &= ~(SW_C1 & flags);
+ partial_status |= flags; /* The masked response */
+ return 0;
+ } else {
+ EXCEPTION(flags);
+ return 1;
+ }
}
-
/* This may be called often, so keep it lean */
asmlinkage void set_precision_flag_up(void)
{
- if ( control_word & CW_Precision )
- partial_status |= (SW_Precision | SW_C1); /* The masked response */
- else
- EXCEPTION(EX_Precision | SW_C1);
+ if (control_word & CW_Precision)
+ partial_status |= (SW_Precision | SW_C1); /* The masked response */
+ else
+ EXCEPTION(EX_Precision | SW_C1);
}
-
/* This may be called often, so keep it lean */
asmlinkage void set_precision_flag_down(void)
{
- if ( control_word & CW_Precision )
- { /* The masked response */
- partial_status &= ~SW_C1;
- partial_status |= SW_Precision;
- }
- else
- EXCEPTION(EX_Precision);
+ if (control_word & CW_Precision) { /* The masked response */
+ partial_status &= ~SW_C1;
+ partial_status |= SW_Precision;
+ } else
+ EXCEPTION(EX_Precision);
}
-
asmlinkage int denormal_operand(void)
{
- if ( control_word & CW_Denormal )
- { /* The masked response */
- partial_status |= SW_Denorm_Op;
- return TAG_Special;
- }
- else
- {
- EXCEPTION(EX_Denormal);
- return TAG_Special | FPU_Exception;
- }
+ if (control_word & CW_Denormal) { /* The masked response */
+ partial_status |= SW_Denorm_Op;
+ return TAG_Special;
+ } else {
+ EXCEPTION(EX_Denormal);
+ return TAG_Special | FPU_Exception;
+ }
}
-
asmlinkage int arith_overflow(FPU_REG *dest)
{
- int tag = TAG_Valid;
+ int tag = TAG_Valid;
- if ( control_word & CW_Overflow )
- {
- /* The masked response */
+ if (control_word & CW_Overflow) {
+ /* The masked response */
/* ###### The response here depends upon the rounding mode */
- reg_copy(&CONST_INF, dest);
- tag = TAG_Special;
- }
- else
- {
- /* Subtract the magic number from the exponent */
- addexponent(dest, (-3 * (1 << 13)));
- }
-
- EXCEPTION(EX_Overflow);
- if ( control_word & CW_Overflow )
- {
- /* The overflow exception is masked. */
- /* By definition, precision is lost.
- The roundup bit (C1) is also set because we have
- "rounded" upwards to Infinity. */
- EXCEPTION(EX_Precision | SW_C1);
- return tag;
- }
-
- return tag;
+ reg_copy(&CONST_INF, dest);
+ tag = TAG_Special;
+ } else {
+ /* Subtract the magic number from the exponent */
+ addexponent(dest, (-3 * (1 << 13)));
+ }
-}
+ EXCEPTION(EX_Overflow);
+ if (control_word & CW_Overflow) {
+ /* The overflow exception is masked. */
+ /* By definition, precision is lost.
+ The roundup bit (C1) is also set because we have
+ "rounded" upwards to Infinity. */
+ EXCEPTION(EX_Precision | SW_C1);
+ return tag;
+ }
+
+ return tag;
+}
asmlinkage int arith_underflow(FPU_REG *dest)
{
- int tag = TAG_Valid;
-
- if ( control_word & CW_Underflow )
- {
- /* The masked response */
- if ( exponent16(dest) <= EXP_UNDER - 63 )
- {
- reg_copy(&CONST_Z, dest);
- partial_status &= ~SW_C1; /* Round down. */
- tag = TAG_Zero;
+ int tag = TAG_Valid;
+
+ if (control_word & CW_Underflow) {
+ /* The masked response */
+ if (exponent16(dest) <= EXP_UNDER - 63) {
+ reg_copy(&CONST_Z, dest);
+ partial_status &= ~SW_C1; /* Round down. */
+ tag = TAG_Zero;
+ } else {
+ stdexp(dest);
+ }
+ } else {
+ /* Add the magic number to the exponent. */
+ addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
}
- else
- {
- stdexp(dest);
+
+ EXCEPTION(EX_Underflow);
+ if (control_word & CW_Underflow) {
+ /* The underflow exception is masked. */
+ EXCEPTION(EX_Precision);
+ return tag;
}
- }
- else
- {
- /* Add the magic number to the exponent. */
- addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
- }
-
- EXCEPTION(EX_Underflow);
- if ( control_word & CW_Underflow )
- {
- /* The underflow exception is masked. */
- EXCEPTION(EX_Precision);
- return tag;
- }
-
- return tag;
-}
+ return tag;
+}
void FPU_stack_overflow(void)
{
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- top--;
- FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
- }
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ top--;
+ FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+ }
- EXCEPTION(EX_StackOver);
+ EXCEPTION(EX_StackOver);
- return;
+ return;
}
-
void FPU_stack_underflow(void)
{
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
- }
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+ }
- EXCEPTION(EX_StackUnder);
+ EXCEPTION(EX_StackUnder);
- return;
+ return;
}
-
void FPU_stack_underflow_i(int i)
{
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
- }
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+ }
- EXCEPTION(EX_StackUnder);
+ EXCEPTION(EX_StackUnder);
- return;
+ return;
}
-
void FPU_stack_underflow_pop(int i)
{
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
- FPU_pop();
- }
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
+ FPU_pop();
+ }
- EXCEPTION(EX_StackUnder);
+ EXCEPTION(EX_StackUnder);
- return;
+ return;
}
-
diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h
index b463f21a811..67f43a4683d 100644
--- a/arch/x86/math-emu/exception.h
+++ b/arch/x86/math-emu/exception.h
@@ -9,7 +9,6 @@
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_
-
#ifdef __ASSEMBLY__
#define Const_(x) $##x
#else
@@ -20,8 +19,8 @@
#include "fpu_emu.h"
#endif /* SW_C1 */
-#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
-#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
+#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
+#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
/* Special exceptions: */
#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */
#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */
@@ -34,11 +33,9 @@
#define EX_Denormal Const_(0x0002) /* denormalized operand */
#define EX_Invalid Const_(0x0001) /* invalid operation */
-
#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
#define PRECISION_LOST_DOWN Const_(EX_Precision)
-
#ifndef __ASSEMBLY__
#ifdef DEBUG
@@ -48,6 +45,6 @@
#define EXCEPTION(x) FPU_exception(x)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLY__ */
#endif /* _EXCEPTION_H_ */
diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c
index 6972dec01af..aeab24e083c 100644
--- a/arch/x86/math-emu/fpu_arith.c
+++ b/arch/x86/math-emu/fpu_arith.c
@@ -15,160 +15,138 @@
#include "control_w.h"
#include "status_w.h"
-
void fadd__(void)
{
- /* fadd st,st(i) */
- int i = FPU_rm;
- clear_C1();
- FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
+ /* fadd st,st(i) */
+ int i = FPU_rm;
+ clear_C1();
+ FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
}
-
void fmul__(void)
{
- /* fmul st,st(i) */
- int i = FPU_rm;
- clear_C1();
- FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
+ /* fmul st,st(i) */
+ int i = FPU_rm;
+ clear_C1();
+ FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
}
-
-
void fsub__(void)
{
- /* fsub st,st(i) */
- clear_C1();
- FPU_sub(0, FPU_rm, control_word);
+ /* fsub st,st(i) */
+ clear_C1();
+ FPU_sub(0, FPU_rm, control_word);
}
-
void fsubr_(void)
{
- /* fsubr st,st(i) */
- clear_C1();
- FPU_sub(REV, FPU_rm, control_word);
+ /* fsubr st,st(i) */
+ clear_C1();
+ FPU_sub(REV, FPU_rm, control_word);
}
-
void fdiv__(void)
{
- /* fdiv st,st(i) */
- clear_C1();
- FPU_div(0, FPU_rm, control_word);
+ /* fdiv st,st(i) */
+ clear_C1();
+ FPU_div(0, FPU_rm, control_word);
}
-
void fdivr_(void)
{
- /* fdivr st,st(i) */
- clear_C1();
- FPU_div(REV, FPU_rm, control_word);
+ /* fdivr st,st(i) */
+ clear_C1();
+ FPU_div(REV, FPU_rm, control_word);
}
-
-
void fadd_i(void)
{
- /* fadd st(i),st */
- int i = FPU_rm;
- clear_C1();
- FPU_add(&st(i), FPU_gettagi(i), i, control_word);
+ /* fadd st(i),st */
+ int i = FPU_rm;
+ clear_C1();
+ FPU_add(&st(i), FPU_gettagi(i), i, control_word);
}
-
void fmul_i(void)
{
- /* fmul st(i),st */
- clear_C1();
- FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
+ /* fmul st(i),st */
+ clear_C1();
+ FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
}
-
void fsubri(void)
{
- /* fsubr st(i),st */
- clear_C1();
- FPU_sub(DEST_RM, FPU_rm, control_word);
+ /* fsubr st(i),st */
+ clear_C1();
+ FPU_sub(DEST_RM, FPU_rm, control_word);
}
-
void fsub_i(void)
{
- /* fsub st(i),st */
- clear_C1();
- FPU_sub(REV|DEST_RM, FPU_rm, control_word);
+ /* fsub st(i),st */
+ clear_C1();
+ FPU_sub(REV | DEST_RM, FPU_rm, control_word);
}
-
void fdivri(void)
{
- /* fdivr st(i),st */
- clear_C1();
- FPU_div(DEST_RM, FPU_rm, control_word);
+ /* fdivr st(i),st */
+ clear_C1();
+ FPU_div(DEST_RM, FPU_rm, control_word);
}
-
void fdiv_i(void)
{
- /* fdiv st(i),st */
- clear_C1();
- FPU_div(REV|DEST_RM, FPU_rm, control_word);
+ /* fdiv st(i),st */
+ clear_C1();
+ FPU_div(REV | DEST_RM, FPU_rm, control_word);
}
-
-
void faddp_(void)
{
- /* faddp st(i),st */
- int i = FPU_rm;
- clear_C1();
- if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
- FPU_pop();
+ /* faddp st(i),st */
+ int i = FPU_rm;
+ clear_C1();
+ if (FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0)
+ FPU_pop();
}
-
void fmulp_(void)
{
- /* fmulp st(i),st */
- clear_C1();
- if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
- FPU_pop();
+ /* fmulp st(i),st */
+ clear_C1();
+ if (FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0)
+ FPU_pop();
}
-
-
void fsubrp(void)
{
- /* fsubrp st(i),st */
- clear_C1();
- if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
- FPU_pop();
+ /* fsubrp st(i),st */
+ clear_C1();
+ if (FPU_sub(DEST_RM, FPU_rm, control_word) >= 0)
+ FPU_pop();
}
-
void fsubp_(void)
{
- /* fsubp st(i),st */
- clear_C1();
- if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
- FPU_pop();
+ /* fsubp st(i),st */
+ clear_C1();
+ if (FPU_sub(REV | DEST_RM, FPU_rm, control_word) >= 0)
+ FPU_pop();
}
-
void fdivrp(void)
{
- /* fdivrp st(i),st */
- clear_C1();
- if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
- FPU_pop();
+ /* fdivrp st(i),st */
+ clear_C1();
+ if (FPU_div(DEST_RM, FPU_rm, control_word) >= 0)
+ FPU_pop();
}
-
void fdivp_(void)
{
- /* fdivp st(i),st */
- clear_C1();
- if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
- FPU_pop();
+ /* fdivp st(i),st */
+ clear_C1();
+ if (FPU_div(REV | DEST_RM, FPU_rm, control_word) >= 0)
+ FPU_pop();
}
diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h
index 9ba12416df1..955b932735a 100644
--- a/arch/x86/math-emu/fpu_asm.h
+++ b/arch/x86/math-emu/fpu_asm.h
@@ -14,7 +14,6 @@
#define EXCEPTION FPU_exception
-
#define PARAM1 8(%ebp)
#define PARAM2 12(%ebp)
#define PARAM3 16(%ebp)
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 20886cfb9f7..491e737ce54 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -16,34 +16,34 @@
#include "status_w.h"
#include "control_w.h"
-
static void fnop(void)
{
}
static void fclex(void)
{
- partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
- SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
- SW_Invalid);
- no_ip_update = 1;
+ partial_status &=
+ ~(SW_Backward | SW_Summary | SW_Stack_Fault | SW_Precision |
+ SW_Underflow | SW_Overflow | SW_Zero_Div | SW_Denorm_Op |
+ SW_Invalid);
+ no_ip_update = 1;
}
/* Needs to be externally visible */
void finit(void)
{
- control_word = 0x037f;
- partial_status = 0;
- top = 0; /* We don't keep top in the status word internally. */
- fpu_tag_word = 0xffff;
- /* The behaviour is different from that detailed in
- Section 15.1.6 of the Intel manual */
- operand_address.offset = 0;
- operand_address.selector = 0;
- instruction_address.offset = 0;
- instruction_address.selector = 0;
- instruction_address.opcode = 0;
- no_ip_update = 1;
+ control_word = 0x037f;
+ partial_status = 0;
+ top = 0; /* We don't keep top in the status word internally. */
+ fpu_tag_word = 0xffff;
+ /* The behaviour is different from that detailed in
+ Section 15.1.6 of the Intel manual */
+ operand_address.offset = 0;
+ operand_address.selector = 0;
+ instruction_address.offset = 0;
+ instruction_address.selector = 0;
+ instruction_address.opcode = 0;
+ no_ip_update = 1;
}
/*
@@ -54,151 +54,134 @@ void finit(void)
#define fsetpm fnop
static FUNC const finit_table[] = {
- feni, fdisi, fclex, finit,
- fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
+ feni, fdisi, fclex, finit,
+ fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
};
void finit_(void)
{
- (finit_table[FPU_rm])();
+ (finit_table[FPU_rm]) ();
}
-
static void fstsw_ax(void)
{
- *(short *) &FPU_EAX = status_word();
- no_ip_update = 1;
+ *(short *)&FPU_EAX = status_word();
+ no_ip_update = 1;
}
static FUNC const fstsw_table[] = {
- fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
- FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+ fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
+ FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
};
void fstsw_(void)
{
- (fstsw_table[FPU_rm])();
+ (fstsw_table[FPU_rm]) ();
}
-
static FUNC const fp_nop_table[] = {
- fnop, FPU_illegal, FPU_illegal, FPU_illegal,
- FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+ fnop, FPU_illegal, FPU_illegal, FPU_illegal,
+ FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
};
void fp_nop(void)
{
- (fp_nop_table[FPU_rm])();
+ (fp_nop_table[FPU_rm]) ();
}
-
void fld_i_(void)
{
- FPU_REG *st_new_ptr;
- int i;
- u_char tag;
-
- if ( STACK_OVERFLOW )
- { FPU_stack_overflow(); return; }
-
- /* fld st(i) */
- i = FPU_rm;
- if ( NOT_EMPTY(i) )
- {
- reg_copy(&st(i), st_new_ptr);
- tag = FPU_gettagi(i);
- push();
- FPU_settag0(tag);
- }
- else
- {
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_stack_underflow();
+ FPU_REG *st_new_ptr;
+ int i;
+ u_char tag;
+
+ if (STACK_OVERFLOW) {
+ FPU_stack_overflow();
+ return;
}
- else
- EXCEPTION(EX_StackUnder);
- }
-}
+ /* fld st(i) */
+ i = FPU_rm;
+ if (NOT_EMPTY(i)) {
+ reg_copy(&st(i), st_new_ptr);
+ tag = FPU_gettagi(i);
+ push();
+ FPU_settag0(tag);
+ } else {
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_stack_underflow();
+ } else
+ EXCEPTION(EX_StackUnder);
+ }
+}
void fxch_i(void)
{
- /* fxch st(i) */
- FPU_REG t;
- int i = FPU_rm;
- FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
- long tag_word = fpu_tag_word;
- int regnr = top & 7, regnri = ((regnr + i) & 7);
- u_char st0_tag = (tag_word >> (regnr*2)) & 3;
- u_char sti_tag = (tag_word >> (regnri*2)) & 3;
-
- if ( st0_tag == TAG_Empty )
- {
- if ( sti_tag == TAG_Empty )
- {
- FPU_stack_underflow();
- FPU_stack_underflow_i(i);
- return;
+ /* fxch st(i) */
+ FPU_REG t;
+ int i = FPU_rm;
+ FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
+ long tag_word = fpu_tag_word;
+ int regnr = top & 7, regnri = ((regnr + i) & 7);
+ u_char st0_tag = (tag_word >> (regnr * 2)) & 3;
+ u_char sti_tag = (tag_word >> (regnri * 2)) & 3;
+
+ if (st0_tag == TAG_Empty) {
+ if (sti_tag == TAG_Empty) {
+ FPU_stack_underflow();
+ FPU_stack_underflow_i(i);
+ return;
+ }
+ if (control_word & CW_Invalid) {
+ /* Masked response */
+ FPU_copy_to_reg0(sti_ptr, sti_tag);
+ }
+ FPU_stack_underflow_i(i);
+ return;
}
- if ( control_word & CW_Invalid )
- {
- /* Masked response */
- FPU_copy_to_reg0(sti_ptr, sti_tag);
+ if (sti_tag == TAG_Empty) {
+ if (control_word & CW_Invalid) {
+ /* Masked response */
+ FPU_copy_to_regi(st0_ptr, st0_tag, i);
+ }
+ FPU_stack_underflow();
+ return;
}
- FPU_stack_underflow_i(i);
- return;
- }
- if ( sti_tag == TAG_Empty )
- {
- if ( control_word & CW_Invalid )
- {
- /* Masked response */
- FPU_copy_to_regi(st0_ptr, st0_tag, i);
- }
- FPU_stack_underflow();
- return;
- }
- clear_C1();
-
- reg_copy(st0_ptr, &t);
- reg_copy(sti_ptr, st0_ptr);
- reg_copy(&t, sti_ptr);
-
- tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
- tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
- fpu_tag_word = tag_word;
-}
+ clear_C1();
+ reg_copy(st0_ptr, &t);
+ reg_copy(sti_ptr, st0_ptr);
+ reg_copy(&t, sti_ptr);
+
+ tag_word &= ~(3 << (regnr * 2)) & ~(3 << (regnri * 2));
+ tag_word |= (sti_tag << (regnr * 2)) | (st0_tag << (regnri * 2));
+ fpu_tag_word = tag_word;
+}
void ffree_(void)
{
- /* ffree st(i) */
- FPU_settagi(FPU_rm, TAG_Empty);
+ /* ffree st(i) */
+ FPU_settagi(FPU_rm, TAG_Empty);
}
-
void ffreep(void)
{
- /* ffree st(i) + pop - unofficial code */
- FPU_settagi(FPU_rm, TAG_Empty);
- FPU_pop();
+ /* ffree st(i) + pop - unofficial code */
+ FPU_settagi(FPU_rm, TAG_Empty);
+ FPU_pop();
}
-
void fst_i_(void)
{
- /* fst st(i) */
- FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+ /* fst st(i) */
+ FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
}
-
void fstp_i(void)
{
- /* fstp st(i) */
- FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
- FPU_pop();
+ /* fstp st(i) */
+ FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
+ FPU_pop();
}
-
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index 65120f52385..4dae511c85a 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -7,7 +7,6 @@
| |
+---------------------------------------------------------------------------*/
-
#ifndef _FPU_EMU_H_
#define _FPU_EMU_H_
@@ -28,15 +27,15 @@
#endif
#define EXP_BIAS Const(0)
-#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
-#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
-#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but
- still a 16 bit nr. */
+#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
+#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
+#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but
+ still a 16 bit nr. */
#define EXP_Infinity EXP_OVER
#define EXP_NaN EXP_OVER
#define EXTENDED_Ebias Const(0x3fff)
-#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
+#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
#define SIGN_POS Const(0)
#define SIGN_NEG Const(0x80)
@@ -44,10 +43,9 @@
#define SIGN_Positive Const(0)
#define SIGN_Negative Const(0x8000)
-
/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
/* The following fold to 2 (Special) in the Tag Word */
-#define TW_Denormal Const(4) /* De-normal */
+#define TW_Denormal Const(4) /* De-normal */
#define TW_Infinity Const(5) /* + or - infinity */
#define TW_NaN Const(6) /* Not a Number */
#define TW_Unsupported Const(7) /* Not supported by an 80486 */
@@ -67,14 +65,13 @@
#define DEST_RM 0x20
#define LOADED 0x40
-#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
-
+#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
#ifndef __ASSEMBLY__
#include "fpu_system.h"
-#include <asm/sigcontext.h> /* for struct _fpstate */
+#include <asm/sigcontext.h> /* for struct _fpstate */
#include <asm/math_emu.h>
#include <linux/linkage.h>
@@ -112,30 +109,33 @@ extern u_char emulating;
#define PREFIX_DEFAULT 7
struct address {
- unsigned int offset;
- unsigned int selector:16;
- unsigned int opcode:11;
- unsigned int empty:5;
+ unsigned int offset;
+ unsigned int selector:16;
+ unsigned int opcode:11;
+ unsigned int empty:5;
};
struct fpu__reg {
- unsigned sigl;
- unsigned sigh;
- short exp;
+ unsigned sigl;
+ unsigned sigh;
+ short exp;
};
-typedef void (*FUNC)(void);
+typedef void (*FUNC) (void);
typedef struct fpu__reg FPU_REG;
-typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
-typedef struct { u_char address_size, operand_size, segment; }
- overrides;
+typedef void (*FUNC_ST0) (FPU_REG *st0_ptr, u_char st0_tag);
+typedef struct {
+ u_char address_size, operand_size, segment;
+} overrides;
/* This structure is 32 bits: */
-typedef struct { overrides override;
- u_char default_mode; } fpu_addr_modes;
+typedef struct {
+ overrides override;
+ u_char default_mode;
+} fpu_addr_modes;
/* PROTECTED has a restricted meaning in the emulator; it is used
to signal that the emulator needs to do special things to ensure
that protection is respected in a segmented model. */
#define PROTECTED 4
-#define SIXTEEN 1 /* We rely upon this being 1 (true) */
+#define SIXTEEN 1 /* We rely upon this being 1 (true) */
#define VM86 SIXTEEN
#define PM16 (SIXTEEN | PROTECTED)
#define SEG32 PROTECTED
@@ -168,8 +168,8 @@ extern u_char const data_sizes_16[32];
static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
{
- *(short *)&(y->exp) = *(const short *)&(x->exp);
- *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
+ *(short *)&(y->exp) = *(const short *)&(x->exp);
+ *(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
}
#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
@@ -184,27 +184,26 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
-
/*----- Prototypes for functions written in assembler -----*/
/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
asmlinkage int FPU_normalize(FPU_REG *x);
asmlinkage int FPU_normalize_nuo(FPU_REG *x);
asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
- FPU_REG *answ, unsigned int control_w, u_char sign,
+ FPU_REG * answ, unsigned int control_w, u_char sign,
int expa, int expb);
asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
- FPU_REG *answ, unsigned int control_w, u_char sign,
+ FPU_REG * answ, unsigned int control_w, u_char sign,
int expon);
asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
- FPU_REG *answ, unsigned int control_w, u_char sign);
+ FPU_REG * answ, unsigned int control_w, u_char sign);
asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
- FPU_REG *answ, unsigned int control_w, u_char sign,
+ FPU_REG * answ, unsigned int control_w, u_char sign,
int expa, int expb);
asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
unsigned int control_w, u_char sign);
-asmlinkage unsigned FPU_shrx(void *l, unsigned x);
-asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
+asmlinkage unsigned FPU_shrx(void *l, unsigned x);
+asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
unsigned int control_w, u_char sign);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 1853524c8b5..760baeea5f0 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -25,10 +25,11 @@
+---------------------------------------------------------------------------*/
#include <linux/signal.h>
-#include <linux/ptrace.h>
+#include <linux/regset.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/user.h>
#include "fpu_system.h"
#include "fpu_emu.h"
@@ -36,726 +37,727 @@
#include "control_w.h"
#include "status_w.h"
-#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
+#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
-#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
+#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
/* WARNING: These codes are not documented by Intel in their 80486 manual
and may not work on FPU clones or later Intel FPUs. */
/* Changes to support the un-doc codes provided by Linus Torvalds. */
-#define _d9_d8_ fstp_i /* unofficial code (19) */
-#define _dc_d0_ fcom_st /* unofficial code (14) */
-#define _dc_d8_ fcompst /* unofficial code (1c) */
-#define _dd_c8_ fxch_i /* unofficial code (0d) */
-#define _de_d0_ fcompst /* unofficial code (16) */
-#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
-#define _df_c8_ fxch_i /* unofficial code (0f) */
-#define _df_d0_ fstp_i /* unofficial code (17) */
-#define _df_d8_ fstp_i /* unofficial code (1f) */
+#define _d9_d8_ fstp_i /* unofficial code (19) */
+#define _dc_d0_ fcom_st /* unofficial code (14) */
+#define _dc_d8_ fcompst /* unofficial code (1c) */
+#define _dd_c8_ fxch_i /* unofficial code (0d) */
+#define _de_d0_ fcompst /* unofficial code (16) */
+#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
+#define _df_c8_ fxch_i /* unofficial code (0f) */
+#define _df_d0_ fstp_i /* unofficial code (17) */
+#define _df_d8_ fstp_i /* unofficial code (1f) */
static FUNC const st_instr_table[64] = {
- fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
- fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
- fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
- fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
- fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
- fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
- fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
- fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
+ fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
+ fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
+ fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
+ fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
+ fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
+ fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
+ fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
+ fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
};
-#else /* Support only documented FPU op-codes */
+#else /* Support only documented FPU op-codes */
static FUNC const st_instr_table[64] = {
- fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
- fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
- fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
- fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
- fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
- fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
- fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
- fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
+ fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
+ fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
+ fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
+ fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
+ fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
+ fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
+ fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
+ fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
};
#endif /* NO_UNDOC_CODE */
-
-#define _NONE_ 0 /* Take no special action */
-#define _REG0_ 1 /* Need to check for not empty st(0) */
-#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
-#define _REGi_ 0 /* Uses st(rm) */
-#define _PUSH_ 3 /* Need to check for space to push onto stack */
-#define _null_ 4 /* Function illegal or not implemented */
-#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
-#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
-#define _REGIc 0 /* Compare st(0) and st(rm) */
-#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
+#define _NONE_ 0 /* Take no special action */
+#define _REG0_ 1 /* Need to check for not empty st(0) */
+#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
+#define _REGi_ 0 /* Uses st(rm) */
+#define _PUSH_ 3 /* Need to check for space to push onto stack */
+#define _null_ 4 /* Function illegal or not implemented */
+#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
+#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
+#define _REGIc 0 /* Compare st(0) and st(rm) */
+#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
#ifndef NO_UNDOC_CODE
/* Un-documented FPU op-codes supported by default. (see above) */
static u_char const type_table[64] = {
- _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
- _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
- _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
- _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
- _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
- _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
- _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
- _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
+ _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
+ _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+ _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+ _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};
-#else /* Support only documented FPU op-codes */
+#else /* Support only documented FPU op-codes */
static u_char const type_table[64] = {
- _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
- _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
- _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
- _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
- _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
- _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
- _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
- _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
+ _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+ _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
+ _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
+ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+ _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};
#endif /* NO_UNDOC_CODE */
-
#ifdef RE_ENTRANT_CHECKING
-u_char emulating=0;
+u_char emulating = 0;
#endif /* RE_ENTRANT_CHECKING */
-static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
- overrides *override);
+static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip,
+ overrides * override);
asmlinkage void math_emulate(long arg)
{
- u_char FPU_modrm, byte1;
- unsigned short code;
- fpu_addr_modes addr_modes;
- int unmasked;
- FPU_REG loaded_data;
- FPU_REG *st0_ptr;
- u_char loaded_tag, st0_tag;
- void __user *data_address;
- struct address data_sel_off;
- struct address entry_sel_off;
- unsigned long code_base = 0;
- unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
- struct desc_struct code_descriptor;
+ u_char FPU_modrm, byte1;
+ unsigned short code;
+ fpu_addr_modes addr_modes;
+ int unmasked;
+ FPU_REG loaded_data;
+ FPU_REG *st0_ptr;
+ u_char loaded_tag, st0_tag;
+ void __user *data_address;
+ struct address data_sel_off;
+ struct address entry_sel_off;
+ unsigned long code_base = 0;
+ unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
+ struct desc_struct code_descriptor;
#ifdef RE_ENTRANT_CHECKING
- if ( emulating )
- {
- printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
- }
- RE_ENTRANT_CHECK_ON;
+ if (emulating) {
+ printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
+ }
+ RE_ENTRANT_CHECK_ON;
#endif /* RE_ENTRANT_CHECKING */
- if (!used_math())
- {
- finit();
- set_used_math();
- }
-
- SETUP_DATA_AREA(arg);
-
- FPU_ORIG_EIP = FPU_EIP;
-
- if ( (FPU_EFLAGS & 0x00020000) != 0 )
- {
- /* Virtual 8086 mode */
- addr_modes.default_mode = VM86;
- FPU_EIP += code_base = FPU_CS << 4;
- code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
- }
- else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
- {
- addr_modes.default_mode = 0;
- }
- else if ( FPU_CS == __KERNEL_CS )
- {
- printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
- panic("Math emulation needed in kernel");
- }
- else
- {
-
- if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */
- {
- /* Can only handle segmented addressing via the LDT
- for now, and it must be 16 bit */
- printk("FPU emulator: Unsupported addressing mode\n");
- math_abort(FPU_info, SIGILL);
+ if (!used_math()) {
+ finit();
+ set_used_math();
}
- code_descriptor = LDT_DESCRIPTOR(FPU_CS);
- if ( SEG_D_SIZE(code_descriptor) )
- {
- /* The above test may be wrong, the book is not clear */
- /* Segmented 32 bit protected mode */
- addr_modes.default_mode = SEG32;
+ SETUP_DATA_AREA(arg);
+
+ FPU_ORIG_EIP = FPU_EIP;
+
+ if ((FPU_EFLAGS & 0x00020000) != 0) {
+ /* Virtual 8086 mode */
+ addr_modes.default_mode = VM86;
+ FPU_EIP += code_base = FPU_CS << 4;
+ code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
+ } else if (FPU_CS == __USER_CS && FPU_DS == __USER_DS) {
+ addr_modes.default_mode = 0;
+ } else if (FPU_CS == __KERNEL_CS) {
+ printk("math_emulate: %04x:%08lx\n", FPU_CS, FPU_EIP);
+ panic("Math emulation needed in kernel");
+ } else {
+
+ if ((FPU_CS & 4) != 4) { /* Must be in the LDT */
+ /* Can only handle segmented addressing via the LDT
+ for now, and it must be 16 bit */
+ printk("FPU emulator: Unsupported addressing mode\n");
+ math_abort(FPU_info, SIGILL);
+ }
+
+ code_descriptor = LDT_DESCRIPTOR(FPU_CS);
+ if (SEG_D_SIZE(code_descriptor)) {
+ /* The above test may be wrong, the book is not clear */
+ /* Segmented 32 bit protected mode */
+ addr_modes.default_mode = SEG32;
+ } else {
+ /* 16 bit protected mode */
+ addr_modes.default_mode = PM16;
+ }
+ FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
+ code_limit = code_base
+ + (SEG_LIMIT(code_descriptor) +
+ 1) * SEG_GRANULARITY(code_descriptor)
+ - 1;
+ if (code_limit < code_base)
+ code_limit = 0xffffffff;
}
- else
- {
- /* 16 bit protected mode */
- addr_modes.default_mode = PM16;
+
+ FPU_lookahead = !(FPU_EFLAGS & X86_EFLAGS_TF);
+
+ if (!valid_prefix(&byte1, (u_char __user **) & FPU_EIP,
+ &addr_modes.override)) {
+ RE_ENTRANT_CHECK_OFF;
+ printk
+ ("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
+ "FPU emulator: self-modifying code! (emulation impossible)\n",
+ byte1);
+ RE_ENTRANT_CHECK_ON;
+ EXCEPTION(EX_INTERNAL | 0x126);
+ math_abort(FPU_info, SIGILL);
}
- FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
- code_limit = code_base
- + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
- - 1;
- if ( code_limit < code_base ) code_limit = 0xffffffff;
- }
-
- FPU_lookahead = 1;
- if (current->ptrace & PT_PTRACED)
- FPU_lookahead = 0;
-
- if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
- &addr_modes.override) )
- {
- RE_ENTRANT_CHECK_OFF;
- printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
- "FPU emulator: self-modifying code! (emulation impossible)\n",
- byte1);
- RE_ENTRANT_CHECK_ON;
- EXCEPTION(EX_INTERNAL|0x126);
- math_abort(FPU_info,SIGILL);
- }
-
-do_another_FPU_instruction:
-
- no_ip_update = 0;
-
- FPU_EIP++; /* We have fetched the prefix and first code bytes. */
-
- if ( addr_modes.default_mode )
- {
- /* This checks for the minimum instruction bytes.
- We also need to check any extra (address mode) code access. */
- if ( FPU_EIP > code_limit )
- math_abort(FPU_info,SIGSEGV);
- }
-
- if ( (byte1 & 0xf8) != 0xd8 )
- {
- if ( byte1 == FWAIT_OPCODE )
- {
- if (partial_status & SW_Summary)
- goto do_the_FPU_interrupt;
- else
- goto FPU_fwait_done;
+
+ do_another_FPU_instruction:
+
+ no_ip_update = 0;
+
+ FPU_EIP++; /* We have fetched the prefix and first code bytes. */
+
+ if (addr_modes.default_mode) {
+ /* This checks for the minimum instruction bytes.
+ We also need to check any extra (address mode) code access. */
+ if (FPU_EIP > code_limit)
+ math_abort(FPU_info, SIGSEGV);
}
+
+ if ((byte1 & 0xf8) != 0xd8) {
+ if (byte1 == FWAIT_OPCODE) {
+ if (partial_status & SW_Summary)
+ goto do_the_FPU_interrupt;
+ else
+ goto FPU_fwait_done;
+ }
#ifdef PARANOID
- EXCEPTION(EX_INTERNAL|0x128);
- math_abort(FPU_info,SIGILL);
+ EXCEPTION(EX_INTERNAL | 0x128);
+ math_abort(FPU_info, SIGILL);
#endif /* PARANOID */
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
- RE_ENTRANT_CHECK_ON;
- FPU_EIP++;
-
- if (partial_status & SW_Summary)
- {
- /* Ignore the error for now if the current instruction is a no-wait
- control instruction */
- /* The 80486 manual contradicts itself on this topic,
- but a real 80486 uses the following instructions:
- fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
- */
- code = (FPU_modrm << 8) | byte1;
- if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
- (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
- fnstsw */
- ((code & 0xc000) != 0xc000))) ) )
- {
- /*
- * We need to simulate the action of the kernel to FPU
- * interrupts here.
- */
- do_the_FPU_interrupt:
-
- FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
-
- RE_ENTRANT_CHECK_OFF;
- current->thread.trap_no = 16;
- current->thread.error_code = 0;
- send_sig(SIGFPE, current, 1);
- return;
- }
- }
-
- entry_sel_off.offset = FPU_ORIG_EIP;
- entry_sel_off.selector = FPU_CS;
- entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
-
- FPU_rm = FPU_modrm & 7;
-
- if ( FPU_modrm < 0300 )
- {
- /* All of these instructions use the mod/rm byte to get a data address */
-
- if ( (addr_modes.default_mode & SIXTEEN)
- ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
- data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
- addr_modes);
- else
- data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
- addr_modes);
-
- if ( addr_modes.default_mode )
- {
- if ( FPU_EIP-1 > code_limit )
- math_abort(FPU_info,SIGSEGV);
}
- if ( !(byte1 & 1) )
- {
- unsigned short status1 = partial_status;
-
- st0_ptr = &st(0);
- st0_tag = FPU_gettag0();
-
- /* Stack underflow has priority */
- if ( NOT_EMPTY_ST0 )
- {
- if ( addr_modes.default_mode & PROTECTED )
- {
- /* This table works for 16 and 32 bit protected mode */
- if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
- math_abort(FPU_info,SIGSEGV);
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
+ RE_ENTRANT_CHECK_ON;
+ FPU_EIP++;
+
+ if (partial_status & SW_Summary) {
+ /* Ignore the error for now if the current instruction is a no-wait
+ control instruction */
+ /* The 80486 manual contradicts itself on this topic,
+ but a real 80486 uses the following instructions:
+ fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
+ */
+ code = (FPU_modrm << 8) | byte1;
+ if (!((((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
+ (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
+ fnstsw */
+ ((code & 0xc000) != 0xc000))))) {
+ /*
+ * We need to simulate the action of the kernel to FPU
+ * interrupts here.
+ */
+ do_the_FPU_interrupt:
+
+ FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
+
+ RE_ENTRANT_CHECK_OFF;
+ current->thread.trap_no = 16;
+ current->thread.error_code = 0;
+ send_sig(SIGFPE, current, 1);
+ return;
}
+ }
- unmasked = 0; /* Do this here to stop compiler warnings. */
- switch ( (byte1 >> 1) & 3 )
- {
- case 0:
- unmasked = FPU_load_single((float __user *)data_address,
- &loaded_data);
- loaded_tag = unmasked & 0xff;
- unmasked &= ~0xff;
- break;
- case 1:
- loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
- break;
- case 2:
- unmasked = FPU_load_double((double __user *)data_address,
- &loaded_data);
- loaded_tag = unmasked & 0xff;
- unmasked &= ~0xff;
- break;
- case 3:
- default: /* Used here to suppress gcc warnings. */
- loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
- break;
- }
+ entry_sel_off.offset = FPU_ORIG_EIP;
+ entry_sel_off.selector = FPU_CS;
+ entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
- /* No more access to user memory, it is safe
- to use static data now */
-
- /* NaN operands have the next priority. */
- /* We have to delay looking at st(0) until after
- loading the data, because that data might contain an SNaN */
- if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
- ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
- {
- /* Restore the status word; we might have loaded a
- denormal. */
- partial_status = status1;
- if ( (FPU_modrm & 0x30) == 0x10 )
- {
- /* fcom or fcomp */
- EXCEPTION(EX_Invalid);
- setcc(SW_C3 | SW_C2 | SW_C0);
- if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
- FPU_pop(); /* fcomp, masked, so we pop. */
- }
- else
- {
- if ( loaded_tag == TAG_Special )
- loaded_tag = FPU_Special(&loaded_data);
-#ifdef PECULIAR_486
- /* This is not really needed, but gives behaviour
- identical to an 80486 */
- if ( (FPU_modrm & 0x28) == 0x20 )
- /* fdiv or fsub */
- real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
- else
-#endif /* PECULIAR_486 */
- /* fadd, fdivr, fmul, or fsubr */
- real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
- }
- goto reg_mem_instr_done;
- }
+ FPU_rm = FPU_modrm & 7;
- if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
- {
- /* Is not a comparison instruction. */
- if ( (FPU_modrm & 0x38) == 0x38 )
- {
- /* fdivr */
- if ( (st0_tag == TAG_Zero) &&
- ((loaded_tag == TAG_Valid)
- || (loaded_tag == TAG_Special
- && isdenormal(&loaded_data))) )
- {
- if ( FPU_divide_by_zero(0, getsign(&loaded_data))
- < 0 )
- {
- /* We use the fact here that the unmasked
- exception in the loaded data was for a
- denormal operand */
- /* Restore the state of the denormal op bit */
- partial_status &= ~SW_Denorm_Op;
- partial_status |= status1 & SW_Denorm_Op;
- }
- else
- setsign(st0_ptr, getsign(&loaded_data));
- }
- }
- goto reg_mem_instr_done;
- }
+ if (FPU_modrm < 0300) {
+ /* All of these instructions use the mod/rm byte to get a data address */
- switch ( (FPU_modrm >> 3) & 7 )
- {
- case 0: /* fadd */
- clear_C1();
- FPU_add(&loaded_data, loaded_tag, 0, control_word);
- break;
- case 1: /* fmul */
- clear_C1();
- FPU_mul(&loaded_data, loaded_tag, 0, control_word);
- break;
- case 2: /* fcom */
- FPU_compare_st_data(&loaded_data, loaded_tag);
- break;
- case 3: /* fcomp */
- if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
- && !unmasked )
- FPU_pop();
- break;
- case 4: /* fsub */
- clear_C1();
- FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
- break;
- case 5: /* fsubr */
- clear_C1();
- FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
- break;
- case 6: /* fdiv */
- clear_C1();
- FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
- break;
- case 7: /* fdivr */
- clear_C1();
- if ( st0_tag == TAG_Zero )
- partial_status = status1; /* Undo any denorm tag,
- zero-divide has priority. */
- FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
- break;
+ if ((addr_modes.default_mode & SIXTEEN)
+ ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX))
+ data_address =
+ FPU_get_address_16(FPU_modrm, &FPU_EIP,
+ &data_sel_off, addr_modes);
+ else
+ data_address =
+ FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
+ addr_modes);
+
+ if (addr_modes.default_mode) {
+ if (FPU_EIP - 1 > code_limit)
+ math_abort(FPU_info, SIGSEGV);
}
- }
- else
- {
- if ( (FPU_modrm & 0x30) == 0x10 )
- {
- /* The instruction is fcom or fcomp */
- EXCEPTION(EX_StackUnder);
- setcc(SW_C3 | SW_C2 | SW_C0);
- if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
- FPU_pop(); /* fcomp */
+
+ if (!(byte1 & 1)) {
+ unsigned short status1 = partial_status;
+
+ st0_ptr = &st(0);
+ st0_tag = FPU_gettag0();
+
+ /* Stack underflow has priority */
+ if (NOT_EMPTY_ST0) {
+ if (addr_modes.default_mode & PROTECTED) {
+ /* This table works for 16 and 32 bit protected mode */
+ if (access_limit <
+ data_sizes_16[(byte1 >> 1) & 3])
+ math_abort(FPU_info, SIGSEGV);
+ }
+
+ unmasked = 0; /* Do this here to stop compiler warnings. */
+ switch ((byte1 >> 1) & 3) {
+ case 0:
+ unmasked =
+ FPU_load_single((float __user *)
+ data_address,
+ &loaded_data);
+ loaded_tag = unmasked & 0xff;
+ unmasked &= ~0xff;
+ break;
+ case 1:
+ loaded_tag =
+ FPU_load_int32((long __user *)
+ data_address,
+ &loaded_data);
+ break;
+ case 2:
+ unmasked =
+ FPU_load_double((double __user *)
+ data_address,
+ &loaded_data);
+ loaded_tag = unmasked & 0xff;
+ unmasked &= ~0xff;
+ break;
+ case 3:
+ default: /* Used here to suppress gcc warnings. */
+ loaded_tag =
+ FPU_load_int16((short __user *)
+ data_address,
+ &loaded_data);
+ break;
+ }
+
+ /* No more access to user memory, it is safe
+ to use static data now */
+
+ /* NaN operands have the next priority. */
+ /* We have to delay looking at st(0) until after
+ loading the data, because that data might contain an SNaN */
+ if (((st0_tag == TAG_Special) && isNaN(st0_ptr))
+ || ((loaded_tag == TAG_Special)
+ && isNaN(&loaded_data))) {
+ /* Restore the status word; we might have loaded a
+ denormal. */
+ partial_status = status1;
+ if ((FPU_modrm & 0x30) == 0x10) {
+ /* fcom or fcomp */
+ EXCEPTION(EX_Invalid);
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ if ((FPU_modrm & 0x08)
+ && (control_word &
+ CW_Invalid))
+ FPU_pop(); /* fcomp, masked, so we pop. */
+ } else {
+ if (loaded_tag == TAG_Special)
+ loaded_tag =
+ FPU_Special
+ (&loaded_data);
+#ifdef PECULIAR_486
+ /* This is not really needed, but gives behaviour
+ identical to an 80486 */
+ if ((FPU_modrm & 0x28) == 0x20)
+ /* fdiv or fsub */
+ real_2op_NaN
+ (&loaded_data,
+ loaded_tag, 0,
+ &loaded_data);
+ else
+#endif /* PECULIAR_486 */
+ /* fadd, fdivr, fmul, or fsubr */
+ real_2op_NaN
+ (&loaded_data,
+ loaded_tag, 0,
+ st0_ptr);
+ }
+ goto reg_mem_instr_done;
+ }
+
+ if (unmasked && !((FPU_modrm & 0x30) == 0x10)) {
+ /* Is not a comparison instruction. */
+ if ((FPU_modrm & 0x38) == 0x38) {
+ /* fdivr */
+ if ((st0_tag == TAG_Zero) &&
+ ((loaded_tag == TAG_Valid)
+ || (loaded_tag ==
+ TAG_Special
+ &&
+ isdenormal
+ (&loaded_data)))) {
+ if (FPU_divide_by_zero
+ (0,
+ getsign
+ (&loaded_data))
+ < 0) {
+ /* We use the fact here that the unmasked
+ exception in the loaded data was for a
+ denormal operand */
+ /* Restore the state of the denormal op bit */
+ partial_status
+ &=
+ ~SW_Denorm_Op;
+ partial_status
+ |=
+ status1 &
+ SW_Denorm_Op;
+ } else
+ setsign(st0_ptr,
+ getsign
+ (&loaded_data));
+ }
+ }
+ goto reg_mem_instr_done;
+ }
+
+ switch ((FPU_modrm >> 3) & 7) {
+ case 0: /* fadd */
+ clear_C1();
+ FPU_add(&loaded_data, loaded_tag, 0,
+ control_word);
+ break;
+ case 1: /* fmul */
+ clear_C1();
+ FPU_mul(&loaded_data, loaded_tag, 0,
+ control_word);
+ break;
+ case 2: /* fcom */
+ FPU_compare_st_data(&loaded_data,
+ loaded_tag);
+ break;
+ case 3: /* fcomp */
+ if (!FPU_compare_st_data
+ (&loaded_data, loaded_tag)
+ && !unmasked)
+ FPU_pop();
+ break;
+ case 4: /* fsub */
+ clear_C1();
+ FPU_sub(LOADED | loaded_tag,
+ (int)&loaded_data,
+ control_word);
+ break;
+ case 5: /* fsubr */
+ clear_C1();
+ FPU_sub(REV | LOADED | loaded_tag,
+ (int)&loaded_data,
+ control_word);
+ break;
+ case 6: /* fdiv */
+ clear_C1();
+ FPU_div(LOADED | loaded_tag,
+ (int)&loaded_data,
+ control_word);
+ break;
+ case 7: /* fdivr */
+ clear_C1();
+ if (st0_tag == TAG_Zero)
+ partial_status = status1; /* Undo any denorm tag,
+ zero-divide has priority. */
+ FPU_div(REV | LOADED | loaded_tag,
+ (int)&loaded_data,
+ control_word);
+ break;
+ }
+ } else {
+ if ((FPU_modrm & 0x30) == 0x10) {
+ /* The instruction is fcom or fcomp */
+ EXCEPTION(EX_StackUnder);
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ if ((FPU_modrm & 0x08)
+ && (control_word & CW_Invalid))
+ FPU_pop(); /* fcomp */
+ } else
+ FPU_stack_underflow();
+ }
+ reg_mem_instr_done:
+ operand_address = data_sel_off;
+ } else {
+ if (!(no_ip_update =
+ FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6))
+ >> 1, addr_modes, data_address))) {
+ operand_address = data_sel_off;
+ }
}
- else
- FPU_stack_underflow();
- }
- reg_mem_instr_done:
- operand_address = data_sel_off;
- }
- else
- {
- if ( !(no_ip_update =
- FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
- addr_modes, data_address)) )
- {
- operand_address = data_sel_off;
- }
- }
- }
- else
- {
- /* None of these instructions access user memory */
- u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
+ } else {
+ /* None of these instructions access user memory */
+ u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
#ifdef PECULIAR_486
- /* This is supposed to be undefined, but a real 80486 seems
- to do this: */
- operand_address.offset = 0;
- operand_address.selector = FPU_DS;
+ /* This is supposed to be undefined, but a real 80486 seems
+ to do this: */
+ operand_address.offset = 0;
+ operand_address.selector = FPU_DS;
#endif /* PECULIAR_486 */
- st0_ptr = &st(0);
- st0_tag = FPU_gettag0();
- switch ( type_table[(int) instr_index] )
- {
- case _NONE_: /* also _REGIc: _REGIn */
- break;
- case _REG0_:
- if ( !NOT_EMPTY_ST0 )
- {
- FPU_stack_underflow();
- goto FPU_instruction_done;
- }
- break;
- case _REGIi:
- if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
- {
- FPU_stack_underflow_i(FPU_rm);
- goto FPU_instruction_done;
- }
- break;
- case _REGIp:
- if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
- {
- FPU_stack_underflow_pop(FPU_rm);
- goto FPU_instruction_done;
- }
- break;
- case _REGI_:
- if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
- {
- FPU_stack_underflow();
- goto FPU_instruction_done;
- }
- break;
- case _PUSH_: /* Only used by the fld st(i) instruction */
- break;
- case _null_:
- FPU_illegal();
- goto FPU_instruction_done;
- default:
- EXCEPTION(EX_INTERNAL|0x111);
- goto FPU_instruction_done;
- }
- (*st_instr_table[(int) instr_index])();
+ st0_ptr = &st(0);
+ st0_tag = FPU_gettag0();
+ switch (type_table[(int)instr_index]) {
+ case _NONE_: /* also _REGIc: _REGIn */
+ break;
+ case _REG0_:
+ if (!NOT_EMPTY_ST0) {
+ FPU_stack_underflow();
+ goto FPU_instruction_done;
+ }
+ break;
+ case _REGIi:
+ if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+ FPU_stack_underflow_i(FPU_rm);
+ goto FPU_instruction_done;
+ }
+ break;
+ case _REGIp:
+ if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+ FPU_stack_underflow_pop(FPU_rm);
+ goto FPU_instruction_done;
+ }
+ break;
+ case _REGI_:
+ if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm)) {
+ FPU_stack_underflow();
+ goto FPU_instruction_done;
+ }
+ break;
+ case _PUSH_: /* Only used by the fld st(i) instruction */
+ break;
+ case _null_:
+ FPU_illegal();
+ goto FPU_instruction_done;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x111);
+ goto FPU_instruction_done;
+ }
+ (*st_instr_table[(int)instr_index]) ();
-FPU_instruction_done:
- ;
- }
+ FPU_instruction_done:
+ ;
+ }
- if ( ! no_ip_update )
- instruction_address = entry_sel_off;
+ if (!no_ip_update)
+ instruction_address = entry_sel_off;
-FPU_fwait_done:
+ FPU_fwait_done:
#ifdef DEBUG
- RE_ENTRANT_CHECK_OFF;
- FPU_printall();
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_printall();
+ RE_ENTRANT_CHECK_ON;
#endif /* DEBUG */
- if (FPU_lookahead && !need_resched())
- {
- FPU_ORIG_EIP = FPU_EIP - code_base;
- if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
- &addr_modes.override) )
- goto do_another_FPU_instruction;
- }
+ if (FPU_lookahead && !need_resched()) {
+ FPU_ORIG_EIP = FPU_EIP - code_base;
+ if (valid_prefix(&byte1, (u_char __user **) & FPU_EIP,
+ &addr_modes.override))
+ goto do_another_FPU_instruction;
+ }
- if ( addr_modes.default_mode )
- FPU_EIP -= code_base;
+ if (addr_modes.default_mode)
+ FPU_EIP -= code_base;
- RE_ENTRANT_CHECK_OFF;
+ RE_ENTRANT_CHECK_OFF;
}
-
/* Support for prefix bytes is not yet complete. To properly handle
all prefix bytes, further changes are needed in the emulator code
which accesses user address space. Access to separate segments is
important for msdos emulation. */
static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
- overrides *override)
+ overrides * override)
{
- u_char byte;
- u_char __user *ip = *fpu_eip;
-
- *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */
-
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(byte, ip);
- RE_ENTRANT_CHECK_ON;
-
- while ( 1 )
- {
- switch ( byte )
- {
- case ADDR_SIZE_PREFIX:
- override->address_size = ADDR_SIZE_PREFIX;
- goto do_next_byte;
-
- case OP_SIZE_PREFIX:
- override->operand_size = OP_SIZE_PREFIX;
- goto do_next_byte;
-
- case PREFIX_CS:
- override->segment = PREFIX_CS_;
- goto do_next_byte;
- case PREFIX_ES:
- override->segment = PREFIX_ES_;
- goto do_next_byte;
- case PREFIX_SS:
- override->segment = PREFIX_SS_;
- goto do_next_byte;
- case PREFIX_FS:
- override->segment = PREFIX_FS_;
- goto do_next_byte;
- case PREFIX_GS:
- override->segment = PREFIX_GS_;
- goto do_next_byte;
- case PREFIX_DS:
- override->segment = PREFIX_DS_;
- goto do_next_byte;
+ u_char byte;
+ u_char __user *ip = *fpu_eip;
+
+ *override = (overrides) {
+ 0, 0, PREFIX_DEFAULT}; /* defaults */
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(byte, ip);
+ RE_ENTRANT_CHECK_ON;
+
+ while (1) {
+ switch (byte) {
+ case ADDR_SIZE_PREFIX:
+ override->address_size = ADDR_SIZE_PREFIX;
+ goto do_next_byte;
+
+ case OP_SIZE_PREFIX:
+ override->operand_size = OP_SIZE_PREFIX;
+ goto do_next_byte;
+
+ case PREFIX_CS:
+ override->segment = PREFIX_CS_;
+ goto do_next_byte;
+ case PREFIX_ES:
+ override->segment = PREFIX_ES_;
+ goto do_next_byte;
+ case PREFIX_SS:
+ override->segment = PREFIX_SS_;
+ goto do_next_byte;
+ case PREFIX_FS:
+ override->segment = PREFIX_FS_;
+ goto do_next_byte;
+ case PREFIX_GS:
+ override->segment = PREFIX_GS_;
+ goto do_next_byte;
+ case PREFIX_DS:
+ override->segment = PREFIX_DS_;
+ goto do_next_byte;
/* lock is not a valid prefix for FPU instructions,
let the cpu handle it to generate a SIGILL. */
/* case PREFIX_LOCK: */
- /* rep.. prefixes have no meaning for FPU instructions */
- case PREFIX_REPE:
- case PREFIX_REPNE:
-
- do_next_byte:
- ip++;
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(byte, ip);
- RE_ENTRANT_CHECK_ON;
- break;
- case FWAIT_OPCODE:
- *Byte = byte;
- return 1;
- default:
- if ( (byte & 0xf8) == 0xd8 )
- {
- *Byte = byte;
- *fpu_eip = ip;
- return 1;
- }
- else
- {
- /* Not a valid sequence of prefix bytes followed by
- an FPU instruction. */
- *Byte = byte; /* Needed for error message. */
- return 0;
- }
+ /* rep.. prefixes have no meaning for FPU instructions */
+ case PREFIX_REPE:
+ case PREFIX_REPNE:
+
+ do_next_byte:
+ ip++;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(byte, ip);
+ RE_ENTRANT_CHECK_ON;
+ break;
+ case FWAIT_OPCODE:
+ *Byte = byte;
+ return 1;
+ default:
+ if ((byte & 0xf8) == 0xd8) {
+ *Byte = byte;
+ *fpu_eip = ip;
+ return 1;
+ } else {
+ /* Not a valid sequence of prefix bytes followed by
+ an FPU instruction. */
+ *Byte = byte; /* Needed for error message. */
+ return 0;
+ }
+ }
}
- }
}
-
-void math_abort(struct info * info, unsigned int signal)
+void math_abort(struct info *info, unsigned int signal)
{
FPU_EIP = FPU_ORIG_EIP;
current->thread.trap_no = 16;
current->thread.error_code = 0;
- send_sig(signal,current,1);
+ send_sig(signal, current, 1);
RE_ENTRANT_CHECK_OFF;
- __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
+ __asm__("movl %0,%%esp ; ret": :"g"(((long)info) - 4));
#ifdef PARANOID
- printk("ERROR: wm-FPU-emu math_abort failed!\n");
+ printk("ERROR: wm-FPU-emu math_abort failed!\n");
#endif /* PARANOID */
}
-
-
#define S387 ((struct i387_soft_struct *)s387)
#define sstatus_word() \
((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
-int restore_i387_soft(void *s387, struct _fpstate __user *buf)
+int fpregs_soft_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
{
- u_char __user *d = (u_char __user *)buf;
- int offset, other, i, tags, regnr, tag, newtop;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
- if (__copy_from_user(&S387->cwd, d, 7*4))
- return -1;
- RE_ENTRANT_CHECK_ON;
-
- d += 7*4;
-
- S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
- offset = (S387->ftop & 7) * 10;
- other = 80 - offset;
-
- RE_ENTRANT_CHECK_OFF;
- /* Copy all registers in stack order. */
- if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
- return -1;
- if ( offset )
- if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
- return -1;
- RE_ENTRANT_CHECK_ON;
-
- /* The tags may need to be corrected now. */
- tags = S387->twd;
- newtop = S387->ftop;
- for ( i = 0; i < 8; i++ )
- {
- regnr = (i+newtop) & 7;
- if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
- {
- /* The loaded data over-rides all other cases. */
- tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
- tags &= ~(3 << (regnr*2));
- tags |= (tag & 3) << (regnr*2);
+ struct i387_soft_struct *s387 = &target->thread.i387.soft;
+ void *space = s387->st_space;
+ int ret;
+ int offset, other, i, tags, regnr, tag, newtop;
+
+ RE_ENTRANT_CHECK_OFF;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0,
+ offsetof(struct i387_soft_struct, st_space));
+ RE_ENTRANT_CHECK_ON;
+
+ if (ret)
+ return ret;
+
+ S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
+ offset = (S387->ftop & 7) * 10;
+ other = 80 - offset;
+
+ RE_ENTRANT_CHECK_OFF;
+
+ /* Copy all registers in stack order. */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ space + offset, 0, other);
+ if (!ret && offset)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ space, 0, offset);
+
+ RE_ENTRANT_CHECK_ON;
+
+ /* The tags may need to be corrected now. */
+ tags = S387->twd;
+ newtop = S387->ftop;
+ for (i = 0; i < 8; i++) {
+ regnr = (i + newtop) & 7;
+ if (((tags >> ((regnr & 7) * 2)) & 3) != TAG_Empty) {
+ /* The loaded data over-rides all other cases. */
+ tag =
+ FPU_tagof((FPU_REG *) ((u_char *) S387->st_space +
+ 10 * regnr));
+ tags &= ~(3 << (regnr * 2));
+ tags |= (tag & 3) << (regnr * 2);
+ }
}
- }
- S387->twd = tags;
+ S387->twd = tags;
- return 0;
+ return ret;
}
-
-int save_i387_soft(void *s387, struct _fpstate __user * buf)
+int fpregs_soft_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
{
- u_char __user *d = (u_char __user *)buf;
- int offset = (S387->ftop & 7) * 10, other = 80 - offset;
+ struct i387_soft_struct *s387 = &target->thread.i387.soft;
+ const void *space = s387->st_space;
+ int ret;
+ int offset = (S387->ftop & 7) * 10, other = 80 - offset;
+
+ RE_ENTRANT_CHECK_OFF;
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
#ifdef PECULIAR_486
- S387->cwd &= ~0xe080;
- /* An 80486 sets nearly all of the reserved bits to 1. */
- S387->cwd |= 0xffff0040;
- S387->swd = sstatus_word() | 0xffff0000;
- S387->twd |= 0xffff0000;
- S387->fcs &= ~0xf8000000;
- S387->fos |= 0xffff0000;
+ S387->cwd &= ~0xe080;
+ /* An 80486 sets nearly all of the reserved bits to 1. */
+ S387->cwd |= 0xffff0040;
+ S387->swd = sstatus_word() | 0xffff0000;
+ S387->twd |= 0xffff0000;
+ S387->fcs &= ~0xf8000000;
+ S387->fos |= 0xffff0000;
#endif /* PECULIAR_486 */
- if (__copy_to_user(d, &S387->cwd, 7*4))
- return -1;
- RE_ENTRANT_CHECK_ON;
-
- d += 7*4;
-
- RE_ENTRANT_CHECK_OFF;
- /* Copy all registers in stack order. */
- if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
- return -1;
- if ( offset )
- if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
- return -1;
- RE_ENTRANT_CHECK_ON;
-
- return 1;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0,
+ offsetof(struct i387_soft_struct, st_space));
+
+ /* Copy all registers in stack order. */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ space + offset, 0, other);
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ space, 0, offset);
+
+ RE_ENTRANT_CHECK_ON;
+
+ return ret;
}
diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c
index e3b5d465587..233e5af566f 100644
--- a/arch/x86/math-emu/fpu_etc.c
+++ b/arch/x86/math-emu/fpu_etc.c
@@ -16,128 +16,115 @@
#include "status_w.h"
#include "reg_constant.h"
-
static void fchs(FPU_REG *st0_ptr, u_char st0tag)
{
- if ( st0tag ^ TAG_Empty )
- {
- signbyte(st0_ptr) ^= SIGN_NEG;
- clear_C1();
- }
- else
- FPU_stack_underflow();
+ if (st0tag ^ TAG_Empty) {
+ signbyte(st0_ptr) ^= SIGN_NEG;
+ clear_C1();
+ } else
+ FPU_stack_underflow();
}
-
static void fabs(FPU_REG *st0_ptr, u_char st0tag)
{
- if ( st0tag ^ TAG_Empty )
- {
- setpositive(st0_ptr);
- clear_C1();
- }
- else
- FPU_stack_underflow();
+ if (st0tag ^ TAG_Empty) {
+ setpositive(st0_ptr);
+ clear_C1();
+ } else
+ FPU_stack_underflow();
}
-
static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
{
- switch (st0tag)
- {
- case TAG_Zero:
- setcc(SW_C3);
- break;
- case TAG_Valid:
- if (getsign(st0_ptr) == SIGN_POS)
- setcc(0);
- else
- setcc(SW_C0);
- break;
- case TAG_Special:
- switch ( FPU_Special(st0_ptr) )
- {
- case TW_Denormal:
- if (getsign(st0_ptr) == SIGN_POS)
- setcc(0);
- else
- setcc(SW_C0);
- if ( denormal_operand() < 0 )
- {
-#ifdef PECULIAR_486
- /* This is weird! */
- if (getsign(st0_ptr) == SIGN_POS)
+ switch (st0tag) {
+ case TAG_Zero:
setcc(SW_C3);
+ break;
+ case TAG_Valid:
+ if (getsign(st0_ptr) == SIGN_POS)
+ setcc(0);
+ else
+ setcc(SW_C0);
+ break;
+ case TAG_Special:
+ switch (FPU_Special(st0_ptr)) {
+ case TW_Denormal:
+ if (getsign(st0_ptr) == SIGN_POS)
+ setcc(0);
+ else
+ setcc(SW_C0);
+ if (denormal_operand() < 0) {
+#ifdef PECULIAR_486
+ /* This is weird! */
+ if (getsign(st0_ptr) == SIGN_POS)
+ setcc(SW_C3);
#endif /* PECULIAR_486 */
- return;
- }
- break;
- case TW_NaN:
- setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
- EXCEPTION(EX_Invalid);
- break;
- case TW_Infinity:
- if (getsign(st0_ptr) == SIGN_POS)
- setcc(0);
- else
- setcc(SW_C0);
- break;
- default:
- setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
- EXCEPTION(EX_INTERNAL|0x14);
- break;
+ return;
+ }
+ break;
+ case TW_NaN:
+ setcc(SW_C0 | SW_C2 | SW_C3); /* Operand is not comparable */
+ EXCEPTION(EX_Invalid);
+ break;
+ case TW_Infinity:
+ if (getsign(st0_ptr) == SIGN_POS)
+ setcc(0);
+ else
+ setcc(SW_C0);
+ break;
+ default:
+ setcc(SW_C0 | SW_C2 | SW_C3); /* Operand is not comparable */
+ EXCEPTION(EX_INTERNAL | 0x14);
+ break;
+ }
+ break;
+ case TAG_Empty:
+ setcc(SW_C0 | SW_C2 | SW_C3);
+ EXCEPTION(EX_StackUnder);
+ break;
}
- break;
- case TAG_Empty:
- setcc(SW_C0|SW_C2|SW_C3);
- EXCEPTION(EX_StackUnder);
- break;
- }
}
-
static void fxam(FPU_REG *st0_ptr, u_char st0tag)
{
- int c = 0;
- switch (st0tag)
- {
- case TAG_Empty:
- c = SW_C3|SW_C0;
- break;
- case TAG_Zero:
- c = SW_C3;
- break;
- case TAG_Valid:
- c = SW_C2;
- break;
- case TAG_Special:
- switch ( FPU_Special(st0_ptr) )
- {
- case TW_Denormal:
- c = SW_C2|SW_C3; /* Denormal */
- break;
- case TW_NaN:
- /* We also use NaN for unsupported types. */
- if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
- c = SW_C0;
- break;
- case TW_Infinity:
- c = SW_C2|SW_C0;
- break;
+ int c = 0;
+ switch (st0tag) {
+ case TAG_Empty:
+ c = SW_C3 | SW_C0;
+ break;
+ case TAG_Zero:
+ c = SW_C3;
+ break;
+ case TAG_Valid:
+ c = SW_C2;
+ break;
+ case TAG_Special:
+ switch (FPU_Special(st0_ptr)) {
+ case TW_Denormal:
+ c = SW_C2 | SW_C3; /* Denormal */
+ break;
+ case TW_NaN:
+ /* We also use NaN for unsupported types. */
+ if ((st0_ptr->sigh & 0x80000000)
+ && (exponent(st0_ptr) == EXP_OVER))
+ c = SW_C0;
+ break;
+ case TW_Infinity:
+ c = SW_C2 | SW_C0;
+ break;
+ }
}
- }
- if ( getsign(st0_ptr) == SIGN_NEG )
- c |= SW_C1;
- setcc(c);
+ if (getsign(st0_ptr) == SIGN_NEG)
+ c |= SW_C1;
+ setcc(c);
}
-
static FUNC_ST0 const fp_etc_table[] = {
- fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
- ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
+ fchs, fabs, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal,
+ ftst_, fxam, (FUNC_ST0) FPU_illegal, (FUNC_ST0) FPU_illegal
};
void FPU_etc(void)
{
- (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
+ (fp_etc_table[FPU_rm]) (&st(0), FPU_gettag0());
}
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h
index 37a8a7fe7e2..aa49b6a0d85 100644
--- a/arch/x86/math-emu/fpu_proto.h
+++ b/arch/x86/math-emu/fpu_proto.h
@@ -66,7 +66,7 @@ extern int FPU_Special(FPU_REG const *ptr);
extern int isNaN(FPU_REG const *ptr);
extern void FPU_pop(void);
extern int FPU_empty_i(int stnr);
-extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
+extern int FPU_stackoverflow(FPU_REG ** st_new_ptr);
extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
@@ -75,21 +75,23 @@ extern void FPU_triga(void);
extern void FPU_trigb(void);
/* get_address.c */
extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
- struct address *addr, fpu_addr_modes addr_modes);
+ struct address *addr,
+ fpu_addr_modes addr_modes);
extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
- struct address *addr, fpu_addr_modes addr_modes);
+ struct address *addr,
+ fpu_addr_modes addr_modes);
/* load_store.c */
extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
- void __user *data_address);
+ void __user * data_address);
/* poly_2xm1.c */
-extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
+extern int poly_2xm1(u_char sign, FPU_REG * arg, FPU_REG *result);
/* poly_atan.c */
-extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
+extern void poly_atan(FPU_REG * st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
u_char st1_tag);
/* poly_l2.c */
extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
- FPU_REG *d);
+ FPU_REG * d);
/* poly_sin.c */
extern void poly_sine(FPU_REG *st0_ptr);
extern void poly_cos(FPU_REG *st0_ptr);
@@ -117,10 +119,13 @@ extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
extern int FPU_load_bcd(u_char __user *s);
extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
- long double __user *d);
-extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
-extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
-extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
+ long double __user * d);
+extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag,
+ double __user * dfloat);
+extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag,
+ float __user * single);
+extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag,
+ long long __user * d);
extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
@@ -137,4 +142,3 @@ extern int FPU_div(int flags, int regrm, int control_w);
/* reg_convert.c */
extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
#endif /* _FPU_PROTO_H */
-
diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c
index cb436fe20e4..d9c657cd774 100644
--- a/arch/x86/math-emu/fpu_tags.c
+++ b/arch/x86/math-emu/fpu_tags.c
@@ -14,114 +14,102 @@
#include "fpu_system.h"
#include "exception.h"
-
void FPU_pop(void)
{
- fpu_tag_word |= 3 << ((top & 7)*2);
- top++;
+ fpu_tag_word |= 3 << ((top & 7) * 2);
+ top++;
}
-
int FPU_gettag0(void)
{
- return (fpu_tag_word >> ((top & 7)*2)) & 3;
+ return (fpu_tag_word >> ((top & 7) * 2)) & 3;
}
-
int FPU_gettagi(int stnr)
{
- return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
+ return (fpu_tag_word >> (((top + stnr) & 7) * 2)) & 3;
}
-
int FPU_gettag(int regnr)
{
- return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
+ return (fpu_tag_word >> ((regnr & 7) * 2)) & 3;
}
-
void FPU_settag0(int tag)
{
- int regnr = top;
- regnr &= 7;
- fpu_tag_word &= ~(3 << (regnr*2));
- fpu_tag_word |= (tag & 3) << (regnr*2);
+ int regnr = top;
+ regnr &= 7;
+ fpu_tag_word &= ~(3 << (regnr * 2));
+ fpu_tag_word |= (tag & 3) << (regnr * 2);
}
-
void FPU_settagi(int stnr, int tag)
{
- int regnr = stnr+top;
- regnr &= 7;
- fpu_tag_word &= ~(3 << (regnr*2));
- fpu_tag_word |= (tag & 3) << (regnr*2);
+ int regnr = stnr + top;
+ regnr &= 7;
+ fpu_tag_word &= ~(3 << (regnr * 2));
+ fpu_tag_word |= (tag & 3) << (regnr * 2);
}
-
void FPU_settag(int regnr, int tag)
{
- regnr &= 7;
- fpu_tag_word &= ~(3 << (regnr*2));
- fpu_tag_word |= (tag & 3) << (regnr*2);
+ regnr &= 7;
+ fpu_tag_word &= ~(3 << (regnr * 2));
+ fpu_tag_word |= (tag & 3) << (regnr * 2);
}
-
int FPU_Special(FPU_REG const *ptr)
{
- int exp = exponent(ptr);
-
- if ( exp == EXP_BIAS+EXP_UNDER )
- return TW_Denormal;
- else if ( exp != EXP_BIAS+EXP_OVER )
- return TW_NaN;
- else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
- return TW_Infinity;
- return TW_NaN;
+ int exp = exponent(ptr);
+
+ if (exp == EXP_BIAS + EXP_UNDER)
+ return TW_Denormal;
+ else if (exp != EXP_BIAS + EXP_OVER)
+ return TW_NaN;
+ else if ((ptr->sigh == 0x80000000) && (ptr->sigl == 0))
+ return TW_Infinity;
+ return TW_NaN;
}
-
int isNaN(FPU_REG const *ptr)
{
- return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
- && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
+ return ((exponent(ptr) == EXP_BIAS + EXP_OVER)
+ && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)));
}
-
int FPU_empty_i(int stnr)
{
- int regnr = (top+stnr) & 7;
+ int regnr = (top + stnr) & 7;
- return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
+ return ((fpu_tag_word >> (regnr * 2)) & 3) == TAG_Empty;
}
-
-int FPU_stackoverflow(FPU_REG **st_new_ptr)
+int FPU_stackoverflow(FPU_REG ** st_new_ptr)
{
- *st_new_ptr = &st(-1);
+ *st_new_ptr = &st(-1);
- return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
+ return ((fpu_tag_word >> (((top - 1) & 7) * 2)) & 3) != TAG_Empty;
}
-
void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
{
- reg_copy(r, &st(stnr));
- FPU_settagi(stnr, tag);
+ reg_copy(r, &st(stnr));
+ FPU_settagi(stnr, tag);
}
void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
{
- reg_copy(r, &st(1));
- FPU_settagi(1, tag);
+ reg_copy(r, &st(1));
+ FPU_settagi(1, tag);
}
void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
{
- int regnr = top;
- regnr &= 7;
+ int regnr = top;
+ regnr &= 7;
- reg_copy(r, &st(0));
+ reg_copy(r, &st(0));
- fpu_tag_word &= ~(3 << (regnr*2));
- fpu_tag_word |= (tag & 3) << (regnr*2);
+ fpu_tag_word &= ~(3 << (regnr * 2));
+ fpu_tag_word |= (tag & 3) << (regnr * 2);
}
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 403cbde1d42..ecd06680581 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -15,11 +15,10 @@
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
-#include "reg_constant.h"
+#include "reg_constant.h"
static void rem_kernel(unsigned long long st0, unsigned long long *y,
- unsigned long long st1,
- unsigned long long q, int n);
+ unsigned long long st1, unsigned long long q, int n);
#define BETTER_THAN_486
@@ -33,788 +32,706 @@ static void rem_kernel(unsigned long long st0, unsigned long long *y,
precision of the result sometimes degrades to about 63.9 bits */
static int trig_arg(FPU_REG *st0_ptr, int even)
{
- FPU_REG tmp;
- u_char tmptag;
- unsigned long long q;
- int old_cw = control_word, saved_status = partial_status;
- int tag, st0_tag = TAG_Valid;
-
- if ( exponent(st0_ptr) >= 63 )
- {
- partial_status |= SW_C2; /* Reduction incomplete. */
- return -1;
- }
-
- control_word &= ~CW_RC;
- control_word |= RC_CHOP;
-
- setpositive(st0_ptr);
- tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
- SIGN_POS);
-
- FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow
- to 2^64 */
- q = significand(&tmp);
- if ( q )
- {
- rem_kernel(significand(st0_ptr),
- &significand(&tmp),
- significand(&CONST_PI2),
- q, exponent(st0_ptr) - exponent(&CONST_PI2));
- setexponent16(&tmp, exponent(&CONST_PI2));
- st0_tag = FPU_normalize(&tmp);
- FPU_copy_to_reg0(&tmp, st0_tag);
- }
-
- if ( (even && !(q & 1)) || (!even && (q & 1)) )
- {
- st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION);
+ FPU_REG tmp;
+ u_char tmptag;
+ unsigned long long q;
+ int old_cw = control_word, saved_status = partial_status;
+ int tag, st0_tag = TAG_Valid;
+
+ if (exponent(st0_ptr) >= 63) {
+ partial_status |= SW_C2; /* Reduction incomplete. */
+ return -1;
+ }
-#ifdef BETTER_THAN_486
- /* So far, the results are exact but based upon a 64 bit
- precision approximation to pi/2. The technique used
- now is equivalent to using an approximation to pi/2 which
- is accurate to about 128 bits. */
- if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) )
- {
- /* This code gives the effect of having pi/2 to better than
- 128 bits precision. */
-
- significand(&tmp) = q + 1;
- setexponent16(&tmp, 63);
- FPU_normalize(&tmp);
- tmptag =
- FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS,
- exponent(&CONST_PI2extra) + exponent(&tmp));
- setsign(&tmp, getsign(&CONST_PI2extra));
- st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
- if ( signnegative(st0_ptr) )
- {
- /* CONST_PI2extra is negative, so the result of the addition
- can be negative. This means that the argument is actually
- in a different quadrant. The correction is always < pi/2,
- so it can't overflow into yet another quadrant. */
- setpositive(st0_ptr);
- q++;
- }
+ control_word &= ~CW_RC;
+ control_word |= RC_CHOP;
+
+ setpositive(st0_ptr);
+ tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
+ SIGN_POS);
+
+ FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow
+ to 2^64 */
+ q = significand(&tmp);
+ if (q) {
+ rem_kernel(significand(st0_ptr),
+ &significand(&tmp),
+ significand(&CONST_PI2),
+ q, exponent(st0_ptr) - exponent(&CONST_PI2));
+ setexponent16(&tmp, exponent(&CONST_PI2));
+ st0_tag = FPU_normalize(&tmp);
+ FPU_copy_to_reg0(&tmp, st0_tag);
}
+
+ if ((even && !(q & 1)) || (!even && (q & 1))) {
+ st0_tag =
+ FPU_sub(REV | LOADED | TAG_Valid, (int)&CONST_PI2,
+ FULL_PRECISION);
+
+#ifdef BETTER_THAN_486
+ /* So far, the results are exact but based upon a 64 bit
+ precision approximation to pi/2. The technique used
+ now is equivalent to using an approximation to pi/2 which
+ is accurate to about 128 bits. */
+ if ((exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)
+ || (q > 1)) {
+ /* This code gives the effect of having pi/2 to better than
+ 128 bits precision. */
+
+ significand(&tmp) = q + 1;
+ setexponent16(&tmp, 63);
+ FPU_normalize(&tmp);
+ tmptag =
+ FPU_u_mul(&CONST_PI2extra, &tmp, &tmp,
+ FULL_PRECISION, SIGN_POS,
+ exponent(&CONST_PI2extra) +
+ exponent(&tmp));
+ setsign(&tmp, getsign(&CONST_PI2extra));
+ st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION);
+ if (signnegative(st0_ptr)) {
+ /* CONST_PI2extra is negative, so the result of the addition
+ can be negative. This means that the argument is actually
+ in a different quadrant. The correction is always < pi/2,
+ so it can't overflow into yet another quadrant. */
+ setpositive(st0_ptr);
+ q++;
+ }
+ }
#endif /* BETTER_THAN_486 */
- }
+ }
#ifdef BETTER_THAN_486
- else
- {
- /* So far, the results are exact but based upon a 64 bit
- precision approximation to pi/2. The technique used
- now is equivalent to using an approximation to pi/2 which
- is accurate to about 128 bits. */
- if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
- || (q > 1) )
- {
- /* This code gives the effect of having p/2 to better than
- 128 bits precision. */
-
- significand(&tmp) = q;
- setexponent16(&tmp, 63);
- FPU_normalize(&tmp); /* This must return TAG_Valid */
- tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION,
- SIGN_POS,
- exponent(&CONST_PI2extra) + exponent(&tmp));
- setsign(&tmp, getsign(&CONST_PI2extra));
- st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp,
- FULL_PRECISION);
- if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) &&
- ((st0_ptr->sigh > CONST_PI2.sigh)
- || ((st0_ptr->sigh == CONST_PI2.sigh)
- && (st0_ptr->sigl > CONST_PI2.sigl))) )
- {
- /* CONST_PI2extra is negative, so the result of the
- subtraction can be larger than pi/2. This means
- that the argument is actually in a different quadrant.
- The correction is always < pi/2, so it can't overflow
- into yet another quadrant. */
- st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2,
- FULL_PRECISION);
- q++;
- }
+ else {
+ /* So far, the results are exact but based upon a 64 bit
+ precision approximation to pi/2. The technique used
+ now is equivalent to using an approximation to pi/2 which
+ is accurate to about 128 bits. */
+ if (((q > 0)
+ && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64))
+ || (q > 1)) {
+ /* This code gives the effect of having p/2 to better than
+ 128 bits precision. */
+
+ significand(&tmp) = q;
+ setexponent16(&tmp, 63);
+ FPU_normalize(&tmp); /* This must return TAG_Valid */
+ tmptag =
+ FPU_u_mul(&CONST_PI2extra, &tmp, &tmp,
+ FULL_PRECISION, SIGN_POS,
+ exponent(&CONST_PI2extra) +
+ exponent(&tmp));
+ setsign(&tmp, getsign(&CONST_PI2extra));
+ st0_tag = FPU_sub(LOADED | (tmptag & 0x0f), (int)&tmp,
+ FULL_PRECISION);
+ if ((exponent(st0_ptr) == exponent(&CONST_PI2)) &&
+ ((st0_ptr->sigh > CONST_PI2.sigh)
+ || ((st0_ptr->sigh == CONST_PI2.sigh)
+ && (st0_ptr->sigl > CONST_PI2.sigl)))) {
+ /* CONST_PI2extra is negative, so the result of the
+ subtraction can be larger than pi/2. This means
+ that the argument is actually in a different quadrant.
+ The correction is always < pi/2, so it can't overflow
+ into yet another quadrant. */
+ st0_tag =
+ FPU_sub(REV | LOADED | TAG_Valid,
+ (int)&CONST_PI2, FULL_PRECISION);
+ q++;
+ }
+ }
}
- }
#endif /* BETTER_THAN_486 */
- FPU_settag0(st0_tag);
- control_word = old_cw;
- partial_status = saved_status & ~SW_C2; /* Reduction complete. */
+ FPU_settag0(st0_tag);
+ control_word = old_cw;
+ partial_status = saved_status & ~SW_C2; /* Reduction complete. */
- return (q & 3) | even;
+ return (q & 3) | even;
}
-
/* Convert a long to register */
static void convert_l2reg(long const *arg, int deststnr)
{
- int tag;
- long num = *arg;
- u_char sign;
- FPU_REG *dest = &st(deststnr);
-
- if (num == 0)
- {
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
- return;
- }
-
- if (num > 0)
- { sign = SIGN_POS; }
- else
- { num = -num; sign = SIGN_NEG; }
-
- dest->sigh = num;
- dest->sigl = 0;
- setexponent16(dest, 31);
- tag = FPU_normalize(dest);
- FPU_settagi(deststnr, tag);
- setsign(dest, sign);
- return;
-}
+ int tag;
+ long num = *arg;
+ u_char sign;
+ FPU_REG *dest = &st(deststnr);
+ if (num == 0) {
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+ return;
+ }
+
+ if (num > 0) {
+ sign = SIGN_POS;
+ } else {
+ num = -num;
+ sign = SIGN_NEG;
+ }
+
+ dest->sigh = num;
+ dest->sigl = 0;
+ setexponent16(dest, 31);
+ tag = FPU_normalize(dest);
+ FPU_settagi(deststnr, tag);
+ setsign(dest, sign);
+ return;
+}
static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag)
{
- if ( st0_tag == TAG_Empty )
- FPU_stack_underflow(); /* Puts a QNaN in st(0) */
- else if ( st0_tag == TW_NaN )
- real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */
+ if (st0_tag == TAG_Empty)
+ FPU_stack_underflow(); /* Puts a QNaN in st(0) */
+ else if (st0_tag == TW_NaN)
+ real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */
#ifdef PARANOID
- else
- EXCEPTION(EX_INTERNAL|0x0112);
+ else
+ EXCEPTION(EX_INTERNAL | 0x0112);
#endif /* PARANOID */
}
-
static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag)
{
- int isNaN;
-
- switch ( st0_tag )
- {
- case TW_NaN:
- isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000);
- if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */
- {
- EXCEPTION(EX_Invalid);
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- /* Convert to a QNaN */
- st0_ptr->sigh |= 0x40000000;
- push();
- FPU_copy_to_reg0(st0_ptr, TAG_Special);
- }
- }
- else if ( isNaN )
- {
- /* A QNaN */
- push();
- FPU_copy_to_reg0(st0_ptr, TAG_Special);
- }
- else
- {
- /* pseudoNaN or other unsupported */
- EXCEPTION(EX_Invalid);
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
- push();
- FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
- }
- }
- break; /* return with a NaN in st(0) */
+ int isNaN;
+
+ switch (st0_tag) {
+ case TW_NaN:
+ isNaN = (exponent(st0_ptr) == EXP_OVER)
+ && (st0_ptr->sigh & 0x80000000);
+ if (isNaN && !(st0_ptr->sigh & 0x40000000)) { /* Signaling ? */
+ EXCEPTION(EX_Invalid);
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ /* Convert to a QNaN */
+ st0_ptr->sigh |= 0x40000000;
+ push();
+ FPU_copy_to_reg0(st0_ptr, TAG_Special);
+ }
+ } else if (isNaN) {
+ /* A QNaN */
+ push();
+ FPU_copy_to_reg0(st0_ptr, TAG_Special);
+ } else {
+ /* pseudoNaN or other unsupported */
+ EXCEPTION(EX_Invalid);
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+ push();
+ FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+ }
+ }
+ break; /* return with a NaN in st(0) */
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x0112);
+ default:
+ EXCEPTION(EX_INTERNAL | 0x0112);
#endif /* PARANOID */
- }
+ }
}
-
/*---------------------------------------------------------------------------*/
static void f2xm1(FPU_REG *st0_ptr, u_char tag)
{
- FPU_REG a;
+ FPU_REG a;
- clear_C1();
+ clear_C1();
- if ( tag == TAG_Valid )
- {
- /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
- if ( exponent(st0_ptr) < 0 )
- {
- denormal_arg:
+ if (tag == TAG_Valid) {
+ /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */
+ if (exponent(st0_ptr) < 0) {
+ denormal_arg:
- FPU_to_exp16(st0_ptr, &a);
+ FPU_to_exp16(st0_ptr, &a);
- /* poly_2xm1(x) requires 0 < st(0) < 1. */
- poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
+ /* poly_2xm1(x) requires 0 < st(0) < 1. */
+ poly_2xm1(getsign(st0_ptr), &a, st0_ptr);
+ }
+ set_precision_flag_up(); /* 80486 appears to always do this */
+ return;
}
- set_precision_flag_up(); /* 80486 appears to always do this */
- return;
- }
- if ( tag == TAG_Zero )
- return;
+ if (tag == TAG_Zero)
+ return;
- if ( tag == TAG_Special )
- tag = FPU_Special(st0_ptr);
+ if (tag == TAG_Special)
+ tag = FPU_Special(st0_ptr);
- switch ( tag )
- {
- case TW_Denormal:
- if ( denormal_operand() < 0 )
- return;
- goto denormal_arg;
- case TW_Infinity:
- if ( signnegative(st0_ptr) )
- {
- /* -infinity gives -1 (p16-10) */
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
- setnegative(st0_ptr);
+ switch (tag) {
+ case TW_Denormal:
+ if (denormal_operand() < 0)
+ return;
+ goto denormal_arg;
+ case TW_Infinity:
+ if (signnegative(st0_ptr)) {
+ /* -infinity gives -1 (p16-10) */
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ setnegative(st0_ptr);
+ }
+ return;
+ default:
+ single_arg_error(st0_ptr, tag);
}
- return;
- default:
- single_arg_error(st0_ptr, tag);
- }
}
-
static void fptan(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st_new_ptr;
- int q;
- u_char arg_sign = getsign(st0_ptr);
-
- /* Stack underflow has higher priority */
- if ( st0_tag == TAG_Empty )
- {
- FPU_stack_underflow(); /* Puts a QNaN in st(0) */
- if ( control_word & CW_Invalid )
- {
- st_new_ptr = &st(-1);
- push();
- FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
+ FPU_REG *st_new_ptr;
+ int q;
+ u_char arg_sign = getsign(st0_ptr);
+
+ /* Stack underflow has higher priority */
+ if (st0_tag == TAG_Empty) {
+ FPU_stack_underflow(); /* Puts a QNaN in st(0) */
+ if (control_word & CW_Invalid) {
+ st_new_ptr = &st(-1);
+ push();
+ FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
+ }
+ return;
}
- return;
- }
-
- if ( STACK_OVERFLOW )
- { FPU_stack_overflow(); return; }
-
- if ( st0_tag == TAG_Valid )
- {
- if ( exponent(st0_ptr) > -40 )
- {
- if ( (q = trig_arg(st0_ptr, 0)) == -1 )
- {
- /* Operand is out of range */
- return;
- }
-
- poly_tan(st0_ptr);
- setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
- set_precision_flag_up(); /* We do not really know if up or down */
+
+ if (STACK_OVERFLOW) {
+ FPU_stack_overflow();
+ return;
}
- else
- {
- /* For a small arg, the result == the argument */
- /* Underflow may happen */
- denormal_arg:
+ if (st0_tag == TAG_Valid) {
+ if (exponent(st0_ptr) > -40) {
+ if ((q = trig_arg(st0_ptr, 0)) == -1) {
+ /* Operand is out of range */
+ return;
+ }
+
+ poly_tan(st0_ptr);
+ setsign(st0_ptr, (q & 1) ^ (arg_sign != 0));
+ set_precision_flag_up(); /* We do not really know if up or down */
+ } else {
+ /* For a small arg, the result == the argument */
+ /* Underflow may happen */
+
+ denormal_arg:
+
+ FPU_to_exp16(st0_ptr, st0_ptr);
- FPU_to_exp16(st0_ptr, st0_ptr);
-
- st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
- FPU_settag0(st0_tag);
+ st0_tag =
+ FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+ FPU_settag0(st0_tag);
+ }
+ push();
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ return;
}
- push();
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
- return;
- }
-
- if ( st0_tag == TAG_Zero )
- {
- push();
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
- setcc(0);
- return;
- }
-
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
-
- if ( st0_tag == TW_Denormal )
- {
- if ( denormal_operand() < 0 )
- return;
- goto denormal_arg;
- }
-
- if ( st0_tag == TW_Infinity )
- {
- /* The 80486 treats infinity as an invalid operand */
- if ( arith_invalid(0) >= 0 )
- {
- st_new_ptr = &st(-1);
- push();
- arith_invalid(0);
+ if (st0_tag == TAG_Zero) {
+ push();
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ setcc(0);
+ return;
+ }
+
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+
+ if (st0_tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return;
+
+ goto denormal_arg;
}
- return;
- }
- single_arg_2_error(st0_ptr, st0_tag);
-}
+ if (st0_tag == TW_Infinity) {
+ /* The 80486 treats infinity as an invalid operand */
+ if (arith_invalid(0) >= 0) {
+ st_new_ptr = &st(-1);
+ push();
+ arith_invalid(0);
+ }
+ return;
+ }
+ single_arg_2_error(st0_ptr, st0_tag);
+}
static void fxtract(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st_new_ptr;
- u_char sign;
- register FPU_REG *st1_ptr = st0_ptr; /* anticipate */
-
- if ( STACK_OVERFLOW )
- { FPU_stack_overflow(); return; }
-
- clear_C1();
-
- if ( st0_tag == TAG_Valid )
- {
- long e;
-
- push();
- sign = getsign(st1_ptr);
- reg_copy(st1_ptr, st_new_ptr);
- setexponent16(st_new_ptr, exponent(st_new_ptr));
-
- denormal_arg:
-
- e = exponent16(st_new_ptr);
- convert_l2reg(&e, 1);
- setexponentpos(st_new_ptr, 0);
- setsign(st_new_ptr, sign);
- FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */
- return;
- }
- else if ( st0_tag == TAG_Zero )
- {
- sign = getsign(st0_ptr);
-
- if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 )
- return;
+ FPU_REG *st_new_ptr;
+ u_char sign;
+ register FPU_REG *st1_ptr = st0_ptr; /* anticipate */
- push();
- FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
- setsign(st_new_ptr, sign);
- return;
- }
+ if (STACK_OVERFLOW) {
+ FPU_stack_overflow();
+ return;
+ }
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
+ clear_C1();
- if ( st0_tag == TW_Denormal )
- {
- if (denormal_operand() < 0 )
- return;
+ if (st0_tag == TAG_Valid) {
+ long e;
- push();
- sign = getsign(st1_ptr);
- FPU_to_exp16(st1_ptr, st_new_ptr);
- goto denormal_arg;
- }
- else if ( st0_tag == TW_Infinity )
- {
- sign = getsign(st0_ptr);
- setpositive(st0_ptr);
- push();
- FPU_copy_to_reg0(&CONST_INF, TAG_Special);
- setsign(st_new_ptr, sign);
- return;
- }
- else if ( st0_tag == TW_NaN )
- {
- if ( real_1op_NaN(st0_ptr) < 0 )
- return;
+ push();
+ sign = getsign(st1_ptr);
+ reg_copy(st1_ptr, st_new_ptr);
+ setexponent16(st_new_ptr, exponent(st_new_ptr));
+
+ denormal_arg:
+
+ e = exponent16(st_new_ptr);
+ convert_l2reg(&e, 1);
+ setexponentpos(st_new_ptr, 0);
+ setsign(st_new_ptr, sign);
+ FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */
+ return;
+ } else if (st0_tag == TAG_Zero) {
+ sign = getsign(st0_ptr);
+
+ if (FPU_divide_by_zero(0, SIGN_NEG) < 0)
+ return;
- push();
- FPU_copy_to_reg0(st0_ptr, TAG_Special);
- return;
- }
- else if ( st0_tag == TAG_Empty )
- {
- /* Is this the correct behaviour? */
- if ( control_word & EX_Invalid )
- {
- FPU_stack_underflow();
- push();
- FPU_stack_underflow();
+ push();
+ FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+ setsign(st_new_ptr, sign);
+ return;
+ }
+
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+
+ if (st0_tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return;
+
+ push();
+ sign = getsign(st1_ptr);
+ FPU_to_exp16(st1_ptr, st_new_ptr);
+ goto denormal_arg;
+ } else if (st0_tag == TW_Infinity) {
+ sign = getsign(st0_ptr);
+ setpositive(st0_ptr);
+ push();
+ FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+ setsign(st_new_ptr, sign);
+ return;
+ } else if (st0_tag == TW_NaN) {
+ if (real_1op_NaN(st0_ptr) < 0)
+ return;
+
+ push();
+ FPU_copy_to_reg0(st0_ptr, TAG_Special);
+ return;
+ } else if (st0_tag == TAG_Empty) {
+ /* Is this the correct behaviour? */
+ if (control_word & EX_Invalid) {
+ FPU_stack_underflow();
+ push();
+ FPU_stack_underflow();
+ } else
+ EXCEPTION(EX_StackUnder);
}
- else
- EXCEPTION(EX_StackUnder);
- }
#ifdef PARANOID
- else
- EXCEPTION(EX_INTERNAL | 0x119);
+ else
+ EXCEPTION(EX_INTERNAL | 0x119);
#endif /* PARANOID */
}
-
static void fdecstp(void)
{
- clear_C1();
- top--;
+ clear_C1();
+ top--;
}
static void fincstp(void)
{
- clear_C1();
- top++;
+ clear_C1();
+ top++;
}
-
static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag)
{
- int expon;
-
- clear_C1();
-
- if ( st0_tag == TAG_Valid )
- {
- u_char tag;
-
- if (signnegative(st0_ptr))
- {
- arith_invalid(0); /* sqrt(negative) is invalid */
- return;
- }
+ int expon;
+
+ clear_C1();
- /* make st(0) in [1.0 .. 4.0) */
- expon = exponent(st0_ptr);
-
- denormal_arg:
-
- setexponent16(st0_ptr, (expon & 1));
-
- /* Do the computation, the sign of the result will be positive. */
- tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
- addexponent(st0_ptr, expon >> 1);
- FPU_settag0(tag);
- return;
- }
-
- if ( st0_tag == TAG_Zero )
- return;
-
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
-
- if ( st0_tag == TW_Infinity )
- {
- if ( signnegative(st0_ptr) )
- arith_invalid(0); /* sqrt(-Infinity) is invalid */
- return;
- }
- else if ( st0_tag == TW_Denormal )
- {
- if (signnegative(st0_ptr))
- {
- arith_invalid(0); /* sqrt(negative) is invalid */
- return;
+ if (st0_tag == TAG_Valid) {
+ u_char tag;
+
+ if (signnegative(st0_ptr)) {
+ arith_invalid(0); /* sqrt(negative) is invalid */
+ return;
+ }
+
+ /* make st(0) in [1.0 .. 4.0) */
+ expon = exponent(st0_ptr);
+
+ denormal_arg:
+
+ setexponent16(st0_ptr, (expon & 1));
+
+ /* Do the computation, the sign of the result will be positive. */
+ tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS);
+ addexponent(st0_ptr, expon >> 1);
+ FPU_settag0(tag);
+ return;
}
- if ( denormal_operand() < 0 )
- return;
+ if (st0_tag == TAG_Zero)
+ return;
- FPU_to_exp16(st0_ptr, st0_ptr);
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
- expon = exponent16(st0_ptr);
+ if (st0_tag == TW_Infinity) {
+ if (signnegative(st0_ptr))
+ arith_invalid(0); /* sqrt(-Infinity) is invalid */
+ return;
+ } else if (st0_tag == TW_Denormal) {
+ if (signnegative(st0_ptr)) {
+ arith_invalid(0); /* sqrt(negative) is invalid */
+ return;
+ }
- goto denormal_arg;
- }
+ if (denormal_operand() < 0)
+ return;
- single_arg_error(st0_ptr, st0_tag);
+ FPU_to_exp16(st0_ptr, st0_ptr);
-}
+ expon = exponent16(st0_ptr);
+
+ goto denormal_arg;
+ }
+ single_arg_error(st0_ptr, st0_tag);
+
+}
static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
{
- int flags, tag;
+ int flags, tag;
- if ( st0_tag == TAG_Valid )
- {
- u_char sign;
+ if (st0_tag == TAG_Valid) {
+ u_char sign;
- denormal_arg:
+ denormal_arg:
- sign = getsign(st0_ptr);
+ sign = getsign(st0_ptr);
- if (exponent(st0_ptr) > 63)
- return;
+ if (exponent(st0_ptr) > 63)
+ return;
+
+ if (st0_tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return;
+ }
+
+ /* Fortunately, this can't overflow to 2^64 */
+ if ((flags = FPU_round_to_int(st0_ptr, st0_tag)))
+ set_precision_flag(flags);
- if ( st0_tag == TW_Denormal )
- {
- if (denormal_operand() < 0 )
- return;
+ setexponent16(st0_ptr, 63);
+ tag = FPU_normalize(st0_ptr);
+ setsign(st0_ptr, sign);
+ FPU_settag0(tag);
+ return;
}
- /* Fortunately, this can't overflow to 2^64 */
- if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) )
- set_precision_flag(flags);
-
- setexponent16(st0_ptr, 63);
- tag = FPU_normalize(st0_ptr);
- setsign(st0_ptr, sign);
- FPU_settag0(tag);
- return;
- }
-
- if ( st0_tag == TAG_Zero )
- return;
-
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
-
- if ( st0_tag == TW_Denormal )
- goto denormal_arg;
- else if ( st0_tag == TW_Infinity )
- return;
- else
- single_arg_error(st0_ptr, st0_tag);
-}
+ if (st0_tag == TAG_Zero)
+ return;
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+
+ if (st0_tag == TW_Denormal)
+ goto denormal_arg;
+ else if (st0_tag == TW_Infinity)
+ return;
+ else
+ single_arg_error(st0_ptr, st0_tag);
+}
static int fsin(FPU_REG *st0_ptr, u_char tag)
{
- u_char arg_sign = getsign(st0_ptr);
-
- if ( tag == TAG_Valid )
- {
- int q;
-
- if ( exponent(st0_ptr) > -40 )
- {
- if ( (q = trig_arg(st0_ptr, 0)) == -1 )
- {
- /* Operand is out of range */
- return 1;
- }
-
- poly_sine(st0_ptr);
-
- if (q & 2)
- changesign(st0_ptr);
-
- setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
-
- /* We do not really know if up or down */
- set_precision_flag_up();
- return 0;
+ u_char arg_sign = getsign(st0_ptr);
+
+ if (tag == TAG_Valid) {
+ int q;
+
+ if (exponent(st0_ptr) > -40) {
+ if ((q = trig_arg(st0_ptr, 0)) == -1) {
+ /* Operand is out of range */
+ return 1;
+ }
+
+ poly_sine(st0_ptr);
+
+ if (q & 2)
+ changesign(st0_ptr);
+
+ setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign);
+
+ /* We do not really know if up or down */
+ set_precision_flag_up();
+ return 0;
+ } else {
+ /* For a small arg, the result == the argument */
+ set_precision_flag_up(); /* Must be up. */
+ return 0;
+ }
}
- else
- {
- /* For a small arg, the result == the argument */
- set_precision_flag_up(); /* Must be up. */
- return 0;
+
+ if (tag == TAG_Zero) {
+ setcc(0);
+ return 0;
}
- }
-
- if ( tag == TAG_Zero )
- {
- setcc(0);
- return 0;
- }
-
- if ( tag == TAG_Special )
- tag = FPU_Special(st0_ptr);
-
- if ( tag == TW_Denormal )
- {
- if ( denormal_operand() < 0 )
- return 1;
-
- /* For a small arg, the result == the argument */
- /* Underflow may happen */
- FPU_to_exp16(st0_ptr, st0_ptr);
-
- tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
-
- FPU_settag0(tag);
-
- return 0;
- }
- else if ( tag == TW_Infinity )
- {
- /* The 80486 treats infinity as an invalid operand */
- arith_invalid(0);
- return 1;
- }
- else
- {
- single_arg_error(st0_ptr, tag);
- return 1;
- }
-}
+ if (tag == TAG_Special)
+ tag = FPU_Special(st0_ptr);
+
+ if (tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return 1;
+
+ /* For a small arg, the result == the argument */
+ /* Underflow may happen */
+ FPU_to_exp16(st0_ptr, st0_ptr);
+
+ tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign);
+
+ FPU_settag0(tag);
+
+ return 0;
+ } else if (tag == TW_Infinity) {
+ /* The 80486 treats infinity as an invalid operand */
+ arith_invalid(0);
+ return 1;
+ } else {
+ single_arg_error(st0_ptr, tag);
+ return 1;
+ }
+}
static int f_cos(FPU_REG *st0_ptr, u_char tag)
{
- u_char st0_sign;
-
- st0_sign = getsign(st0_ptr);
-
- if ( tag == TAG_Valid )
- {
- int q;
-
- if ( exponent(st0_ptr) > -40 )
- {
- if ( (exponent(st0_ptr) < 0)
- || ((exponent(st0_ptr) == 0)
- && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) )
- {
- poly_cos(st0_ptr);
-
- /* We do not really know if up or down */
- set_precision_flag_down();
-
- return 0;
- }
- else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 )
- {
- poly_sine(st0_ptr);
-
- if ((q+1) & 2)
- changesign(st0_ptr);
-
- /* We do not really know if up or down */
- set_precision_flag_down();
-
- return 0;
- }
- else
- {
- /* Operand is out of range */
- return 1;
- }
- }
- else
- {
- denormal_arg:
+ u_char st0_sign;
+
+ st0_sign = getsign(st0_ptr);
- setcc(0);
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ if (tag == TAG_Valid) {
+ int q;
+
+ if (exponent(st0_ptr) > -40) {
+ if ((exponent(st0_ptr) < 0)
+ || ((exponent(st0_ptr) == 0)
+ && (significand(st0_ptr) <=
+ 0xc90fdaa22168c234LL))) {
+ poly_cos(st0_ptr);
+
+ /* We do not really know if up or down */
+ set_precision_flag_down();
+
+ return 0;
+ } else if ((q = trig_arg(st0_ptr, FCOS)) != -1) {
+ poly_sine(st0_ptr);
+
+ if ((q + 1) & 2)
+ changesign(st0_ptr);
+
+ /* We do not really know if up or down */
+ set_precision_flag_down();
+
+ return 0;
+ } else {
+ /* Operand is out of range */
+ return 1;
+ }
+ } else {
+ denormal_arg:
+
+ setcc(0);
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
#ifdef PECULIAR_486
- set_precision_flag_down(); /* 80486 appears to do this. */
+ set_precision_flag_down(); /* 80486 appears to do this. */
#else
- set_precision_flag_up(); /* Must be up. */
+ set_precision_flag_up(); /* Must be up. */
#endif /* PECULIAR_486 */
- return 0;
+ return 0;
+ }
+ } else if (tag == TAG_Zero) {
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ setcc(0);
+ return 0;
}
- }
- else if ( tag == TAG_Zero )
- {
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
- setcc(0);
- return 0;
- }
-
- if ( tag == TAG_Special )
- tag = FPU_Special(st0_ptr);
-
- if ( tag == TW_Denormal )
- {
- if ( denormal_operand() < 0 )
- return 1;
-
- goto denormal_arg;
- }
- else if ( tag == TW_Infinity )
- {
- /* The 80486 treats infinity as an invalid operand */
- arith_invalid(0);
- return 1;
- }
- else
- {
- single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */
- return 1;
- }
-}
+ if (tag == TAG_Special)
+ tag = FPU_Special(st0_ptr);
+
+ if (tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return 1;
+
+ goto denormal_arg;
+ } else if (tag == TW_Infinity) {
+ /* The 80486 treats infinity as an invalid operand */
+ arith_invalid(0);
+ return 1;
+ } else {
+ single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */
+ return 1;
+ }
+}
static void fcos(FPU_REG *st0_ptr, u_char st0_tag)
{
- f_cos(st0_ptr, st0_tag);
+ f_cos(st0_ptr, st0_tag);
}
-
static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st_new_ptr;
- FPU_REG arg;
- u_char tag;
-
- /* Stack underflow has higher priority */
- if ( st0_tag == TAG_Empty )
- {
- FPU_stack_underflow(); /* Puts a QNaN in st(0) */
- if ( control_word & CW_Invalid )
- {
- st_new_ptr = &st(-1);
- push();
- FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
+ FPU_REG *st_new_ptr;
+ FPU_REG arg;
+ u_char tag;
+
+ /* Stack underflow has higher priority */
+ if (st0_tag == TAG_Empty) {
+ FPU_stack_underflow(); /* Puts a QNaN in st(0) */
+ if (control_word & CW_Invalid) {
+ st_new_ptr = &st(-1);
+ push();
+ FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */
+ }
+ return;
}
- return;
- }
-
- if ( STACK_OVERFLOW )
- { FPU_stack_overflow(); return; }
-
- if ( st0_tag == TAG_Special )
- tag = FPU_Special(st0_ptr);
- else
- tag = st0_tag;
-
- if ( tag == TW_NaN )
- {
- single_arg_2_error(st0_ptr, TW_NaN);
- return;
- }
- else if ( tag == TW_Infinity )
- {
- /* The 80486 treats infinity as an invalid operand */
- if ( arith_invalid(0) >= 0 )
- {
- /* Masked response */
- push();
- arith_invalid(0);
+
+ if (STACK_OVERFLOW) {
+ FPU_stack_overflow();
+ return;
}
- return;
- }
-
- reg_copy(st0_ptr, &arg);
- if ( !fsin(st0_ptr, st0_tag) )
- {
- push();
- FPU_copy_to_reg0(&arg, st0_tag);
- f_cos(&st(0), st0_tag);
- }
- else
- {
- /* An error, so restore st(0) */
- FPU_copy_to_reg0(&arg, st0_tag);
- }
-}
+ if (st0_tag == TAG_Special)
+ tag = FPU_Special(st0_ptr);
+ else
+ tag = st0_tag;
+
+ if (tag == TW_NaN) {
+ single_arg_2_error(st0_ptr, TW_NaN);
+ return;
+ } else if (tag == TW_Infinity) {
+ /* The 80486 treats infinity as an invalid operand */
+ if (arith_invalid(0) >= 0) {
+ /* Masked response */
+ push();
+ arith_invalid(0);
+ }
+ return;
+ }
+
+ reg_copy(st0_ptr, &arg);
+ if (!fsin(st0_ptr, st0_tag)) {
+ push();
+ FPU_copy_to_reg0(&arg, st0_tag);
+ f_cos(&st(0), st0_tag);
+ } else {
+ /* An error, so restore st(0) */
+ FPU_copy_to_reg0(&arg, st0_tag);
+ }
+}
/*---------------------------------------------------------------------------*/
/* The following all require two arguments: st(0) and st(1) */
@@ -826,1020 +743,901 @@ static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
result must be zero.
*/
static void rem_kernel(unsigned long long st0, unsigned long long *y,
- unsigned long long st1,
- unsigned long long q, int n)
+ unsigned long long st1, unsigned long long q, int n)
{
- int dummy;
- unsigned long long x;
-
- x = st0 << n;
-
- /* Do the required multiplication and subtraction in the one operation */
-
- /* lsw x -= lsw st1 * lsw q */
- asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1"
- :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]),
- "=a" (dummy)
- :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0])
- :"%dx");
- /* msw x -= msw st1 * lsw q */
- asm volatile ("mull %3; subl %%eax,%0"
- :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
- :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0])
- :"%dx");
- /* msw x -= lsw st1 * msw q */
- asm volatile ("mull %3; subl %%eax,%0"
- :"=m" (((unsigned *)&x)[1]), "=a" (dummy)
- :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1])
- :"%dx");
-
- *y = x;
+ int dummy;
+ unsigned long long x;
+
+ x = st0 << n;
+
+ /* Do the required multiplication and subtraction in the one operation */
+
+ /* lsw x -= lsw st1 * lsw q */
+ asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1":"=m"
+ (((unsigned *)&x)[0]), "=m"(((unsigned *)&x)[1]),
+ "=a"(dummy)
+ :"2"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[0])
+ :"%dx");
+ /* msw x -= msw st1 * lsw q */
+ asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]),
+ "=a"(dummy)
+ :"1"(((unsigned *)&st1)[1]), "m"(((unsigned *)&q)[0])
+ :"%dx");
+ /* msw x -= lsw st1 * msw q */
+ asm volatile ("mull %3; subl %%eax,%0":"=m" (((unsigned *)&x)[1]),
+ "=a"(dummy)
+ :"1"(((unsigned *)&st1)[0]), "m"(((unsigned *)&q)[1])
+ :"%dx");
+
+ *y = x;
}
-
/* Remainder of st(0) / st(1) */
/* This routine produces exact results, i.e. there is never any
rounding or truncation, etc of the result. */
static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round)
{
- FPU_REG *st1_ptr = &st(1);
- u_char st1_tag = FPU_gettagi(1);
-
- if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
- {
- FPU_REG tmp, st0, st1;
- u_char st0_sign, st1_sign;
- u_char tmptag;
- int tag;
- int old_cw;
- int expdif;
- long long q;
- unsigned short saved_status;
- int cc;
-
- fprem_valid:
- /* Convert registers for internal use. */
- st0_sign = FPU_to_exp16(st0_ptr, &st0);
- st1_sign = FPU_to_exp16(st1_ptr, &st1);
- expdif = exponent16(&st0) - exponent16(&st1);
-
- old_cw = control_word;
- cc = 0;
-
- /* We want the status following the denorm tests, but don't want
- the status changed by the arithmetic operations. */
- saved_status = partial_status;
- control_word &= ~CW_RC;
- control_word |= RC_CHOP;
-
- if ( expdif < 64 )
- {
- /* This should be the most common case */
-
- if ( expdif > -2 )
- {
- u_char sign = st0_sign ^ st1_sign;
- tag = FPU_u_div(&st0, &st1, &tmp,
- PR_64_BITS | RC_CHOP | 0x3f,
- sign);
- setsign(&tmp, sign);
-
- if ( exponent(&tmp) >= 0 )
- {
- FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
- overflow to 2^64 */
- q = significand(&tmp);
-
- rem_kernel(significand(&st0),
- &significand(&tmp),
- significand(&st1),
- q, expdif);
-
- setexponent16(&tmp, exponent16(&st1));
- }
- else
- {
- reg_copy(&st0, &tmp);
- q = 0;
- }
-
- if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
- {
- /* We may need to subtract st(1) once more,
- to get a result <= 1/2 of st(1). */
- unsigned long long x;
- expdif = exponent16(&st1) - exponent16(&tmp);
- if ( expdif <= 1 )
- {
- if ( expdif == 0 )
- x = significand(&st1) - significand(&tmp);
- else /* expdif is 1 */
- x = (significand(&st1) << 1) - significand(&tmp);
- if ( (x < significand(&tmp)) ||
- /* or equi-distant (from 0 & st(1)) and q is odd */
- ((x == significand(&tmp)) && (q & 1) ) )
- {
- st0_sign = ! st0_sign;
- significand(&tmp) = x;
- q++;
+ FPU_REG *st1_ptr = &st(1);
+ u_char st1_tag = FPU_gettagi(1);
+
+ if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+ FPU_REG tmp, st0, st1;
+ u_char st0_sign, st1_sign;
+ u_char tmptag;
+ int tag;
+ int old_cw;
+ int expdif;
+ long long q;
+ unsigned short saved_status;
+ int cc;
+
+ fprem_valid:
+ /* Convert registers for internal use. */
+ st0_sign = FPU_to_exp16(st0_ptr, &st0);
+ st1_sign = FPU_to_exp16(st1_ptr, &st1);
+ expdif = exponent16(&st0) - exponent16(&st1);
+
+ old_cw = control_word;
+ cc = 0;
+
+ /* We want the status following the denorm tests, but don't want
+ the status changed by the arithmetic operations. */
+ saved_status = partial_status;
+ control_word &= ~CW_RC;
+ control_word |= RC_CHOP;
+
+ if (expdif < 64) {
+ /* This should be the most common case */
+
+ if (expdif > -2) {
+ u_char sign = st0_sign ^ st1_sign;
+ tag = FPU_u_div(&st0, &st1, &tmp,
+ PR_64_BITS | RC_CHOP | 0x3f,
+ sign);
+ setsign(&tmp, sign);
+
+ if (exponent(&tmp) >= 0) {
+ FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
+ overflow to 2^64 */
+ q = significand(&tmp);
+
+ rem_kernel(significand(&st0),
+ &significand(&tmp),
+ significand(&st1),
+ q, expdif);
+
+ setexponent16(&tmp, exponent16(&st1));
+ } else {
+ reg_copy(&st0, &tmp);
+ q = 0;
+ }
+
+ if ((round == RC_RND)
+ && (tmp.sigh & 0xc0000000)) {
+ /* We may need to subtract st(1) once more,
+ to get a result <= 1/2 of st(1). */
+ unsigned long long x;
+ expdif =
+ exponent16(&st1) - exponent16(&tmp);
+ if (expdif <= 1) {
+ if (expdif == 0)
+ x = significand(&st1) -
+ significand(&tmp);
+ else /* expdif is 1 */
+ x = (significand(&st1)
+ << 1) -
+ significand(&tmp);
+ if ((x < significand(&tmp)) ||
+ /* or equi-distant (from 0 & st(1)) and q is odd */
+ ((x == significand(&tmp))
+ && (q & 1))) {
+ st0_sign = !st0_sign;
+ significand(&tmp) = x;
+ q++;
+ }
+ }
+ }
+
+ if (q & 4)
+ cc |= SW_C0;
+ if (q & 2)
+ cc |= SW_C3;
+ if (q & 1)
+ cc |= SW_C1;
+ } else {
+ control_word = old_cw;
+ setcc(0);
+ return;
}
- }
- }
-
- if (q & 4) cc |= SW_C0;
- if (q & 2) cc |= SW_C3;
- if (q & 1) cc |= SW_C1;
- }
- else
- {
- control_word = old_cw;
- setcc(0);
- return;
- }
- }
- else
- {
- /* There is a large exponent difference ( >= 64 ) */
- /* To make much sense, the code in this section should
- be done at high precision. */
- int exp_1, N;
- u_char sign;
-
- /* prevent overflow here */
- /* N is 'a number between 32 and 63' (p26-113) */
- reg_copy(&st0, &tmp);
- tmptag = st0_tag;
- N = (expdif & 0x0000001f) + 32; /* This choice gives results
- identical to an AMD 486 */
- setexponent16(&tmp, N);
- exp_1 = exponent16(&st1);
- setexponent16(&st1, 0);
- expdif -= N;
-
- sign = getsign(&tmp) ^ st1_sign;
- tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f,
- sign);
- setsign(&tmp, sign);
-
- FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
- overflow to 2^64 */
-
- rem_kernel(significand(&st0),
- &significand(&tmp),
- significand(&st1),
- significand(&tmp),
- exponent(&tmp)
- );
- setexponent16(&tmp, exp_1 + expdif);
-
- /* It is possible for the operation to be complete here.
- What does the IEEE standard say? The Intel 80486 manual
- implies that the operation will never be completed at this
- point, and the behaviour of a real 80486 confirms this.
- */
- if ( !(tmp.sigh | tmp.sigl) )
- {
- /* The result is zero */
- control_word = old_cw;
- partial_status = saved_status;
- FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
- setsign(&st0, st0_sign);
+ } else {
+ /* There is a large exponent difference ( >= 64 ) */
+ /* To make much sense, the code in this section should
+ be done at high precision. */
+ int exp_1, N;
+ u_char sign;
+
+ /* prevent overflow here */
+ /* N is 'a number between 32 and 63' (p26-113) */
+ reg_copy(&st0, &tmp);
+ tmptag = st0_tag;
+ N = (expdif & 0x0000001f) + 32; /* This choice gives results
+ identical to an AMD 486 */
+ setexponent16(&tmp, N);
+ exp_1 = exponent16(&st1);
+ setexponent16(&st1, 0);
+ expdif -= N;
+
+ sign = getsign(&tmp) ^ st1_sign;
+ tag =
+ FPU_u_div(&tmp, &st1, &tmp,
+ PR_64_BITS | RC_CHOP | 0x3f, sign);
+ setsign(&tmp, sign);
+
+ FPU_round_to_int(&tmp, tag); /* Fortunately, this can't
+ overflow to 2^64 */
+
+ rem_kernel(significand(&st0),
+ &significand(&tmp),
+ significand(&st1),
+ significand(&tmp), exponent(&tmp)
+ );
+ setexponent16(&tmp, exp_1 + expdif);
+
+ /* It is possible for the operation to be complete here.
+ What does the IEEE standard say? The Intel 80486 manual
+ implies that the operation will never be completed at this
+ point, and the behaviour of a real 80486 confirms this.
+ */
+ if (!(tmp.sigh | tmp.sigl)) {
+ /* The result is zero */
+ control_word = old_cw;
+ partial_status = saved_status;
+ FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+ setsign(&st0, st0_sign);
#ifdef PECULIAR_486
- setcc(SW_C2);
+ setcc(SW_C2);
#else
- setcc(0);
+ setcc(0);
#endif /* PECULIAR_486 */
- return;
- }
- cc = SW_C2;
- }
+ return;
+ }
+ cc = SW_C2;
+ }
- control_word = old_cw;
- partial_status = saved_status;
- tag = FPU_normalize_nuo(&tmp);
- reg_copy(&tmp, st0_ptr);
-
- /* The only condition to be looked for is underflow,
- and it can occur here only if underflow is unmasked. */
- if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
- && !(control_word & CW_Underflow) )
- {
- setcc(cc);
- tag = arith_underflow(st0_ptr);
- setsign(st0_ptr, st0_sign);
- FPU_settag0(tag);
- return;
- }
- else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) )
- {
- stdexp(st0_ptr);
- setsign(st0_ptr, st0_sign);
- }
- else
- {
- tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
- }
- FPU_settag0(tag);
- setcc(cc);
+ control_word = old_cw;
+ partial_status = saved_status;
+ tag = FPU_normalize_nuo(&tmp);
+ reg_copy(&tmp, st0_ptr);
+
+ /* The only condition to be looked for is underflow,
+ and it can occur here only if underflow is unmasked. */
+ if ((exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero)
+ && !(control_word & CW_Underflow)) {
+ setcc(cc);
+ tag = arith_underflow(st0_ptr);
+ setsign(st0_ptr, st0_sign);
+ FPU_settag0(tag);
+ return;
+ } else if ((exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero)) {
+ stdexp(st0_ptr);
+ setsign(st0_ptr, st0_sign);
+ } else {
+ tag =
+ FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign);
+ }
+ FPU_settag0(tag);
+ setcc(cc);
- return;
- }
+ return;
+ }
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
- if ( st1_tag == TAG_Special )
- st1_tag = FPU_Special(st1_ptr);
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+ if (st1_tag == TAG_Special)
+ st1_tag = FPU_Special(st1_ptr);
- if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+ if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
|| ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
- || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
- {
- if ( denormal_operand() < 0 )
- return;
- goto fprem_valid;
- }
- else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
- {
- FPU_stack_underflow();
- return;
- }
- else if ( st0_tag == TAG_Zero )
- {
- if ( st1_tag == TAG_Valid )
- {
- setcc(0); return;
- }
- else if ( st1_tag == TW_Denormal )
- {
- if ( denormal_operand() < 0 )
- return;
- setcc(0); return;
- }
- else if ( st1_tag == TAG_Zero )
- { arith_invalid(0); return; } /* fprem(?,0) always invalid */
- else if ( st1_tag == TW_Infinity )
- { setcc(0); return; }
- }
- else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
- {
- if ( st1_tag == TAG_Zero )
- {
- arith_invalid(0); /* fprem(Valid,Zero) is invalid */
- return;
- }
- else if ( st1_tag != TW_NaN )
- {
- if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal))
- && (denormal_operand() < 0) )
- return;
-
- if ( st1_tag == TW_Infinity )
- {
- /* fprem(Valid,Infinity) is o.k. */
- setcc(0); return;
- }
- }
- }
- else if ( st0_tag == TW_Infinity )
- {
- if ( st1_tag != TW_NaN )
- {
- arith_invalid(0); /* fprem(Infinity,?) is invalid */
- return;
+ || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+ if (denormal_operand() < 0)
+ return;
+ goto fprem_valid;
+ } else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+ FPU_stack_underflow();
+ return;
+ } else if (st0_tag == TAG_Zero) {
+ if (st1_tag == TAG_Valid) {
+ setcc(0);
+ return;
+ } else if (st1_tag == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return;
+ setcc(0);
+ return;
+ } else if (st1_tag == TAG_Zero) {
+ arith_invalid(0);
+ return;
+ } /* fprem(?,0) always invalid */
+ else if (st1_tag == TW_Infinity) {
+ setcc(0);
+ return;
+ }
+ } else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+ if (st1_tag == TAG_Zero) {
+ arith_invalid(0); /* fprem(Valid,Zero) is invalid */
+ return;
+ } else if (st1_tag != TW_NaN) {
+ if (((st0_tag == TW_Denormal)
+ || (st1_tag == TW_Denormal))
+ && (denormal_operand() < 0))
+ return;
+
+ if (st1_tag == TW_Infinity) {
+ /* fprem(Valid,Infinity) is o.k. */
+ setcc(0);
+ return;
+ }
+ }
+ } else if (st0_tag == TW_Infinity) {
+ if (st1_tag != TW_NaN) {
+ arith_invalid(0); /* fprem(Infinity,?) is invalid */
+ return;
+ }
}
- }
- /* One of the registers must contain a NaN if we got here. */
+ /* One of the registers must contain a NaN if we got here. */
#ifdef PARANOID
- if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
- EXCEPTION(EX_INTERNAL | 0x118);
+ if ((st0_tag != TW_NaN) && (st1_tag != TW_NaN))
+ EXCEPTION(EX_INTERNAL | 0x118);
#endif /* PARANOID */
- real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
+ real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr);
}
-
/* ST(1) <- ST(1) * log ST; pop ST */
static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st1_ptr = &st(1), exponent;
- u_char st1_tag = FPU_gettagi(1);
- u_char sign;
- int e, tag;
-
- clear_C1();
-
- if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) )
- {
- both_valid:
- /* Both regs are Valid or Denormal */
- if ( signpositive(st0_ptr) )
- {
- if ( st0_tag == TW_Denormal )
- FPU_to_exp16(st0_ptr, st0_ptr);
- else
- /* Convert st(0) for internal use. */
- setexponent16(st0_ptr, exponent(st0_ptr));
-
- if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
- {
- /* Special case. The result can be precise. */
- u_char esign;
- e = exponent16(st0_ptr);
- if ( e >= 0 )
- {
- exponent.sigh = e;
- esign = SIGN_POS;
- }
- else
- {
- exponent.sigh = -e;
- esign = SIGN_NEG;
+ FPU_REG *st1_ptr = &st(1), exponent;
+ u_char st1_tag = FPU_gettagi(1);
+ u_char sign;
+ int e, tag;
+
+ clear_C1();
+
+ if ((st0_tag == TAG_Valid) && (st1_tag == TAG_Valid)) {
+ both_valid:
+ /* Both regs are Valid or Denormal */
+ if (signpositive(st0_ptr)) {
+ if (st0_tag == TW_Denormal)
+ FPU_to_exp16(st0_ptr, st0_ptr);
+ else
+ /* Convert st(0) for internal use. */
+ setexponent16(st0_ptr, exponent(st0_ptr));
+
+ if ((st0_ptr->sigh == 0x80000000)
+ && (st0_ptr->sigl == 0)) {
+ /* Special case. The result can be precise. */
+ u_char esign;
+ e = exponent16(st0_ptr);
+ if (e >= 0) {
+ exponent.sigh = e;
+ esign = SIGN_POS;
+ } else {
+ exponent.sigh = -e;
+ esign = SIGN_NEG;
+ }
+ exponent.sigl = 0;
+ setexponent16(&exponent, 31);
+ tag = FPU_normalize_nuo(&exponent);
+ stdexp(&exponent);
+ setsign(&exponent, esign);
+ tag =
+ FPU_mul(&exponent, tag, 1, FULL_PRECISION);
+ if (tag >= 0)
+ FPU_settagi(1, tag);
+ } else {
+ /* The usual case */
+ sign = getsign(st1_ptr);
+ if (st1_tag == TW_Denormal)
+ FPU_to_exp16(st1_ptr, st1_ptr);
+ else
+ /* Convert st(1) for internal use. */
+ setexponent16(st1_ptr,
+ exponent(st1_ptr));
+ poly_l2(st0_ptr, st1_ptr, sign);
+ }
+ } else {
+ /* negative */
+ if (arith_invalid(1) < 0)
+ return;
}
- exponent.sigl = 0;
- setexponent16(&exponent, 31);
- tag = FPU_normalize_nuo(&exponent);
- stdexp(&exponent);
- setsign(&exponent, esign);
- tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION);
- if ( tag >= 0 )
- FPU_settagi(1, tag);
- }
- else
- {
- /* The usual case */
- sign = getsign(st1_ptr);
- if ( st1_tag == TW_Denormal )
- FPU_to_exp16(st1_ptr, st1_ptr);
- else
- /* Convert st(1) for internal use. */
- setexponent16(st1_ptr, exponent(st1_ptr));
- poly_l2(st0_ptr, st1_ptr, sign);
- }
- }
- else
- {
- /* negative */
- if ( arith_invalid(1) < 0 )
- return;
- }
- FPU_pop();
-
- return;
- }
-
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
- if ( st1_tag == TAG_Special )
- st1_tag = FPU_Special(st1_ptr);
-
- if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
- {
- FPU_stack_underflow_pop(1);
- return;
- }
- else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) )
- {
- if ( st0_tag == TAG_Zero )
- {
- if ( st1_tag == TAG_Zero )
- {
- /* Both args zero is invalid */
- if ( arith_invalid(1) < 0 )
- return;
- }
- else
- {
- u_char sign;
- sign = getsign(st1_ptr)^SIGN_NEG;
- if ( FPU_divide_by_zero(1, sign) < 0 )
- return;
+ FPU_pop();
- setsign(st1_ptr, sign);
- }
- }
- else if ( st1_tag == TAG_Zero )
- {
- /* st(1) contains zero, st(0) valid <> 0 */
- /* Zero is the valid answer */
- sign = getsign(st1_ptr);
-
- if ( signnegative(st0_ptr) )
- {
- /* log(negative) */
- if ( arith_invalid(1) < 0 )
return;
- }
- else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- else
- {
- if ( exponent(st0_ptr) < 0 )
- sign ^= SIGN_NEG;
-
- FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
- setsign(st1_ptr, sign);
- }
}
- else
- {
- /* One or both operands are denormals. */
- if ( denormal_operand() < 0 )
- return;
- goto both_valid;
- }
- }
- else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
- {
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
- return;
- }
- /* One or both arg must be an infinity */
- else if ( st0_tag == TW_Infinity )
- {
- if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) )
- {
- /* log(-infinity) or 0*log(infinity) */
- if ( arith_invalid(1) < 0 )
- return;
- }
- else
- {
- u_char sign = getsign(st1_ptr);
- if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+ if (st1_tag == TAG_Special)
+ st1_tag = FPU_Special(st1_ptr);
- FPU_copy_to_reg1(&CONST_INF, TAG_Special);
- setsign(st1_ptr, sign);
- }
- }
- /* st(1) must be infinity here */
- else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
- && ( signpositive(st0_ptr) ) )
- {
- if ( exponent(st0_ptr) >= 0 )
- {
- if ( (exponent(st0_ptr) == 0) &&
- (st0_ptr->sigh == 0x80000000) &&
- (st0_ptr->sigl == 0) )
- {
- /* st(0) holds 1.0 */
- /* infinity*log(1) */
- if ( arith_invalid(1) < 0 )
+ if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+ FPU_stack_underflow_pop(1);
return;
- }
- /* else st(0) is positive and > 1.0 */
+ } else if ((st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal)) {
+ if (st0_tag == TAG_Zero) {
+ if (st1_tag == TAG_Zero) {
+ /* Both args zero is invalid */
+ if (arith_invalid(1) < 0)
+ return;
+ } else {
+ u_char sign;
+ sign = getsign(st1_ptr) ^ SIGN_NEG;
+ if (FPU_divide_by_zero(1, sign) < 0)
+ return;
+
+ setsign(st1_ptr, sign);
+ }
+ } else if (st1_tag == TAG_Zero) {
+ /* st(1) contains zero, st(0) valid <> 0 */
+ /* Zero is the valid answer */
+ sign = getsign(st1_ptr);
+
+ if (signnegative(st0_ptr)) {
+ /* log(negative) */
+ if (arith_invalid(1) < 0)
+ return;
+ } else if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+ else {
+ if (exponent(st0_ptr) < 0)
+ sign ^= SIGN_NEG;
+
+ FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+ setsign(st1_ptr, sign);
+ }
+ } else {
+ /* One or both operands are denormals. */
+ if (denormal_operand() < 0)
+ return;
+ goto both_valid;
+ }
+ } else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) {
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+ return;
+ }
+ /* One or both arg must be an infinity */
+ else if (st0_tag == TW_Infinity) {
+ if ((signnegative(st0_ptr)) || (st1_tag == TAG_Zero)) {
+ /* log(-infinity) or 0*log(infinity) */
+ if (arith_invalid(1) < 0)
+ return;
+ } else {
+ u_char sign = getsign(st1_ptr);
+
+ if ((st1_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+
+ FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+ setsign(st1_ptr, sign);
+ }
}
- else
- {
- /* st(0) is positive and < 1.0 */
+ /* st(1) must be infinity here */
+ else if (((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal))
+ && (signpositive(st0_ptr))) {
+ if (exponent(st0_ptr) >= 0) {
+ if ((exponent(st0_ptr) == 0) &&
+ (st0_ptr->sigh == 0x80000000) &&
+ (st0_ptr->sigl == 0)) {
+ /* st(0) holds 1.0 */
+ /* infinity*log(1) */
+ if (arith_invalid(1) < 0)
+ return;
+ }
+ /* else st(0) is positive and > 1.0 */
+ } else {
+ /* st(0) is positive and < 1.0 */
- if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
+ if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
- changesign(st1_ptr);
- }
- }
- else
- {
- /* st(0) must be zero or negative */
- if ( st0_tag == TAG_Zero )
- {
- /* This should be invalid, but a real 80486 is happy with it. */
+ changesign(st1_ptr);
+ }
+ } else {
+ /* st(0) must be zero or negative */
+ if (st0_tag == TAG_Zero) {
+ /* This should be invalid, but a real 80486 is happy with it. */
#ifndef PECULIAR_486
- sign = getsign(st1_ptr);
- if ( FPU_divide_by_zero(1, sign) < 0 )
- return;
+ sign = getsign(st1_ptr);
+ if (FPU_divide_by_zero(1, sign) < 0)
+ return;
#endif /* PECULIAR_486 */
- changesign(st1_ptr);
+ changesign(st1_ptr);
+ } else if (arith_invalid(1) < 0) /* log(negative) */
+ return;
}
- else if ( arith_invalid(1) < 0 ) /* log(negative) */
- return;
- }
- FPU_pop();
+ FPU_pop();
}
-
static void fpatan(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st1_ptr = &st(1);
- u_char st1_tag = FPU_gettagi(1);
- int tag;
+ FPU_REG *st1_ptr = &st(1);
+ u_char st1_tag = FPU_gettagi(1);
+ int tag;
- clear_C1();
- if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
- {
- valid_atan:
+ clear_C1();
+ if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+ valid_atan:
- poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
+ poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag);
- FPU_pop();
+ FPU_pop();
- return;
- }
+ return;
+ }
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
- if ( st1_tag == TAG_Special )
- st1_tag = FPU_Special(st1_ptr);
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+ if (st1_tag == TAG_Special)
+ st1_tag = FPU_Special(st1_ptr);
- if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+ if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
|| ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
- || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
- {
- if ( denormal_operand() < 0 )
- return;
+ || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+ if (denormal_operand() < 0)
+ return;
- goto valid_atan;
- }
- else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) )
- {
- FPU_stack_underflow_pop(1);
- return;
- }
- else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
- {
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 )
- FPU_pop();
- return;
- }
- else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
- {
- u_char sign = getsign(st1_ptr);
- if ( st0_tag == TW_Infinity )
- {
- if ( st1_tag == TW_Infinity )
- {
- if ( signpositive(st0_ptr) )
- {
- FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
- }
- else
- {
- setpositive(st1_ptr);
- tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr,
- FULL_PRECISION, SIGN_POS,
- exponent(&CONST_PI4), exponent(&CONST_PI2));
- if ( tag >= 0 )
- FPU_settagi(1, tag);
- }
- }
- else
- {
- if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
+ goto valid_atan;
+ } else if ((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) {
+ FPU_stack_underflow_pop(1);
+ return;
+ } else if ((st0_tag == TW_NaN) || (st1_tag == TW_NaN)) {
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0)
+ FPU_pop();
return;
+ } else if ((st0_tag == TW_Infinity) || (st1_tag == TW_Infinity)) {
+ u_char sign = getsign(st1_ptr);
+ if (st0_tag == TW_Infinity) {
+ if (st1_tag == TW_Infinity) {
+ if (signpositive(st0_ptr)) {
+ FPU_copy_to_reg1(&CONST_PI4, TAG_Valid);
+ } else {
+ setpositive(st1_ptr);
+ tag =
+ FPU_u_add(&CONST_PI4, &CONST_PI2,
+ st1_ptr, FULL_PRECISION,
+ SIGN_POS,
+ exponent(&CONST_PI4),
+ exponent(&CONST_PI2));
+ if (tag >= 0)
+ FPU_settagi(1, tag);
+ }
+ } else {
+ if ((st1_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+
+ if (signpositive(st0_ptr)) {
+ FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+ setsign(st1_ptr, sign); /* An 80486 preserves the sign */
+ FPU_pop();
+ return;
+ } else {
+ FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+ }
+ }
+ } else {
+ /* st(1) is infinity, st(0) not infinity */
+ if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
- if ( signpositive(st0_ptr) )
- {
- FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
- setsign(st1_ptr, sign); /* An 80486 preserves the sign */
- FPU_pop();
- return;
+ FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
}
- else
- {
- FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+ setsign(st1_ptr, sign);
+ } else if (st1_tag == TAG_Zero) {
+ /* st(0) must be valid or zero */
+ u_char sign = getsign(st1_ptr);
+
+ if ((st0_tag == TW_Denormal) && (denormal_operand() < 0))
+ return;
+
+ if (signpositive(st0_ptr)) {
+ /* An 80486 preserves the sign */
+ FPU_pop();
+ return;
}
- }
- }
- else
- {
- /* st(1) is infinity, st(0) not infinity */
- if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
- }
- setsign(st1_ptr, sign);
- }
- else if ( st1_tag == TAG_Zero )
- {
- /* st(0) must be valid or zero */
- u_char sign = getsign(st1_ptr);
-
- if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
+ FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
+ setsign(st1_ptr, sign);
+ } else if (st0_tag == TAG_Zero) {
+ /* st(1) must be TAG_Valid here */
+ u_char sign = getsign(st1_ptr);
- if ( signpositive(st0_ptr) )
- {
- /* An 80486 preserves the sign */
- FPU_pop();
- return;
- }
+ if ((st1_tag == TW_Denormal) && (denormal_operand() < 0))
+ return;
- FPU_copy_to_reg1(&CONST_PI, TAG_Valid);
- setsign(st1_ptr, sign);
- }
- else if ( st0_tag == TAG_Zero )
- {
- /* st(1) must be TAG_Valid here */
- u_char sign = getsign(st1_ptr);
-
- if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
-
- FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
- setsign(st1_ptr, sign);
- }
+ FPU_copy_to_reg1(&CONST_PI2, TAG_Valid);
+ setsign(st1_ptr, sign);
+ }
#ifdef PARANOID
- else
- EXCEPTION(EX_INTERNAL | 0x125);
+ else
+ EXCEPTION(EX_INTERNAL | 0x125);
#endif /* PARANOID */
- FPU_pop();
- set_precision_flag_up(); /* We do not really know if up or down */
+ FPU_pop();
+ set_precision_flag_up(); /* We do not really know if up or down */
}
-
static void fprem(FPU_REG *st0_ptr, u_char st0_tag)
{
- do_fprem(st0_ptr, st0_tag, RC_CHOP);
+ do_fprem(st0_ptr, st0_tag, RC_CHOP);
}
-
static void fprem1(FPU_REG *st0_ptr, u_char st0_tag)
{
- do_fprem(st0_ptr, st0_tag, RC_RND);
+ do_fprem(st0_ptr, st0_tag, RC_RND);
}
-
static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
{
- u_char sign, sign1;
- FPU_REG *st1_ptr = &st(1), a, b;
- u_char st1_tag = FPU_gettagi(1);
+ u_char sign, sign1;
+ FPU_REG *st1_ptr = &st(1), a, b;
+ u_char st1_tag = FPU_gettagi(1);
- clear_C1();
- if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
- {
- valid_yl2xp1:
+ clear_C1();
+ if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+ valid_yl2xp1:
- sign = getsign(st0_ptr);
- sign1 = getsign(st1_ptr);
+ sign = getsign(st0_ptr);
+ sign1 = getsign(st1_ptr);
- FPU_to_exp16(st0_ptr, &a);
- FPU_to_exp16(st1_ptr, &b);
+ FPU_to_exp16(st0_ptr, &a);
+ FPU_to_exp16(st1_ptr, &b);
- if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) )
- return;
+ if (poly_l2p1(sign, sign1, &a, &b, st1_ptr))
+ return;
- FPU_pop();
- return;
- }
+ FPU_pop();
+ return;
+ }
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
- if ( st1_tag == TAG_Special )
- st1_tag = FPU_Special(st1_ptr);
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+ if (st1_tag == TAG_Special)
+ st1_tag = FPU_Special(st1_ptr);
- if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
+ if (((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal))
|| ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid))
- || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) )
- {
- if ( denormal_operand() < 0 )
- return;
-
- goto valid_yl2xp1;
- }
- else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) )
- {
- FPU_stack_underflow_pop(1);
- return;
- }
- else if ( st0_tag == TAG_Zero )
- {
- switch ( st1_tag )
- {
- case TW_Denormal:
- if ( denormal_operand() < 0 )
- return;
-
- case TAG_Zero:
- case TAG_Valid:
- setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
- FPU_copy_to_reg1(st0_ptr, st0_tag);
- break;
-
- case TW_Infinity:
- /* Infinity*log(1) */
- if ( arith_invalid(1) < 0 )
- return;
- break;
+ || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal))) {
+ if (denormal_operand() < 0)
+ return;
- case TW_NaN:
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
- return;
- break;
-
- default:
+ goto valid_yl2xp1;
+ } else if ((st0_tag == TAG_Empty) | (st1_tag == TAG_Empty)) {
+ FPU_stack_underflow_pop(1);
+ return;
+ } else if (st0_tag == TAG_Zero) {
+ switch (st1_tag) {
+ case TW_Denormal:
+ if (denormal_operand() < 0)
+ return;
+
+ case TAG_Zero:
+ case TAG_Valid:
+ setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
+ FPU_copy_to_reg1(st0_ptr, st0_tag);
+ break;
+
+ case TW_Infinity:
+ /* Infinity*log(1) */
+ if (arith_invalid(1) < 0)
+ return;
+ break;
+
+ case TW_NaN:
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+ return;
+ break;
+
+ default:
#ifdef PARANOID
- EXCEPTION(EX_INTERNAL | 0x116);
- return;
+ EXCEPTION(EX_INTERNAL | 0x116);
+ return;
#endif /* PARANOID */
- break;
- }
- }
- else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
- {
- switch ( st1_tag )
- {
- case TAG_Zero:
- if ( signnegative(st0_ptr) )
- {
- if ( exponent(st0_ptr) >= 0 )
- {
- /* st(0) holds <= -1.0 */
-#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
- changesign(st1_ptr);
+ break;
+ }
+ } else if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+ switch (st1_tag) {
+ case TAG_Zero:
+ if (signnegative(st0_ptr)) {
+ if (exponent(st0_ptr) >= 0) {
+ /* st(0) holds <= -1.0 */
+#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
+ changesign(st1_ptr);
#else
- if ( arith_invalid(1) < 0 )
- return;
+ if (arith_invalid(1) < 0)
+ return;
#endif /* PECULIAR_486 */
- }
- else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- else
- changesign(st1_ptr);
- }
- else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- break;
-
- case TW_Infinity:
- if ( signnegative(st0_ptr) )
- {
- if ( (exponent(st0_ptr) >= 0) &&
- !((st0_ptr->sigh == 0x80000000) &&
- (st0_ptr->sigl == 0)) )
- {
- /* st(0) holds < -1.0 */
-#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
- changesign(st1_ptr);
+ } else if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+ else
+ changesign(st1_ptr);
+ } else if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+ break;
+
+ case TW_Infinity:
+ if (signnegative(st0_ptr)) {
+ if ((exponent(st0_ptr) >= 0) &&
+ !((st0_ptr->sigh == 0x80000000) &&
+ (st0_ptr->sigl == 0))) {
+ /* st(0) holds < -1.0 */
+#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
+ changesign(st1_ptr);
#else
- if ( arith_invalid(1) < 0 ) return;
+ if (arith_invalid(1) < 0)
+ return;
#endif /* PECULIAR_486 */
+ } else if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+ else
+ changesign(st1_ptr);
+ } else if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+ break;
+
+ case TW_NaN:
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+ return;
}
- else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- else
- changesign(st1_ptr);
- }
- else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
- break;
-
- case TW_NaN:
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
- return;
- }
- }
- else if ( st0_tag == TW_NaN )
- {
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
- return;
- }
- else if ( st0_tag == TW_Infinity )
- {
- if ( st1_tag == TW_NaN )
- {
- if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 )
- return;
- }
- else if ( signnegative(st0_ptr) )
- {
+ } else if (st0_tag == TW_NaN) {
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+ return;
+ } else if (st0_tag == TW_Infinity) {
+ if (st1_tag == TW_NaN) {
+ if (real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0)
+ return;
+ } else if (signnegative(st0_ptr)) {
#ifndef PECULIAR_486
- /* This should have higher priority than denormals, but... */
- if ( arith_invalid(1) < 0 ) /* log(-infinity) */
- return;
+ /* This should have higher priority than denormals, but... */
+ if (arith_invalid(1) < 0) /* log(-infinity) */
+ return;
#endif /* PECULIAR_486 */
- if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
+ if ((st1_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
#ifdef PECULIAR_486
- /* Denormal operands actually get higher priority */
- if ( arith_invalid(1) < 0 ) /* log(-infinity) */
- return;
+ /* Denormal operands actually get higher priority */
+ if (arith_invalid(1) < 0) /* log(-infinity) */
+ return;
#endif /* PECULIAR_486 */
- }
- else if ( st1_tag == TAG_Zero )
- {
- /* log(infinity) */
- if ( arith_invalid(1) < 0 )
- return;
- }
-
- /* st(1) must be valid here. */
+ } else if (st1_tag == TAG_Zero) {
+ /* log(infinity) */
+ if (arith_invalid(1) < 0)
+ return;
+ }
- else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
+ /* st(1) must be valid here. */
+
+ else if ((st1_tag == TW_Denormal) && (denormal_operand() < 0))
+ return;
- /* The Manual says that log(Infinity) is invalid, but a real
- 80486 sensibly says that it is o.k. */
- else
- {
- u_char sign = getsign(st1_ptr);
- FPU_copy_to_reg1(&CONST_INF, TAG_Special);
- setsign(st1_ptr, sign);
+ /* The Manual says that log(Infinity) is invalid, but a real
+ 80486 sensibly says that it is o.k. */
+ else {
+ u_char sign = getsign(st1_ptr);
+ FPU_copy_to_reg1(&CONST_INF, TAG_Special);
+ setsign(st1_ptr, sign);
+ }
}
- }
#ifdef PARANOID
- else
- {
- EXCEPTION(EX_INTERNAL | 0x117);
- return;
- }
+ else {
+ EXCEPTION(EX_INTERNAL | 0x117);
+ return;
+ }
#endif /* PARANOID */
- FPU_pop();
- return;
+ FPU_pop();
+ return;
}
-
static void fscale(FPU_REG *st0_ptr, u_char st0_tag)
{
- FPU_REG *st1_ptr = &st(1);
- u_char st1_tag = FPU_gettagi(1);
- int old_cw = control_word;
- u_char sign = getsign(st0_ptr);
-
- clear_C1();
- if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) )
- {
- long scale;
- FPU_REG tmp;
-
- /* Convert register for internal use. */
- setexponent16(st0_ptr, exponent(st0_ptr));
-
- valid_scale:
-
- if ( exponent(st1_ptr) > 30 )
- {
- /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
-
- if ( signpositive(st1_ptr) )
- {
- EXCEPTION(EX_Overflow);
- FPU_copy_to_reg0(&CONST_INF, TAG_Special);
- }
- else
- {
- EXCEPTION(EX_Underflow);
- FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
- }
- setsign(st0_ptr, sign);
- return;
- }
-
- control_word &= ~CW_RC;
- control_word |= RC_CHOP;
- reg_copy(st1_ptr, &tmp);
- FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */
- control_word = old_cw;
- scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
- scale += exponent16(st0_ptr);
-
- setexponent16(st0_ptr, scale);
-
- /* Use FPU_round() to properly detect under/overflow etc */
- FPU_round(st0_ptr, 0, 0, control_word, sign);
-
- return;
- }
-
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
- if ( st1_tag == TAG_Special )
- st1_tag = FPU_Special(st1_ptr);
-
- if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) )
- {
- switch ( st1_tag )
- {
- case TAG_Valid:
- /* st(0) must be a denormal */
- if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
-
- FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */
- goto valid_scale;
-
- case TAG_Zero:
- if ( st0_tag == TW_Denormal )
- denormal_operand();
- return;
-
- case TW_Denormal:
- denormal_operand();
- return;
-
- case TW_Infinity:
- if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) )
- return;
-
- if ( signpositive(st1_ptr) )
- FPU_copy_to_reg0(&CONST_INF, TAG_Special);
- else
- FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
- setsign(st0_ptr, sign);
- return;
+ FPU_REG *st1_ptr = &st(1);
+ u_char st1_tag = FPU_gettagi(1);
+ int old_cw = control_word;
+ u_char sign = getsign(st0_ptr);
+
+ clear_C1();
+ if (!((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid))) {
+ long scale;
+ FPU_REG tmp;
+
+ /* Convert register for internal use. */
+ setexponent16(st0_ptr, exponent(st0_ptr));
+
+ valid_scale:
+
+ if (exponent(st1_ptr) > 30) {
+ /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
+
+ if (signpositive(st1_ptr)) {
+ EXCEPTION(EX_Overflow);
+ FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+ } else {
+ EXCEPTION(EX_Underflow);
+ FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+ }
+ setsign(st0_ptr, sign);
+ return;
+ }
- case TW_NaN:
- real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
- return;
- }
- }
- else if ( st0_tag == TAG_Zero )
- {
- switch ( st1_tag )
- {
- case TAG_Valid:
- case TAG_Zero:
- return;
+ control_word &= ~CW_RC;
+ control_word |= RC_CHOP;
+ reg_copy(st1_ptr, &tmp);
+ FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */
+ control_word = old_cw;
+ scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl;
+ scale += exponent16(st0_ptr);
- case TW_Denormal:
- denormal_operand();
- return;
+ setexponent16(st0_ptr, scale);
- case TW_Infinity:
- if ( signpositive(st1_ptr) )
- arith_invalid(0); /* Zero scaled by +Infinity */
- return;
+ /* Use FPU_round() to properly detect under/overflow etc */
+ FPU_round(st0_ptr, 0, 0, control_word, sign);
- case TW_NaN:
- real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
- return;
+ return;
}
- }
- else if ( st0_tag == TW_Infinity )
- {
- switch ( st1_tag )
- {
- case TAG_Valid:
- case TAG_Zero:
- return;
-
- case TW_Denormal:
- denormal_operand();
- return;
- case TW_Infinity:
- if ( signnegative(st1_ptr) )
- arith_invalid(0); /* Infinity scaled by -Infinity */
- return;
-
- case TW_NaN:
- real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
- return;
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+ if (st1_tag == TAG_Special)
+ st1_tag = FPU_Special(st1_ptr);
+
+ if ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) {
+ switch (st1_tag) {
+ case TAG_Valid:
+ /* st(0) must be a denormal */
+ if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+
+ FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */
+ goto valid_scale;
+
+ case TAG_Zero:
+ if (st0_tag == TW_Denormal)
+ denormal_operand();
+ return;
+
+ case TW_Denormal:
+ denormal_operand();
+ return;
+
+ case TW_Infinity:
+ if ((st0_tag == TW_Denormal)
+ && (denormal_operand() < 0))
+ return;
+
+ if (signpositive(st1_ptr))
+ FPU_copy_to_reg0(&CONST_INF, TAG_Special);
+ else
+ FPU_copy_to_reg0(&CONST_Z, TAG_Zero);
+ setsign(st0_ptr, sign);
+ return;
+
+ case TW_NaN:
+ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+ return;
+ }
+ } else if (st0_tag == TAG_Zero) {
+ switch (st1_tag) {
+ case TAG_Valid:
+ case TAG_Zero:
+ return;
+
+ case TW_Denormal:
+ denormal_operand();
+ return;
+
+ case TW_Infinity:
+ if (signpositive(st1_ptr))
+ arith_invalid(0); /* Zero scaled by +Infinity */
+ return;
+
+ case TW_NaN:
+ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+ return;
+ }
+ } else if (st0_tag == TW_Infinity) {
+ switch (st1_tag) {
+ case TAG_Valid:
+ case TAG_Zero:
+ return;
+
+ case TW_Denormal:
+ denormal_operand();
+ return;
+
+ case TW_Infinity:
+ if (signnegative(st1_ptr))
+ arith_invalid(0); /* Infinity scaled by -Infinity */
+ return;
+
+ case TW_NaN:
+ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+ return;
+ }
+ } else if (st0_tag == TW_NaN) {
+ if (st1_tag != TAG_Empty) {
+ real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr);
+ return;
+ }
}
- }
- else if ( st0_tag == TW_NaN )
- {
- if ( st1_tag != TAG_Empty )
- { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; }
- }
-
#ifdef PARANOID
- if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) )
- {
- EXCEPTION(EX_INTERNAL | 0x115);
- return;
- }
+ if (!((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty))) {
+ EXCEPTION(EX_INTERNAL | 0x115);
+ return;
+ }
#endif
- /* At least one of st(0), st(1) must be empty */
- FPU_stack_underflow();
+ /* At least one of st(0), st(1) must be empty */
+ FPU_stack_underflow();
}
-
/*---------------------------------------------------------------------------*/
static FUNC_ST0 const trig_table_a[] = {
- f2xm1, fyl2x, fptan, fpatan,
- fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp
+ f2xm1, fyl2x, fptan, fpatan,
+ fxtract, fprem1, (FUNC_ST0) fdecstp, (FUNC_ST0) fincstp
};
void FPU_triga(void)
{
- (trig_table_a[FPU_rm])(&st(0), FPU_gettag0());
+ (trig_table_a[FPU_rm]) (&st(0), FPU_gettag0());
}
-
-static FUNC_ST0 const trig_table_b[] =
- {
- fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos
- };
+static FUNC_ST0 const trig_table_b[] = {
+ fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0) fsin, fcos
+};
void FPU_trigb(void)
{
- (trig_table_b[FPU_rm])(&st(0), FPU_gettag0());
+ (trig_table_b[FPU_rm]) (&st(0), FPU_gettag0());
}
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 2e2c51a8bd3..d701e2b39e4 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -17,7 +17,6 @@
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
-
#include <linux/stddef.h>
#include <asm/uaccess.h>
@@ -27,31 +26,30 @@
#include "exception.h"
#include "fpu_emu.h"
-
#define FPU_WRITE_BIT 0x10
static int reg_offset[] = {
- offsetof(struct info,___eax),
- offsetof(struct info,___ecx),
- offsetof(struct info,___edx),
- offsetof(struct info,___ebx),
- offsetof(struct info,___esp),
- offsetof(struct info,___ebp),
- offsetof(struct info,___esi),
- offsetof(struct info,___edi)
+ offsetof(struct info, ___eax),
+ offsetof(struct info, ___ecx),
+ offsetof(struct info, ___edx),
+ offsetof(struct info, ___ebx),
+ offsetof(struct info, ___esp),
+ offsetof(struct info, ___ebp),
+ offsetof(struct info, ___esi),
+ offsetof(struct info, ___edi)
};
#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
static int reg_offset_vm86[] = {
- offsetof(struct info,___cs),
- offsetof(struct info,___vm86_ds),
- offsetof(struct info,___vm86_es),
- offsetof(struct info,___vm86_fs),
- offsetof(struct info,___vm86_gs),
- offsetof(struct info,___ss),
- offsetof(struct info,___vm86_ds)
- };
+ offsetof(struct info, ___cs),
+ offsetof(struct info, ___vm86_ds),
+ offsetof(struct info, ___vm86_es),
+ offsetof(struct info, ___vm86_fs),
+ offsetof(struct info, ___vm86_gs),
+ offsetof(struct info, ___ss),
+ offsetof(struct info, ___vm86_ds)
+};
#define VM86_REG_(x) (*(unsigned short *) \
(reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
@@ -60,158 +58,141 @@ static int reg_offset_vm86[] = {
#define ___GS ___ds
static int reg_offset_pm[] = {
- offsetof(struct info,___cs),
- offsetof(struct info,___ds),
- offsetof(struct info,___es),
- offsetof(struct info,___fs),
- offsetof(struct info,___GS),
- offsetof(struct info,___ss),
- offsetof(struct info,___ds)
- };
+ offsetof(struct info, ___cs),
+ offsetof(struct info, ___ds),
+ offsetof(struct info, ___es),
+ offsetof(struct info, ___fs),
+ offsetof(struct info, ___GS),
+ offsetof(struct info, ___ss),
+ offsetof(struct info, ___ds)
+};
#define PM_REG_(x) (*(unsigned short *) \
(reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
-
/* Decode the SIB byte. This function assumes mod != 0 */
static int sib(int mod, unsigned long *fpu_eip)
{
- u_char ss,index,base;
- long offset;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
- RE_ENTRANT_CHECK_ON;
- (*fpu_eip)++;
- ss = base >> 6;
- index = (base >> 3) & 7;
- base &= 7;
-
- if ((mod == 0) && (base == 5))
- offset = 0; /* No base register */
- else
- offset = REG_(base);
-
- if (index == 4)
- {
- /* No index register */
- /* A non-zero ss is illegal */
- if ( ss )
- EXCEPTION(EX_Invalid);
- }
- else
- {
- offset += (REG_(index)) << ss;
- }
-
- if (mod == 1)
- {
- /* 8 bit signed displacement */
- long displacement;
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
- offset += displacement;
- RE_ENTRANT_CHECK_ON;
- (*fpu_eip)++;
- }
- else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
- {
- /* 32 bit displacement */
- long displacement;
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(4);
- FPU_get_user(displacement, (long __user *) (*fpu_eip));
- offset += displacement;
- RE_ENTRANT_CHECK_ON;
- (*fpu_eip) += 4;
- }
-
- return offset;
-}
+ u_char ss, index, base;
+ long offset;
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
+ RE_ENTRANT_CHECK_ON;
+ (*fpu_eip)++;
+ ss = base >> 6;
+ index = (base >> 3) & 7;
+ base &= 7;
+
+ if ((mod == 0) && (base == 5))
+ offset = 0; /* No base register */
+ else
+ offset = REG_(base);
+
+ if (index == 4) {
+ /* No index register */
+ /* A non-zero ss is illegal */
+ if (ss)
+ EXCEPTION(EX_Invalid);
+ } else {
+ offset += (REG_(index)) << ss;
+ }
+
+ if (mod == 1) {
+ /* 8 bit signed displacement */
+ long displacement;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(displacement, (signed char __user *)(*fpu_eip));
+ offset += displacement;
+ RE_ENTRANT_CHECK_ON;
+ (*fpu_eip)++;
+ } else if (mod == 2 || base == 5) { /* The second condition also has mod==0 */
+ /* 32 bit displacement */
+ long displacement;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(4);
+ FPU_get_user(displacement, (long __user *)(*fpu_eip));
+ offset += displacement;
+ RE_ENTRANT_CHECK_ON;
+ (*fpu_eip) += 4;
+ }
+ return offset;
+}
-static unsigned long vm86_segment(u_char segment,
- struct address *addr)
+static unsigned long vm86_segment(u_char segment, struct address *addr)
{
- segment--;
+ segment--;
#ifdef PARANOID
- if ( segment > PREFIX_SS_ )
- {
- EXCEPTION(EX_INTERNAL|0x130);
- math_abort(FPU_info,SIGSEGV);
- }
+ if (segment > PREFIX_SS_) {
+ EXCEPTION(EX_INTERNAL | 0x130);
+ math_abort(FPU_info, SIGSEGV);
+ }
#endif /* PARANOID */
- addr->selector = VM86_REG_(segment);
- return (unsigned long)VM86_REG_(segment) << 4;
+ addr->selector = VM86_REG_(segment);
+ return (unsigned long)VM86_REG_(segment) << 4;
}
-
/* This should work for 16 and 32 bit protected mode. */
static long pm_address(u_char FPU_modrm, u_char segment,
struct address *addr, long offset)
-{
- struct desc_struct descriptor;
- unsigned long base_address, limit, address, seg_top;
+{
+ struct desc_struct descriptor;
+ unsigned long base_address, limit, address, seg_top;
- segment--;
+ segment--;
#ifdef PARANOID
- /* segment is unsigned, so this also detects if segment was 0: */
- if ( segment > PREFIX_SS_ )
- {
- EXCEPTION(EX_INTERNAL|0x132);
- math_abort(FPU_info,SIGSEGV);
- }
+ /* segment is unsigned, so this also detects if segment was 0: */
+ if (segment > PREFIX_SS_) {
+ EXCEPTION(EX_INTERNAL | 0x132);
+ math_abort(FPU_info, SIGSEGV);
+ }
#endif /* PARANOID */
- switch ( segment )
- {
- /* gs isn't used by the kernel, so it still has its
- user-space value. */
- case PREFIX_GS_-1:
- /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
- savesegment(gs, addr->selector);
- break;
- default:
- addr->selector = PM_REG_(segment);
- }
-
- descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
- base_address = SEG_BASE_ADDR(descriptor);
- address = base_address + offset;
- limit = base_address
- + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
- if ( limit < base_address ) limit = 0xffffffff;
-
- if ( SEG_EXPAND_DOWN(descriptor) )
- {
- if ( SEG_G_BIT(descriptor) )
- seg_top = 0xffffffff;
- else
- {
- seg_top = base_address + (1 << 20);
- if ( seg_top < base_address ) seg_top = 0xffffffff;
+ switch (segment) {
+ /* gs isn't used by the kernel, so it still has its
+ user-space value. */
+ case PREFIX_GS_ - 1:
+ /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
+ savesegment(gs, addr->selector);
+ break;
+ default:
+ addr->selector = PM_REG_(segment);
}
- access_limit =
- (address <= limit) || (address >= seg_top) ? 0 :
- ((seg_top-address) >= 255 ? 255 : seg_top-address);
- }
- else
- {
- access_limit =
- (address > limit) || (address < base_address) ? 0 :
- ((limit-address) >= 254 ? 255 : limit-address+1);
- }
- if ( SEG_EXECUTE_ONLY(descriptor) ||
- (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
- {
- access_limit = 0;
- }
- return address;
-}
+ descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+ base_address = SEG_BASE_ADDR(descriptor);
+ address = base_address + offset;
+ limit = base_address
+ + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1;
+ if (limit < base_address)
+ limit = 0xffffffff;
+
+ if (SEG_EXPAND_DOWN(descriptor)) {
+ if (SEG_G_BIT(descriptor))
+ seg_top = 0xffffffff;
+ else {
+ seg_top = base_address + (1 << 20);
+ if (seg_top < base_address)
+ seg_top = 0xffffffff;
+ }
+ access_limit =
+ (address <= limit) || (address >= seg_top) ? 0 :
+ ((seg_top - address) >= 255 ? 255 : seg_top - address);
+ } else {
+ access_limit =
+ (address > limit) || (address < base_address) ? 0 :
+ ((limit - address) >= 254 ? 255 : limit - address + 1);
+ }
+ if (SEG_EXECUTE_ONLY(descriptor) ||
+ (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
+ access_limit = 0;
+ }
+ return address;
+}
/*
MOD R/M byte: MOD == 3 has a special use for the FPU
@@ -221,7 +202,6 @@ static long pm_address(u_char FPU_modrm, u_char segment,
..... ......... .........
MOD OPCODE(2) R/M
-
SIB byte
7 6 5 4 3 2 1 0
@@ -231,208 +211,194 @@ static long pm_address(u_char FPU_modrm, u_char segment,
*/
void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
- struct address *addr,
- fpu_addr_modes addr_modes)
+ struct address *addr, fpu_addr_modes addr_modes)
+{
+ u_char mod;
+ unsigned rm = FPU_modrm & 7;
+ long *cpu_reg_ptr;
+ int address = 0; /* Initialized just to stop compiler warnings. */
+
+ /* Memory accessed via the cs selector is write protected
+ in `non-segmented' 32 bit protected mode. */
+ if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+ && (addr_modes.override.segment == PREFIX_CS_)) {
+ math_abort(FPU_info, SIGSEGV);
+ }
+
+ addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
+
+ mod = (FPU_modrm >> 6) & 3;
+
+ if (rm == 4 && mod != 3) {
+ address = sib(mod, fpu_eip);
+ } else {
+ cpu_reg_ptr = &REG_(rm);
+ switch (mod) {
+ case 0:
+ if (rm == 5) {
+ /* Special case: disp32 */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(4);
+ FPU_get_user(address,
+ (unsigned long __user
+ *)(*fpu_eip));
+ (*fpu_eip) += 4;
+ RE_ENTRANT_CHECK_ON;
+ addr->offset = address;
+ return (void __user *)address;
+ } else {
+ address = *cpu_reg_ptr; /* Just return the contents
+ of the cpu register */
+ addr->offset = address;
+ return (void __user *)address;
+ }
+ case 1:
+ /* 8 bit signed displacement */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(address, (signed char __user *)(*fpu_eip));
+ RE_ENTRANT_CHECK_ON;
+ (*fpu_eip)++;
+ break;
+ case 2:
+ /* 32 bit displacement */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(4);
+ FPU_get_user(address, (long __user *)(*fpu_eip));
+ (*fpu_eip) += 4;
+ RE_ENTRANT_CHECK_ON;
+ break;
+ case 3:
+ /* Not legal for the FPU */
+ EXCEPTION(EX_Invalid);
+ }
+ address += *cpu_reg_ptr;
+ }
+
+ addr->offset = address;
+
+ switch (addr_modes.default_mode) {
+ case 0:
+ break;
+ case VM86:
+ address += vm86_segment(addr_modes.override.segment, addr);
+ break;
+ case PM16:
+ case SEG32:
+ address = pm_address(FPU_modrm, addr_modes.override.segment,
+ addr, address);
+ break;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x133);
+ }
+
+ return (void __user *)address;
+}
+
+void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
+ struct address *addr, fpu_addr_modes addr_modes)
{
- u_char mod;
- unsigned rm = FPU_modrm & 7;
- long *cpu_reg_ptr;
- int address = 0; /* Initialized just to stop compiler warnings. */
-
- /* Memory accessed via the cs selector is write protected
- in `non-segmented' 32 bit protected mode. */
- if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
- && (addr_modes.override.segment == PREFIX_CS_) )
- {
- math_abort(FPU_info,SIGSEGV);
- }
-
- addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
-
- mod = (FPU_modrm >> 6) & 3;
-
- if (rm == 4 && mod != 3)
- {
- address = sib(mod, fpu_eip);
- }
- else
- {
- cpu_reg_ptr = & REG_(rm);
- switch (mod)
- {
+ u_char mod;
+ unsigned rm = FPU_modrm & 7;
+ int address = 0; /* Default used for mod == 0 */
+
+ /* Memory accessed via the cs selector is write protected
+ in `non-segmented' 32 bit protected mode. */
+ if (!addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+ && (addr_modes.override.segment == PREFIX_CS_)) {
+ math_abort(FPU_info, SIGSEGV);
+ }
+
+ addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
+
+ mod = (FPU_modrm >> 6) & 3;
+
+ switch (mod) {
case 0:
- if (rm == 5)
- {
- /* Special case: disp32 */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(4);
- FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
- (*fpu_eip) += 4;
- RE_ENTRANT_CHECK_ON;
- addr->offset = address;
- return (void __user *) address;
- }
- else
- {
- address = *cpu_reg_ptr; /* Just return the contents
- of the cpu register */
- addr->offset = address;
- return (void __user *) address;
- }
+ if (rm == 6) {
+ /* Special case: disp16 */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(2);
+ FPU_get_user(address,
+ (unsigned short __user *)(*fpu_eip));
+ (*fpu_eip) += 2;
+ RE_ENTRANT_CHECK_ON;
+ goto add_segment;
+ }
+ break;
case 1:
- /* 8 bit signed displacement */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(address, (signed char __user *) (*fpu_eip));
- RE_ENTRANT_CHECK_ON;
- (*fpu_eip)++;
- break;
+ /* 8 bit signed displacement */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(1);
+ FPU_get_user(address, (signed char __user *)(*fpu_eip));
+ RE_ENTRANT_CHECK_ON;
+ (*fpu_eip)++;
+ break;
case 2:
- /* 32 bit displacement */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(4);
- FPU_get_user(address, (long __user *) (*fpu_eip));
- (*fpu_eip) += 4;
- RE_ENTRANT_CHECK_ON;
- break;
+ /* 16 bit displacement */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_code_access_ok(2);
+ FPU_get_user(address, (unsigned short __user *)(*fpu_eip));
+ (*fpu_eip) += 2;
+ RE_ENTRANT_CHECK_ON;
+ break;
case 3:
- /* Not legal for the FPU */
- EXCEPTION(EX_Invalid);
+ /* Not legal for the FPU */
+ EXCEPTION(EX_Invalid);
+ break;
+ }
+ switch (rm) {
+ case 0:
+ address += FPU_info->___ebx + FPU_info->___esi;
+ break;
+ case 1:
+ address += FPU_info->___ebx + FPU_info->___edi;
+ break;
+ case 2:
+ address += FPU_info->___ebp + FPU_info->___esi;
+ if (addr_modes.override.segment == PREFIX_DEFAULT)
+ addr_modes.override.segment = PREFIX_SS_;
+ break;
+ case 3:
+ address += FPU_info->___ebp + FPU_info->___edi;
+ if (addr_modes.override.segment == PREFIX_DEFAULT)
+ addr_modes.override.segment = PREFIX_SS_;
+ break;
+ case 4:
+ address += FPU_info->___esi;
+ break;
+ case 5:
+ address += FPU_info->___edi;
+ break;
+ case 6:
+ address += FPU_info->___ebp;
+ if (addr_modes.override.segment == PREFIX_DEFAULT)
+ addr_modes.override.segment = PREFIX_SS_;
+ break;
+ case 7:
+ address += FPU_info->___ebx;
+ break;
}
- address += *cpu_reg_ptr;
- }
-
- addr->offset = address;
-
- switch ( addr_modes.default_mode )
- {
- case 0:
- break;
- case VM86:
- address += vm86_segment(addr_modes.override.segment, addr);
- break;
- case PM16:
- case SEG32:
- address = pm_address(FPU_modrm, addr_modes.override.segment,
- addr, address);
- break;
- default:
- EXCEPTION(EX_INTERNAL|0x133);
- }
-
- return (void __user *)address;
-}
+ add_segment:
+ address &= 0xffff;
-void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
- struct address *addr,
- fpu_addr_modes addr_modes)
-{
- u_char mod;
- unsigned rm = FPU_modrm & 7;
- int address = 0; /* Default used for mod == 0 */
-
- /* Memory accessed via the cs selector is write protected
- in `non-segmented' 32 bit protected mode. */
- if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
- && (addr_modes.override.segment == PREFIX_CS_) )
- {
- math_abort(FPU_info,SIGSEGV);
- }
-
- addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
-
- mod = (FPU_modrm >> 6) & 3;
-
- switch (mod)
- {
- case 0:
- if (rm == 6)
- {
- /* Special case: disp16 */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(2);
- FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
- (*fpu_eip) += 2;
- RE_ENTRANT_CHECK_ON;
- goto add_segment;
+ addr->offset = address;
+
+ switch (addr_modes.default_mode) {
+ case 0:
+ break;
+ case VM86:
+ address += vm86_segment(addr_modes.override.segment, addr);
+ break;
+ case PM16:
+ case SEG32:
+ address = pm_address(FPU_modrm, addr_modes.override.segment,
+ addr, address);
+ break;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x131);
}
- break;
- case 1:
- /* 8 bit signed displacement */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(1);
- FPU_get_user(address, (signed char __user *) (*fpu_eip));
- RE_ENTRANT_CHECK_ON;
- (*fpu_eip)++;
- break;
- case 2:
- /* 16 bit displacement */
- RE_ENTRANT_CHECK_OFF;
- FPU_code_access_ok(2);
- FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
- (*fpu_eip) += 2;
- RE_ENTRANT_CHECK_ON;
- break;
- case 3:
- /* Not legal for the FPU */
- EXCEPTION(EX_Invalid);
- break;
- }
- switch ( rm )
- {
- case 0:
- address += FPU_info->___ebx + FPU_info->___esi;
- break;
- case 1:
- address += FPU_info->___ebx + FPU_info->___edi;
- break;
- case 2:
- address += FPU_info->___ebp + FPU_info->___esi;
- if ( addr_modes.override.segment == PREFIX_DEFAULT )
- addr_modes.override.segment = PREFIX_SS_;
- break;
- case 3:
- address += FPU_info->___ebp + FPU_info->___edi;
- if ( addr_modes.override.segment == PREFIX_DEFAULT )
- addr_modes.override.segment = PREFIX_SS_;
- break;
- case 4:
- address += FPU_info->___esi;
- break;
- case 5:
- address += FPU_info->___edi;
- break;
- case 6:
- address += FPU_info->___ebp;
- if ( addr_modes.override.segment == PREFIX_DEFAULT )
- addr_modes.override.segment = PREFIX_SS_;
- break;
- case 7:
- address += FPU_info->___ebx;
- break;
- }
-
- add_segment:
- address &= 0xffff;
-
- addr->offset = address;
-
- switch ( addr_modes.default_mode )
- {
- case 0:
- break;
- case VM86:
- address += vm86_segment(addr_modes.override.segment, addr);
- break;
- case PM16:
- case SEG32:
- address = pm_address(FPU_modrm, addr_modes.override.segment,
- addr, address);
- break;
- default:
- EXCEPTION(EX_INTERNAL|0x131);
- }
-
- return (void __user *)address ;
+
+ return (void __user *)address;
}
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c
index eebd6fb1c8a..2931ff35521 100644
--- a/arch/x86/math-emu/load_store.c
+++ b/arch/x86/math-emu/load_store.c
@@ -26,247 +26,257 @@
#include "status_w.h"
#include "control_w.h"
-
-#define _NONE_ 0 /* st0_ptr etc not needed */
-#define _REG0_ 1 /* Will be storing st(0) */
-#define _PUSH_ 3 /* Need to check for space to push onto stack */
-#define _null_ 4 /* Function illegal or not implemented */
+#define _NONE_ 0 /* st0_ptr etc not needed */
+#define _REG0_ 1 /* Will be storing st(0) */
+#define _PUSH_ 3 /* Need to check for space to push onto stack */
+#define _null_ 4 /* Function illegal or not implemented */
#define pop_0() { FPU_settag0(TAG_Empty); top++; }
-
static u_char const type_table[32] = {
- _PUSH_, _PUSH_, _PUSH_, _PUSH_,
- _null_, _null_, _null_, _null_,
- _REG0_, _REG0_, _REG0_, _REG0_,
- _REG0_, _REG0_, _REG0_, _REG0_,
- _NONE_, _null_, _NONE_, _PUSH_,
- _NONE_, _PUSH_, _null_, _PUSH_,
- _NONE_, _null_, _NONE_, _REG0_,
- _NONE_, _REG0_, _NONE_, _REG0_
- };
+ _PUSH_, _PUSH_, _PUSH_, _PUSH_,
+ _null_, _null_, _null_, _null_,
+ _REG0_, _REG0_, _REG0_, _REG0_,
+ _REG0_, _REG0_, _REG0_, _REG0_,
+ _NONE_, _null_, _NONE_, _PUSH_,
+ _NONE_, _PUSH_, _null_, _PUSH_,
+ _NONE_, _null_, _NONE_, _REG0_,
+ _NONE_, _REG0_, _NONE_, _REG0_
+};
u_char const data_sizes_16[32] = {
- 4, 4, 8, 2, 0, 0, 0, 0,
- 4, 4, 8, 2, 4, 4, 8, 2,
- 14, 0, 94, 10, 2, 10, 0, 8,
- 14, 0, 94, 10, 2, 10, 2, 8
+ 4, 4, 8, 2, 0, 0, 0, 0,
+ 4, 4, 8, 2, 4, 4, 8, 2,
+ 14, 0, 94, 10, 2, 10, 0, 8,
+ 14, 0, 94, 10, 2, 10, 2, 8
};
static u_char const data_sizes_32[32] = {
- 4, 4, 8, 2, 0, 0, 0, 0,
- 4, 4, 8, 2, 4, 4, 8, 2,
- 28, 0,108, 10, 2, 10, 0, 8,
- 28, 0,108, 10, 2, 10, 2, 8
+ 4, 4, 8, 2, 0, 0, 0, 0,
+ 4, 4, 8, 2, 4, 4, 8, 2,
+ 28, 0, 108, 10, 2, 10, 0, 8,
+ 28, 0, 108, 10, 2, 10, 2, 8
};
int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
- void __user *data_address)
+ void __user * data_address)
{
- FPU_REG loaded_data;
- FPU_REG *st0_ptr;
- u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
- u_char loaded_tag;
+ FPU_REG loaded_data;
+ FPU_REG *st0_ptr;
+ u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
+ u_char loaded_tag;
- st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
+ st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
- if ( addr_modes.default_mode & PROTECTED )
- {
- if ( addr_modes.default_mode == SEG32 )
- {
- if ( access_limit < data_sizes_32[type] )
- math_abort(FPU_info,SIGSEGV);
- }
- else if ( addr_modes.default_mode == PM16 )
- {
- if ( access_limit < data_sizes_16[type] )
- math_abort(FPU_info,SIGSEGV);
- }
+ if (addr_modes.default_mode & PROTECTED) {
+ if (addr_modes.default_mode == SEG32) {
+ if (access_limit < data_sizes_32[type])
+ math_abort(FPU_info, SIGSEGV);
+ } else if (addr_modes.default_mode == PM16) {
+ if (access_limit < data_sizes_16[type])
+ math_abort(FPU_info, SIGSEGV);
+ }
#ifdef PARANOID
- else
- EXCEPTION(EX_INTERNAL|0x140);
+ else
+ EXCEPTION(EX_INTERNAL | 0x140);
#endif /* PARANOID */
- }
+ }
- switch ( type_table[type] )
- {
- case _NONE_:
- break;
- case _REG0_:
- st0_ptr = &st(0); /* Some of these instructions pop after
- storing */
- st0_tag = FPU_gettag0();
- break;
- case _PUSH_:
- {
- if ( FPU_gettagi(-1) != TAG_Empty )
- { FPU_stack_overflow(); return 0; }
- top--;
- st0_ptr = &st(0);
- }
- break;
- case _null_:
- FPU_illegal();
- return 0;
+ switch (type_table[type]) {
+ case _NONE_:
+ break;
+ case _REG0_:
+ st0_ptr = &st(0); /* Some of these instructions pop after
+ storing */
+ st0_tag = FPU_gettag0();
+ break;
+ case _PUSH_:
+ {
+ if (FPU_gettagi(-1) != TAG_Empty) {
+ FPU_stack_overflow();
+ return 0;
+ }
+ top--;
+ st0_ptr = &st(0);
+ }
+ break;
+ case _null_:
+ FPU_illegal();
+ return 0;
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x141);
- return 0;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x141);
+ return 0;
#endif /* PARANOID */
- }
-
- switch ( type )
- {
- case 000: /* fld m32real */
- clear_C1();
- loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
- if ( (loaded_tag == TAG_Special)
- && isNaN(&loaded_data)
- && (real_1op_NaN(&loaded_data) < 0) )
- {
- top++;
- break;
- }
- FPU_copy_to_reg0(&loaded_data, loaded_tag);
- break;
- case 001: /* fild m32int */
- clear_C1();
- loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
- FPU_copy_to_reg0(&loaded_data, loaded_tag);
- break;
- case 002: /* fld m64real */
- clear_C1();
- loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
- if ( (loaded_tag == TAG_Special)
- && isNaN(&loaded_data)
- && (real_1op_NaN(&loaded_data) < 0) )
- {
- top++;
- break;
}
- FPU_copy_to_reg0(&loaded_data, loaded_tag);
- break;
- case 003: /* fild m16int */
- clear_C1();
- loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
- FPU_copy_to_reg0(&loaded_data, loaded_tag);
- break;
- case 010: /* fst m32real */
- clear_C1();
- FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
- break;
- case 011: /* fist m32int */
- clear_C1();
- FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
- break;
- case 012: /* fst m64real */
- clear_C1();
- FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
- break;
- case 013: /* fist m16int */
- clear_C1();
- FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
- break;
- case 014: /* fstp m32real */
- clear_C1();
- if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 015: /* fistp m32int */
- clear_C1();
- if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 016: /* fstp m64real */
- clear_C1();
- if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 017: /* fistp m16int */
- clear_C1();
- if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 020: /* fldenv m14/28byte */
- fldenv(addr_modes, (u_char __user *)data_address);
- /* Ensure that the values just loaded are not changed by
- fix-up operations. */
- return 1;
- case 022: /* frstor m94/108byte */
- frstor(addr_modes, (u_char __user *)data_address);
- /* Ensure that the values just loaded are not changed by
- fix-up operations. */
- return 1;
- case 023: /* fbld m80dec */
- clear_C1();
- loaded_tag = FPU_load_bcd((u_char __user *)data_address);
- FPU_settag0(loaded_tag);
- break;
- case 024: /* fldcw */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, data_address, 2);
- FPU_get_user(control_word, (unsigned short __user *) data_address);
- RE_ENTRANT_CHECK_ON;
- if ( partial_status & ~control_word & CW_Exceptions )
- partial_status |= (SW_Summary | SW_Backward);
- else
- partial_status &= ~(SW_Summary | SW_Backward);
+
+ switch (type) {
+ case 000: /* fld m32real */
+ clear_C1();
+ loaded_tag =
+ FPU_load_single((float __user *)data_address, &loaded_data);
+ if ((loaded_tag == TAG_Special)
+ && isNaN(&loaded_data)
+ && (real_1op_NaN(&loaded_data) < 0)) {
+ top++;
+ break;
+ }
+ FPU_copy_to_reg0(&loaded_data, loaded_tag);
+ break;
+ case 001: /* fild m32int */
+ clear_C1();
+ loaded_tag =
+ FPU_load_int32((long __user *)data_address, &loaded_data);
+ FPU_copy_to_reg0(&loaded_data, loaded_tag);
+ break;
+ case 002: /* fld m64real */
+ clear_C1();
+ loaded_tag =
+ FPU_load_double((double __user *)data_address,
+ &loaded_data);
+ if ((loaded_tag == TAG_Special)
+ && isNaN(&loaded_data)
+ && (real_1op_NaN(&loaded_data) < 0)) {
+ top++;
+ break;
+ }
+ FPU_copy_to_reg0(&loaded_data, loaded_tag);
+ break;
+ case 003: /* fild m16int */
+ clear_C1();
+ loaded_tag =
+ FPU_load_int16((short __user *)data_address, &loaded_data);
+ FPU_copy_to_reg0(&loaded_data, loaded_tag);
+ break;
+ case 010: /* fst m32real */
+ clear_C1();
+ FPU_store_single(st0_ptr, st0_tag,
+ (float __user *)data_address);
+ break;
+ case 011: /* fist m32int */
+ clear_C1();
+ FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
+ break;
+ case 012: /* fst m64real */
+ clear_C1();
+ FPU_store_double(st0_ptr, st0_tag,
+ (double __user *)data_address);
+ break;
+ case 013: /* fist m16int */
+ clear_C1();
+ FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
+ break;
+ case 014: /* fstp m32real */
+ clear_C1();
+ if (FPU_store_single
+ (st0_ptr, st0_tag, (float __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 015: /* fistp m32int */
+ clear_C1();
+ if (FPU_store_int32
+ (st0_ptr, st0_tag, (long __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 016: /* fstp m64real */
+ clear_C1();
+ if (FPU_store_double
+ (st0_ptr, st0_tag, (double __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 017: /* fistp m16int */
+ clear_C1();
+ if (FPU_store_int16
+ (st0_ptr, st0_tag, (short __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 020: /* fldenv m14/28byte */
+ fldenv(addr_modes, (u_char __user *) data_address);
+ /* Ensure that the values just loaded are not changed by
+ fix-up operations. */
+ return 1;
+ case 022: /* frstor m94/108byte */
+ frstor(addr_modes, (u_char __user *) data_address);
+ /* Ensure that the values just loaded are not changed by
+ fix-up operations. */
+ return 1;
+ case 023: /* fbld m80dec */
+ clear_C1();
+ loaded_tag = FPU_load_bcd((u_char __user *) data_address);
+ FPU_settag0(loaded_tag);
+ break;
+ case 024: /* fldcw */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, data_address, 2);
+ FPU_get_user(control_word,
+ (unsigned short __user *)data_address);
+ RE_ENTRANT_CHECK_ON;
+ if (partial_status & ~control_word & CW_Exceptions)
+ partial_status |= (SW_Summary | SW_Backward);
+ else
+ partial_status &= ~(SW_Summary | SW_Backward);
#ifdef PECULIAR_486
- control_word |= 0x40; /* An 80486 appears to always set this bit */
+ control_word |= 0x40; /* An 80486 appears to always set this bit */
#endif /* PECULIAR_486 */
- return 1;
- case 025: /* fld m80real */
- clear_C1();
- loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
- FPU_settag0(loaded_tag);
- break;
- case 027: /* fild m64int */
- clear_C1();
- loaded_tag = FPU_load_int64((long long __user *)data_address);
- if (loaded_tag == TAG_Error)
+ return 1;
+ case 025: /* fld m80real */
+ clear_C1();
+ loaded_tag =
+ FPU_load_extended((long double __user *)data_address, 0);
+ FPU_settag0(loaded_tag);
+ break;
+ case 027: /* fild m64int */
+ clear_C1();
+ loaded_tag = FPU_load_int64((long long __user *)data_address);
+ if (loaded_tag == TAG_Error)
+ return 0;
+ FPU_settag0(loaded_tag);
+ break;
+ case 030: /* fstenv m14/28byte */
+ fstenv(addr_modes, (u_char __user *) data_address);
+ return 1;
+ case 032: /* fsave */
+ fsave(addr_modes, (u_char __user *) data_address);
+ return 1;
+ case 033: /* fbstp m80dec */
+ clear_C1();
+ if (FPU_store_bcd
+ (st0_ptr, st0_tag, (u_char __user *) data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 034: /* fstcw m16int */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, data_address, 2);
+ FPU_put_user(control_word,
+ (unsigned short __user *)data_address);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ case 035: /* fstp m80real */
+ clear_C1();
+ if (FPU_store_extended
+ (st0_ptr, st0_tag, (long double __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ case 036: /* fstsw m2byte */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, data_address, 2);
+ FPU_put_user(status_word(),
+ (unsigned short __user *)data_address);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ case 037: /* fistp m64int */
+ clear_C1();
+ if (FPU_store_int64
+ (st0_ptr, st0_tag, (long long __user *)data_address))
+ pop_0(); /* pop only if the number was actually stored
+ (see the 80486 manual p16-28) */
+ break;
+ }
return 0;
- FPU_settag0(loaded_tag);
- break;
- case 030: /* fstenv m14/28byte */
- fstenv(addr_modes, (u_char __user *)data_address);
- return 1;
- case 032: /* fsave */
- fsave(addr_modes, (u_char __user *)data_address);
- return 1;
- case 033: /* fbstp m80dec */
- clear_C1();
- if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 034: /* fstcw m16int */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,data_address,2);
- FPU_put_user(control_word, (unsigned short __user *) data_address);
- RE_ENTRANT_CHECK_ON;
- return 1;
- case 035: /* fstp m80real */
- clear_C1();
- if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- case 036: /* fstsw m2byte */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,data_address,2);
- FPU_put_user(status_word(),(unsigned short __user *) data_address);
- RE_ENTRANT_CHECK_ON;
- return 1;
- case 037: /* fistp m64int */
- clear_C1();
- if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
- pop_0(); /* pop only if the number was actually stored
- (see the 80486 manual p16-28) */
- break;
- }
- return 0;
}
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h
index 4db79811492..168eb44c93c 100644
--- a/arch/x86/math-emu/poly.h
+++ b/arch/x86/math-emu/poly.h
@@ -21,9 +21,9 @@
allows. 9-byte would probably be sufficient.
*/
typedef struct {
- unsigned long lsw;
- unsigned long midw;
- unsigned long msw;
+ unsigned long lsw;
+ unsigned long midw;
+ unsigned long msw;
} Xsig;
asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
@@ -49,7 +49,6 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
/* Macro to access the 8 ms bytes of an Xsig as a long long */
#define XSIG_LL(x) (*(unsigned long long *)&x.midw)
-
/*
Need to run gcc with optimizations on to get these to
actually be in-line.
@@ -63,59 +62,53 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
static inline unsigned long mul_32_32(const unsigned long arg1,
const unsigned long arg2)
{
- int retval;
- asm volatile ("mull %2; movl %%edx,%%eax" \
- :"=a" (retval) \
- :"0" (arg1), "g" (arg2) \
- :"dx");
- return retval;
+ int retval;
+ asm volatile ("mull %2; movl %%edx,%%eax":"=a" (retval)
+ :"0"(arg1), "g"(arg2)
+ :"dx");
+ return retval;
}
-
/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
{
- asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
- "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
- "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
- "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
- :"=g" (*dest):"g" (dest), "g" (x2)
- :"ax","si","di");
+ asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
+ "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
+ "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
+ "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g"
+ (*dest):"g"(dest), "g"(x2)
+ :"ax", "si", "di");
}
-
/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
/* Note: the constraints in the asm statement didn't always work properly
with gcc 2.5.8. Changing from using edi to using ecx got around the
problem, but keep fingers crossed! */
static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
{
- asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
- "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
- "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
- "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
- "jnc 0f;\n"
- "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
- "movl %4,%%ecx; incl (%%ecx)\n"
- "movl $1,%%eax; jmp 1f;\n"
- "0: xorl %%eax,%%eax;\n"
- "1:\n"
- :"=g" (*exp), "=g" (*dest)
- :"g" (dest), "g" (x2), "g" (exp)
- :"cx","si","ax");
+ asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
+ "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
+ "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
+ "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
+ "jnc 0f;\n"
+ "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
+ "movl %4,%%ecx; incl (%%ecx)\n"
+ "movl $1,%%eax; jmp 1f;\n"
+ "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest)
+ :"g"(dest), "g"(x2), "g"(exp)
+ :"cx", "si", "ax");
}
-
/* Negate (subtract from 1.0) the 12 byte Xsig */
/* This is faster in a loop on my 386 than using the "neg" instruction. */
static inline void negate_Xsig(Xsig *x)
{
- asm volatile("movl %1,%%esi;\n"
- "xorl %%ecx,%%ecx;\n"
- "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
- "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
- "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
- :"=g" (*x):"g" (x):"si","ax","cx");
+ asm volatile ("movl %1,%%esi;\n"
+ "xorl %%ecx,%%ecx;\n"
+ "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
+ "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
+ "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g"
+ (*x):"g"(x):"si", "ax", "cx");
}
#endif /* _POLY_H */
diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c
index 9766ad5e974..b00e9e10cdc 100644
--- a/arch/x86/math-emu/poly_2xm1.c
+++ b/arch/x86/math-emu/poly_2xm1.c
@@ -17,21 +17,19 @@
#include "control_w.h"
#include "poly.h"
-
#define HIPOWER 11
-static const unsigned long long lterms[HIPOWER] =
-{
- 0x0000000000000000LL, /* This term done separately as 12 bytes */
- 0xf5fdeffc162c7543LL,
- 0x1c6b08d704a0bfa6LL,
- 0x0276556df749cc21LL,
- 0x002bb0ffcf14f6b8LL,
- 0x0002861225ef751cLL,
- 0x00001ffcbfcd5422LL,
- 0x00000162c005d5f1LL,
- 0x0000000da96ccb1bLL,
- 0x0000000078d1b897LL,
- 0x000000000422b029LL
+static const unsigned long long lterms[HIPOWER] = {
+ 0x0000000000000000LL, /* This term done separately as 12 bytes */
+ 0xf5fdeffc162c7543LL,
+ 0x1c6b08d704a0bfa6LL,
+ 0x0276556df749cc21LL,
+ 0x002bb0ffcf14f6b8LL,
+ 0x0002861225ef751cLL,
+ 0x00001ffcbfcd5422LL,
+ 0x00000162c005d5f1LL,
+ 0x0000000da96ccb1bLL,
+ 0x0000000078d1b897LL,
+ 0x000000000422b029LL
};
static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
@@ -45,112 +43,103 @@ static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
- &shiftterm2, &shiftterm3 };
-
+ &shiftterm2, &shiftterm3
+};
/*--- poly_2xm1() -----------------------------------------------------------+
| Requires st(0) which is TAG_Valid and < 1. |
+---------------------------------------------------------------------------*/
-int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
+int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
{
- long int exponent, shift;
- unsigned long long Xll;
- Xsig accumulator, Denom, argSignif;
- u_char tag;
+ long int exponent, shift;
+ unsigned long long Xll;
+ Xsig accumulator, Denom, argSignif;
+ u_char tag;
- exponent = exponent16(arg);
+ exponent = exponent16(arg);
#ifdef PARANOID
- if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */
- {
- /* Number negative, too large, or not Valid. */
- EXCEPTION(EX_INTERNAL|0x127);
- return 1;
- }
+ if (exponent >= 0) { /* Don't want a |number| >= 1.0 */
+ /* Number negative, too large, or not Valid. */
+ EXCEPTION(EX_INTERNAL | 0x127);
+ return 1;
+ }
#endif /* PARANOID */
- argSignif.lsw = 0;
- XSIG_LL(argSignif) = Xll = significand(arg);
-
- if ( exponent == -1 )
- {
- shift = (argSignif.msw & 0x40000000) ? 3 : 2;
- /* subtract 0.5 or 0.75 */
- exponent -= 2;
- XSIG_LL(argSignif) <<= 2;
- Xll <<= 2;
- }
- else if ( exponent == -2 )
- {
- shift = 1;
- /* subtract 0.25 */
- exponent--;
- XSIG_LL(argSignif) <<= 1;
- Xll <<= 1;
- }
- else
- shift = 0;
-
- if ( exponent < -2 )
- {
- /* Shift the argument right by the required places. */
- if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
- Xll++; /* round up */
- }
-
- accumulator.lsw = accumulator.midw = accumulator.msw = 0;
- polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
- mul_Xsig_Xsig(&accumulator, &argSignif);
- shr_Xsig(&accumulator, 3);
-
- mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
- add_two_Xsig(&accumulator, &argSignif, &exponent);
-
- if ( shift )
- {
- /* The argument is large, use the identity:
- f(x+a) = f(a) * (f(x) + 1) - 1;
- */
- shr_Xsig(&accumulator, - exponent);
- accumulator.msw |= 0x80000000; /* add 1.0 */
- mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
- accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
- exponent = 1;
- }
-
- if ( sign != SIGN_POS )
- {
- /* The argument is negative, use the identity:
- f(-x) = -f(x) / (1 + f(x))
- */
- Denom.lsw = accumulator.lsw;
- XSIG_LL(Denom) = XSIG_LL(accumulator);
- if ( exponent < 0 )
- shr_Xsig(&Denom, - exponent);
- else if ( exponent > 0 )
- {
- /* exponent must be 1 here */
- XSIG_LL(Denom) <<= 1;
- if ( Denom.lsw & 0x80000000 )
- XSIG_LL(Denom) |= 1;
- (Denom.lsw) <<= 1;
+ argSignif.lsw = 0;
+ XSIG_LL(argSignif) = Xll = significand(arg);
+
+ if (exponent == -1) {
+ shift = (argSignif.msw & 0x40000000) ? 3 : 2;
+ /* subtract 0.5 or 0.75 */
+ exponent -= 2;
+ XSIG_LL(argSignif) <<= 2;
+ Xll <<= 2;
+ } else if (exponent == -2) {
+ shift = 1;
+ /* subtract 0.25 */
+ exponent--;
+ XSIG_LL(argSignif) <<= 1;
+ Xll <<= 1;
+ } else
+ shift = 0;
+
+ if (exponent < -2) {
+ /* Shift the argument right by the required places. */
+ if (FPU_shrx(&Xll, -2 - exponent) >= 0x80000000U)
+ Xll++; /* round up */
+ }
+
+ accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+ polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER - 1);
+ mul_Xsig_Xsig(&accumulator, &argSignif);
+ shr_Xsig(&accumulator, 3);
+
+ mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
+ add_two_Xsig(&accumulator, &argSignif, &exponent);
+
+ if (shift) {
+ /* The argument is large, use the identity:
+ f(x+a) = f(a) * (f(x) + 1) - 1;
+ */
+ shr_Xsig(&accumulator, -exponent);
+ accumulator.msw |= 0x80000000; /* add 1.0 */
+ mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
+ accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
+ exponent = 1;
+ }
+
+ if (sign != SIGN_POS) {
+ /* The argument is negative, use the identity:
+ f(-x) = -f(x) / (1 + f(x))
+ */
+ Denom.lsw = accumulator.lsw;
+ XSIG_LL(Denom) = XSIG_LL(accumulator);
+ if (exponent < 0)
+ shr_Xsig(&Denom, -exponent);
+ else if (exponent > 0) {
+ /* exponent must be 1 here */
+ XSIG_LL(Denom) <<= 1;
+ if (Denom.lsw & 0x80000000)
+ XSIG_LL(Denom) |= 1;
+ (Denom.lsw) <<= 1;
+ }
+ Denom.msw |= 0x80000000; /* add 1.0 */
+ div_Xsig(&accumulator, &Denom, &accumulator);
}
- Denom.msw |= 0x80000000; /* add 1.0 */
- div_Xsig(&accumulator, &Denom, &accumulator);
- }
- /* Convert to 64 bit signed-compatible */
- exponent += round_Xsig(&accumulator);
+ /* Convert to 64 bit signed-compatible */
+ exponent += round_Xsig(&accumulator);
- result = &st(0);
- significand(result) = XSIG_LL(accumulator);
- setexponent16(result, exponent);
+ result = &st(0);
+ significand(result) = XSIG_LL(accumulator);
+ setexponent16(result, exponent);
- tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
+ tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
- setsign(result, sign);
- FPU_settag0(tag);
+ setsign(result, sign);
+ FPU_settag0(tag);
- return 0;
+ return 0;
}
diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c
index 82f702952f6..20c28e58e2d 100644
--- a/arch/x86/math-emu/poly_atan.c
+++ b/arch/x86/math-emu/poly_atan.c
@@ -18,28 +18,25 @@
#include "control_w.h"
#include "poly.h"
-
#define HIPOWERon 6 /* odd poly, negative terms */
-static const unsigned long long oddnegterms[HIPOWERon] =
-{
- 0x0000000000000000LL, /* Dummy (not for - 1.0) */
- 0x015328437f756467LL,
- 0x0005dda27b73dec6LL,
- 0x0000226bf2bfb91aLL,
- 0x000000ccc439c5f7LL,
- 0x0000000355438407LL
-} ;
+static const unsigned long long oddnegterms[HIPOWERon] = {
+ 0x0000000000000000LL, /* Dummy (not for - 1.0) */
+ 0x015328437f756467LL,
+ 0x0005dda27b73dec6LL,
+ 0x0000226bf2bfb91aLL,
+ 0x000000ccc439c5f7LL,
+ 0x0000000355438407LL
+};
#define HIPOWERop 6 /* odd poly, positive terms */
-static const unsigned long long oddplterms[HIPOWERop] =
-{
+static const unsigned long long oddplterms[HIPOWERop] = {
/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */
- 0x0db55a71875c9ac2LL,
- 0x0029fce2d67880b0LL,
- 0x0000dfd3908b4596LL,
- 0x00000550fd61dab4LL,
- 0x0000001c9422b3f9LL,
- 0x000000003e3301e1LL
+ 0x0db55a71875c9ac2LL,
+ 0x0029fce2d67880b0LL,
+ 0x0000dfd3908b4596LL,
+ 0x00000550fd61dab4LL,
+ 0x0000001c9422b3f9LL,
+ 0x000000003e3301e1LL
};
static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
@@ -48,182 +45,164 @@ static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
-
/*--- poly_atan() -----------------------------------------------------------+
| |
+---------------------------------------------------------------------------*/
-void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
- FPU_REG *st1_ptr, u_char st1_tag)
+void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
+ FPU_REG *st1_ptr, u_char st1_tag)
{
- u_char transformed, inverted,
- sign1, sign2;
- int exponent;
- long int dummy_exp;
- Xsig accumulator, Numer, Denom, accumulatore, argSignif,
- argSq, argSqSq;
- u_char tag;
-
- sign1 = getsign(st0_ptr);
- sign2 = getsign(st1_ptr);
- if ( st0_tag == TAG_Valid )
- {
- exponent = exponent(st0_ptr);
- }
- else
- {
- /* This gives non-compatible stack contents... */
- FPU_to_exp16(st0_ptr, st0_ptr);
- exponent = exponent16(st0_ptr);
- }
- if ( st1_tag == TAG_Valid )
- {
- exponent -= exponent(st1_ptr);
- }
- else
- {
- /* This gives non-compatible stack contents... */
- FPU_to_exp16(st1_ptr, st1_ptr);
- exponent -= exponent16(st1_ptr);
- }
-
- if ( (exponent < 0) || ((exponent == 0) &&
- ((st0_ptr->sigh < st1_ptr->sigh) ||
- ((st0_ptr->sigh == st1_ptr->sigh) &&
- (st0_ptr->sigl < st1_ptr->sigl))) ) )
- {
- inverted = 1;
- Numer.lsw = Denom.lsw = 0;
- XSIG_LL(Numer) = significand(st0_ptr);
- XSIG_LL(Denom) = significand(st1_ptr);
- }
- else
- {
- inverted = 0;
- exponent = -exponent;
- Numer.lsw = Denom.lsw = 0;
- XSIG_LL(Numer) = significand(st1_ptr);
- XSIG_LL(Denom) = significand(st0_ptr);
- }
- div_Xsig(&Numer, &Denom, &argSignif);
- exponent += norm_Xsig(&argSignif);
-
- if ( (exponent >= -1)
- || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
- {
- /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
- /* Convert the argument by an identity for atan */
- transformed = 1;
-
- if ( exponent >= 0 )
- {
+ u_char transformed, inverted, sign1, sign2;
+ int exponent;
+ long int dummy_exp;
+ Xsig accumulator, Numer, Denom, accumulatore, argSignif, argSq, argSqSq;
+ u_char tag;
+
+ sign1 = getsign(st0_ptr);
+ sign2 = getsign(st1_ptr);
+ if (st0_tag == TAG_Valid) {
+ exponent = exponent(st0_ptr);
+ } else {
+ /* This gives non-compatible stack contents... */
+ FPU_to_exp16(st0_ptr, st0_ptr);
+ exponent = exponent16(st0_ptr);
+ }
+ if (st1_tag == TAG_Valid) {
+ exponent -= exponent(st1_ptr);
+ } else {
+ /* This gives non-compatible stack contents... */
+ FPU_to_exp16(st1_ptr, st1_ptr);
+ exponent -= exponent16(st1_ptr);
+ }
+
+ if ((exponent < 0) || ((exponent == 0) &&
+ ((st0_ptr->sigh < st1_ptr->sigh) ||
+ ((st0_ptr->sigh == st1_ptr->sigh) &&
+ (st0_ptr->sigl < st1_ptr->sigl))))) {
+ inverted = 1;
+ Numer.lsw = Denom.lsw = 0;
+ XSIG_LL(Numer) = significand(st0_ptr);
+ XSIG_LL(Denom) = significand(st1_ptr);
+ } else {
+ inverted = 0;
+ exponent = -exponent;
+ Numer.lsw = Denom.lsw = 0;
+ XSIG_LL(Numer) = significand(st1_ptr);
+ XSIG_LL(Denom) = significand(st0_ptr);
+ }
+ div_Xsig(&Numer, &Denom, &argSignif);
+ exponent += norm_Xsig(&argSignif);
+
+ if ((exponent >= -1)
+ || ((exponent == -2) && (argSignif.msw > 0xd413ccd0))) {
+ /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
+ /* Convert the argument by an identity for atan */
+ transformed = 1;
+
+ if (exponent >= 0) {
#ifdef PARANOID
- if ( !( (exponent == 0) &&
- (argSignif.lsw == 0) && (argSignif.midw == 0) &&
- (argSignif.msw == 0x80000000) ) )
- {
- EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */
- return;
- }
+ if (!((exponent == 0) &&
+ (argSignif.lsw == 0) && (argSignif.midw == 0) &&
+ (argSignif.msw == 0x80000000))) {
+ EXCEPTION(EX_INTERNAL | 0x104); /* There must be a logic error */
+ return;
+ }
#endif /* PARANOID */
- argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
+ argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
+ } else {
+ Numer.lsw = Denom.lsw = argSignif.lsw;
+ XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
+
+ if (exponent < -1)
+ shr_Xsig(&Numer, -1 - exponent);
+ negate_Xsig(&Numer);
+
+ shr_Xsig(&Denom, -exponent);
+ Denom.msw |= 0x80000000;
+
+ div_Xsig(&Numer, &Denom, &argSignif);
+
+ exponent = -1 + norm_Xsig(&argSignif);
+ }
+ } else {
+ transformed = 0;
+ }
+
+ argSq.lsw = argSignif.lsw;
+ argSq.midw = argSignif.midw;
+ argSq.msw = argSignif.msw;
+ mul_Xsig_Xsig(&argSq, &argSq);
+
+ argSqSq.lsw = argSq.lsw;
+ argSqSq.midw = argSq.midw;
+ argSqSq.msw = argSq.msw;
+ mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+ accumulatore.lsw = argSq.lsw;
+ XSIG_LL(accumulatore) = XSIG_LL(argSq);
+
+ shr_Xsig(&argSq, 2 * (-1 - exponent - 1));
+ shr_Xsig(&argSqSq, 4 * (-1 - exponent - 1));
+
+ /* Now have argSq etc with binary point at the left
+ .1xxxxxxxx */
+
+ /* Do the basic fixed point polynomial evaluation */
+ accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+ polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
+ oddplterms, HIPOWERop - 1);
+ mul64_Xsig(&accumulator, &XSIG_LL(argSq));
+ negate_Xsig(&accumulator);
+ polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms,
+ HIPOWERon - 1);
+ negate_Xsig(&accumulator);
+ add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
+
+ mul64_Xsig(&accumulatore, &denomterm);
+ shr_Xsig(&accumulatore, 1 + 2 * (-1 - exponent));
+ accumulatore.msw |= 0x80000000;
+
+ div_Xsig(&accumulator, &accumulatore, &accumulator);
+
+ mul_Xsig_Xsig(&accumulator, &argSignif);
+ mul_Xsig_Xsig(&accumulator, &argSq);
+
+ shr_Xsig(&accumulator, 3);
+ negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &argSignif);
+
+ if (transformed) {
+ /* compute pi/4 - accumulator */
+ shr_Xsig(&accumulator, -1 - exponent);
+ negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &pi_signif);
+ exponent = -1;
+ }
+
+ if (inverted) {
+ /* compute pi/2 - accumulator */
+ shr_Xsig(&accumulator, -exponent);
+ negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &pi_signif);
+ exponent = 0;
}
- else
- {
- Numer.lsw = Denom.lsw = argSignif.lsw;
- XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
-
- if ( exponent < -1 )
- shr_Xsig(&Numer, -1-exponent);
- negate_Xsig(&Numer);
-
- shr_Xsig(&Denom, -exponent);
- Denom.msw |= 0x80000000;
-
- div_Xsig(&Numer, &Denom, &argSignif);
-
- exponent = -1 + norm_Xsig(&argSignif);
+
+ if (sign1) {
+ /* compute pi - accumulator */
+ shr_Xsig(&accumulator, 1 - exponent);
+ negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &pi_signif);
+ exponent = 1;
}
- }
- else
- {
- transformed = 0;
- }
-
- argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
- argSq.msw = argSignif.msw;
- mul_Xsig_Xsig(&argSq, &argSq);
-
- argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
- mul_Xsig_Xsig(&argSqSq, &argSqSq);
-
- accumulatore.lsw = argSq.lsw;
- XSIG_LL(accumulatore) = XSIG_LL(argSq);
-
- shr_Xsig(&argSq, 2*(-1-exponent-1));
- shr_Xsig(&argSqSq, 4*(-1-exponent-1));
-
- /* Now have argSq etc with binary point at the left
- .1xxxxxxxx */
-
- /* Do the basic fixed point polynomial evaluation */
- accumulator.msw = accumulator.midw = accumulator.lsw = 0;
- polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
- oddplterms, HIPOWERop-1);
- mul64_Xsig(&accumulator, &XSIG_LL(argSq));
- negate_Xsig(&accumulator);
- polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
- negate_Xsig(&accumulator);
- add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
-
- mul64_Xsig(&accumulatore, &denomterm);
- shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
- accumulatore.msw |= 0x80000000;
-
- div_Xsig(&accumulator, &accumulatore, &accumulator);
-
- mul_Xsig_Xsig(&accumulator, &argSignif);
- mul_Xsig_Xsig(&accumulator, &argSq);
-
- shr_Xsig(&accumulator, 3);
- negate_Xsig(&accumulator);
- add_Xsig_Xsig(&accumulator, &argSignif);
-
- if ( transformed )
- {
- /* compute pi/4 - accumulator */
- shr_Xsig(&accumulator, -1-exponent);
- negate_Xsig(&accumulator);
- add_Xsig_Xsig(&accumulator, &pi_signif);
- exponent = -1;
- }
-
- if ( inverted )
- {
- /* compute pi/2 - accumulator */
- shr_Xsig(&accumulator, -exponent);
- negate_Xsig(&accumulator);
- add_Xsig_Xsig(&accumulator, &pi_signif);
- exponent = 0;
- }
-
- if ( sign1 )
- {
- /* compute pi - accumulator */
- shr_Xsig(&accumulator, 1 - exponent);
- negate_Xsig(&accumulator);
- add_Xsig_Xsig(&accumulator, &pi_signif);
- exponent = 1;
- }
-
- exponent += round_Xsig(&accumulator);
-
- significand(st1_ptr) = XSIG_LL(accumulator);
- setexponent16(st1_ptr, exponent);
-
- tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
- FPU_settagi(1, tag);
-
- set_precision_flag_up(); /* We do not really know if up or down,
- use this as the default. */
+
+ exponent += round_Xsig(&accumulator);
+
+ significand(st1_ptr) = XSIG_LL(accumulator);
+ setexponent16(st1_ptr, exponent);
+
+ tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
+ FPU_settagi(1, tag);
+
+ set_precision_flag_up(); /* We do not really know if up or down,
+ use this as the default. */
}
diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c
index dd00e1d5b07..8e2ff4b28a0 100644
--- a/arch/x86/math-emu/poly_l2.c
+++ b/arch/x86/math-emu/poly_l2.c
@@ -10,7 +10,6 @@
| |
+---------------------------------------------------------------------------*/
-
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
@@ -18,184 +17,163 @@
#include "control_w.h"
#include "poly.h"
-
static void log2_kernel(FPU_REG const *arg, u_char argsign,
- Xsig *accum_result, long int *expon);
-
+ Xsig * accum_result, long int *expon);
/*--- poly_l2() -------------------------------------------------------------+
| Base 2 logarithm by a polynomial approximation. |
+---------------------------------------------------------------------------*/
-void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
+void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
{
- long int exponent, expon, expon_expon;
- Xsig accumulator, expon_accum, yaccum;
- u_char sign, argsign;
- FPU_REG x;
- int tag;
-
- exponent = exponent16(st0_ptr);
-
- /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
- if ( st0_ptr->sigh > (unsigned)0xb504f334 )
- {
- /* Treat as sqrt(2)/2 < st0_ptr < 1 */
- significand(&x) = - significand(st0_ptr);
- setexponent16(&x, -1);
- exponent++;
- argsign = SIGN_NEG;
- }
- else
- {
- /* Treat as 1 <= st0_ptr < sqrt(2) */
- x.sigh = st0_ptr->sigh - 0x80000000;
- x.sigl = st0_ptr->sigl;
- setexponent16(&x, 0);
- argsign = SIGN_POS;
- }
- tag = FPU_normalize_nuo(&x);
-
- if ( tag == TAG_Zero )
- {
- expon = 0;
- accumulator.msw = accumulator.midw = accumulator.lsw = 0;
- }
- else
- {
- log2_kernel(&x, argsign, &accumulator, &expon);
- }
-
- if ( exponent < 0 )
- {
- sign = SIGN_NEG;
- exponent = -exponent;
- }
- else
- sign = SIGN_POS;
- expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
- if ( exponent )
- {
- expon_expon = 31 + norm_Xsig(&expon_accum);
- shr_Xsig(&accumulator, expon_expon - expon);
-
- if ( sign ^ argsign )
- negate_Xsig(&accumulator);
- add_Xsig_Xsig(&accumulator, &expon_accum);
- }
- else
- {
- expon_expon = expon;
- sign = argsign;
- }
-
- yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
- mul_Xsig_Xsig(&accumulator, &yaccum);
-
- expon_expon += round_Xsig(&accumulator);
-
- if ( accumulator.msw == 0 )
- {
- FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
- return;
- }
-
- significand(st1_ptr) = XSIG_LL(accumulator);
- setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
-
- tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
- FPU_settagi(1, tag);
-
- set_precision_flag_up(); /* 80486 appears to always do this */
-
- return;
+ long int exponent, expon, expon_expon;
+ Xsig accumulator, expon_accum, yaccum;
+ u_char sign, argsign;
+ FPU_REG x;
+ int tag;
+
+ exponent = exponent16(st0_ptr);
+
+ /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
+ if (st0_ptr->sigh > (unsigned)0xb504f334) {
+ /* Treat as sqrt(2)/2 < st0_ptr < 1 */
+ significand(&x) = -significand(st0_ptr);
+ setexponent16(&x, -1);
+ exponent++;
+ argsign = SIGN_NEG;
+ } else {
+ /* Treat as 1 <= st0_ptr < sqrt(2) */
+ x.sigh = st0_ptr->sigh - 0x80000000;
+ x.sigl = st0_ptr->sigl;
+ setexponent16(&x, 0);
+ argsign = SIGN_POS;
+ }
+ tag = FPU_normalize_nuo(&x);
-}
+ if (tag == TAG_Zero) {
+ expon = 0;
+ accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+ } else {
+ log2_kernel(&x, argsign, &accumulator, &expon);
+ }
+
+ if (exponent < 0) {
+ sign = SIGN_NEG;
+ exponent = -exponent;
+ } else
+ sign = SIGN_POS;
+ expon_accum.msw = exponent;
+ expon_accum.midw = expon_accum.lsw = 0;
+ if (exponent) {
+ expon_expon = 31 + norm_Xsig(&expon_accum);
+ shr_Xsig(&accumulator, expon_expon - expon);
+
+ if (sign ^ argsign)
+ negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &expon_accum);
+ } else {
+ expon_expon = expon;
+ sign = argsign;
+ }
+
+ yaccum.lsw = 0;
+ XSIG_LL(yaccum) = significand(st1_ptr);
+ mul_Xsig_Xsig(&accumulator, &yaccum);
+
+ expon_expon += round_Xsig(&accumulator);
+
+ if (accumulator.msw == 0) {
+ FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
+ return;
+ }
+
+ significand(st1_ptr) = XSIG_LL(accumulator);
+ setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
+ tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
+ FPU_settagi(1, tag);
+
+ set_precision_flag_up(); /* 80486 appears to always do this */
+
+ return;
+
+}
/*--- poly_l2p1() -----------------------------------------------------------+
| Base 2 logarithm by a polynomial approximation. |
| log2(x+1) |
+---------------------------------------------------------------------------*/
-int poly_l2p1(u_char sign0, u_char sign1,
- FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
+int poly_l2p1(u_char sign0, u_char sign1,
+ FPU_REG * st0_ptr, FPU_REG * st1_ptr, FPU_REG * dest)
{
- u_char tag;
- long int exponent;
- Xsig accumulator, yaccum;
+ u_char tag;
+ long int exponent;
+ Xsig accumulator, yaccum;
- if ( exponent16(st0_ptr) < 0 )
- {
- log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
+ if (exponent16(st0_ptr) < 0) {
+ log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
- yaccum.lsw = 0;
- XSIG_LL(yaccum) = significand(st1_ptr);
- mul_Xsig_Xsig(&accumulator, &yaccum);
+ yaccum.lsw = 0;
+ XSIG_LL(yaccum) = significand(st1_ptr);
+ mul_Xsig_Xsig(&accumulator, &yaccum);
- exponent += round_Xsig(&accumulator);
+ exponent += round_Xsig(&accumulator);
- exponent += exponent16(st1_ptr) + 1;
- if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
+ exponent += exponent16(st1_ptr) + 1;
+ if (exponent < EXP_WAY_UNDER)
+ exponent = EXP_WAY_UNDER;
- significand(dest) = XSIG_LL(accumulator);
- setexponent16(dest, exponent);
+ significand(dest) = XSIG_LL(accumulator);
+ setexponent16(dest, exponent);
- tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
- FPU_settagi(1, tag);
+ tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
+ FPU_settagi(1, tag);
- if ( tag == TAG_Valid )
- set_precision_flag_up(); /* 80486 appears to always do this */
- }
- else
- {
- /* The magnitude of st0_ptr is far too large. */
+ if (tag == TAG_Valid)
+ set_precision_flag_up(); /* 80486 appears to always do this */
+ } else {
+ /* The magnitude of st0_ptr is far too large. */
- if ( sign0 != SIGN_POS )
- {
- /* Trying to get the log of a negative number. */
-#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
- changesign(st1_ptr);
+ if (sign0 != SIGN_POS) {
+ /* Trying to get the log of a negative number. */
+#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
+ changesign(st1_ptr);
#else
- if ( arith_invalid(1) < 0 )
- return 1;
+ if (arith_invalid(1) < 0)
+ return 1;
#endif /* PECULIAR_486 */
- }
+ }
- /* 80486 appears to do this */
- if ( sign0 == SIGN_NEG )
- set_precision_flag_down();
- else
- set_precision_flag_up();
- }
+ /* 80486 appears to do this */
+ if (sign0 == SIGN_NEG)
+ set_precision_flag_down();
+ else
+ set_precision_flag_up();
+ }
- if ( exponent(dest) <= EXP_UNDER )
- EXCEPTION(EX_Underflow);
+ if (exponent(dest) <= EXP_UNDER)
+ EXCEPTION(EX_Underflow);
- return 0;
+ return 0;
}
-
-
-
#undef HIPOWER
#define HIPOWER 10
-static const unsigned long long logterms[HIPOWER] =
-{
- 0x2a8eca5705fc2ef0LL,
- 0xf6384ee1d01febceLL,
- 0x093bb62877cdf642LL,
- 0x006985d8a9ec439bLL,
- 0x0005212c4f55a9c8LL,
- 0x00004326a16927f0LL,
- 0x0000038d1d80a0e7LL,
- 0x0000003141cc80c6LL,
- 0x00000002b1668c9fLL,
- 0x000000002c7a46aaLL
+static const unsigned long long logterms[HIPOWER] = {
+ 0x2a8eca5705fc2ef0LL,
+ 0xf6384ee1d01febceLL,
+ 0x093bb62877cdf642LL,
+ 0x006985d8a9ec439bLL,
+ 0x0005212c4f55a9c8LL,
+ 0x00004326a16927f0LL,
+ 0x0000038d1d80a0e7LL,
+ 0x0000003141cc80c6LL,
+ 0x00000002b1668c9fLL,
+ 0x000000002c7a46aaLL
};
static const unsigned long leadterm = 0xb8000000;
-
/*--- log2_kernel() ---------------------------------------------------------+
| Base 2 logarithm by a polynomial approximation. |
| log2(x+1) |
@@ -203,70 +181,64 @@ static const unsigned long leadterm = 0xb8000000;
static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
long int *expon)
{
- long int exponent, adj;
- unsigned long long Xsq;
- Xsig accumulator, Numer, Denom, argSignif, arg_signif;
-
- exponent = exponent16(arg);
- Numer.lsw = Denom.lsw = 0;
- XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
- if ( argsign == SIGN_POS )
- {
- shr_Xsig(&Denom, 2 - (1 + exponent));
- Denom.msw |= 0x80000000;
- div_Xsig(&Numer, &Denom, &argSignif);
- }
- else
- {
- shr_Xsig(&Denom, 1 - (1 + exponent));
- negate_Xsig(&Denom);
- if ( Denom.msw & 0x80000000 )
- {
- div_Xsig(&Numer, &Denom, &argSignif);
- exponent ++;
- }
- else
- {
- /* Denom must be 1.0 */
- argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
- argSignif.msw = Numer.msw;
+ long int exponent, adj;
+ unsigned long long Xsq;
+ Xsig accumulator, Numer, Denom, argSignif, arg_signif;
+
+ exponent = exponent16(arg);
+ Numer.lsw = Denom.lsw = 0;
+ XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
+ if (argsign == SIGN_POS) {
+ shr_Xsig(&Denom, 2 - (1 + exponent));
+ Denom.msw |= 0x80000000;
+ div_Xsig(&Numer, &Denom, &argSignif);
+ } else {
+ shr_Xsig(&Denom, 1 - (1 + exponent));
+ negate_Xsig(&Denom);
+ if (Denom.msw & 0x80000000) {
+ div_Xsig(&Numer, &Denom, &argSignif);
+ exponent++;
+ } else {
+ /* Denom must be 1.0 */
+ argSignif.lsw = Numer.lsw;
+ argSignif.midw = Numer.midw;
+ argSignif.msw = Numer.msw;
+ }
}
- }
#ifndef PECULIAR_486
- /* Should check here that |local_arg| is within the valid range */
- if ( exponent >= -2 )
- {
- if ( (exponent > -2) ||
- (argSignif.msw > (unsigned)0xafb0ccc0) )
- {
- /* The argument is too large */
+ /* Should check here that |local_arg| is within the valid range */
+ if (exponent >= -2) {
+ if ((exponent > -2) || (argSignif.msw > (unsigned)0xafb0ccc0)) {
+ /* The argument is too large */
+ }
}
- }
#endif /* PECULIAR_486 */
- arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
- adj = norm_Xsig(&argSignif);
- accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
- mul_Xsig_Xsig(&accumulator, &accumulator);
- shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
- Xsq = XSIG_LL(accumulator);
- if ( accumulator.lsw & 0x80000000 )
- Xsq++;
-
- accumulator.msw = accumulator.midw = accumulator.lsw = 0;
- /* Do the basic fixed point polynomial evaluation */
- polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
-
- mul_Xsig_Xsig(&accumulator, &argSignif);
- shr_Xsig(&accumulator, 6 - adj);
-
- mul32_Xsig(&arg_signif, leadterm);
- add_two_Xsig(&accumulator, &arg_signif, &exponent);
-
- *expon = exponent + 1;
- accum_result->lsw = accumulator.lsw;
- accum_result->midw = accumulator.midw;
- accum_result->msw = accumulator.msw;
+ arg_signif.lsw = argSignif.lsw;
+ XSIG_LL(arg_signif) = XSIG_LL(argSignif);
+ adj = norm_Xsig(&argSignif);
+ accumulator.lsw = argSignif.lsw;
+ XSIG_LL(accumulator) = XSIG_LL(argSignif);
+ mul_Xsig_Xsig(&accumulator, &accumulator);
+ shr_Xsig(&accumulator, 2 * (-1 - (1 + exponent + adj)));
+ Xsq = XSIG_LL(accumulator);
+ if (accumulator.lsw & 0x80000000)
+ Xsq++;
+
+ accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+ /* Do the basic fixed point polynomial evaluation */
+ polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER - 1);
+
+ mul_Xsig_Xsig(&accumulator, &argSignif);
+ shr_Xsig(&accumulator, 6 - adj);
+
+ mul32_Xsig(&arg_signif, leadterm);
+ add_two_Xsig(&accumulator, &arg_signif, &exponent);
+
+ *expon = exponent + 1;
+ accum_result->lsw = accumulator.lsw;
+ accum_result->midw = accumulator.midw;
+ accum_result->msw = accumulator.msw;
}
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c
index a36313fb06f..b862039c728 100644
--- a/arch/x86/math-emu/poly_sin.c
+++ b/arch/x86/math-emu/poly_sin.c
@@ -11,7 +11,6 @@
| |
+---------------------------------------------------------------------------*/
-
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
@@ -19,379 +18,361 @@
#include "control_w.h"
#include "poly.h"
-
#define N_COEFF_P 4
#define N_COEFF_N 4
-static const unsigned long long pos_terms_l[N_COEFF_P] =
-{
- 0xaaaaaaaaaaaaaaabLL,
- 0x00d00d00d00cf906LL,
- 0x000006b99159a8bbLL,
- 0x000000000d7392e6LL
+static const unsigned long long pos_terms_l[N_COEFF_P] = {
+ 0xaaaaaaaaaaaaaaabLL,
+ 0x00d00d00d00cf906LL,
+ 0x000006b99159a8bbLL,
+ 0x000000000d7392e6LL
};
-static const unsigned long long neg_terms_l[N_COEFF_N] =
-{
- 0x2222222222222167LL,
- 0x0002e3bc74aab624LL,
- 0x0000000b09229062LL,
- 0x00000000000c7973LL
+static const unsigned long long neg_terms_l[N_COEFF_N] = {
+ 0x2222222222222167LL,
+ 0x0002e3bc74aab624LL,
+ 0x0000000b09229062LL,
+ 0x00000000000c7973LL
};
-
-
#define N_COEFF_PH 4
#define N_COEFF_NH 4
-static const unsigned long long pos_terms_h[N_COEFF_PH] =
-{
- 0x0000000000000000LL,
- 0x05b05b05b05b0406LL,
- 0x000049f93edd91a9LL,
- 0x00000000c9c9ed62LL
+static const unsigned long long pos_terms_h[N_COEFF_PH] = {
+ 0x0000000000000000LL,
+ 0x05b05b05b05b0406LL,
+ 0x000049f93edd91a9LL,
+ 0x00000000c9c9ed62LL
};
-static const unsigned long long neg_terms_h[N_COEFF_NH] =
-{
- 0xaaaaaaaaaaaaaa98LL,
- 0x001a01a01a019064LL,
- 0x0000008f76c68a77LL,
- 0x0000000000d58f5eLL
+static const unsigned long long neg_terms_h[N_COEFF_NH] = {
+ 0xaaaaaaaaaaaaaa98LL,
+ 0x001a01a01a019064LL,
+ 0x0000008f76c68a77LL,
+ 0x0000000000d58f5eLL
};
-
/*--- poly_sine() -----------------------------------------------------------+
| |
+---------------------------------------------------------------------------*/
-void poly_sine(FPU_REG *st0_ptr)
+void poly_sine(FPU_REG *st0_ptr)
{
- int exponent, echange;
- Xsig accumulator, argSqrd, argTo4;
- unsigned long fix_up, adj;
- unsigned long long fixed_arg;
- FPU_REG result;
+ int exponent, echange;
+ Xsig accumulator, argSqrd, argTo4;
+ unsigned long fix_up, adj;
+ unsigned long long fixed_arg;
+ FPU_REG result;
- exponent = exponent(st0_ptr);
+ exponent = exponent(st0_ptr);
- accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+ accumulator.lsw = accumulator.midw = accumulator.msw = 0;
- /* Split into two ranges, for arguments below and above 1.0 */
- /* The boundary between upper and lower is approx 0.88309101259 */
- if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
- {
- /* The argument is <= 0.88309101259 */
+ /* Split into two ranges, for arguments below and above 1.0 */
+ /* The boundary between upper and lower is approx 0.88309101259 */
+ if ((exponent < -1)
+ || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
+ /* The argument is <= 0.88309101259 */
+
+ argSqrd.msw = st0_ptr->sigh;
+ argSqrd.midw = st0_ptr->sigl;
+ argSqrd.lsw = 0;
+ mul64_Xsig(&argSqrd, &significand(st0_ptr));
+ shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+ argTo4.msw = argSqrd.msw;
+ argTo4.midw = argSqrd.midw;
+ argTo4.lsw = argSqrd.lsw;
+ mul_Xsig_Xsig(&argTo4, &argTo4);
- argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
- mul64_Xsig(&argSqrd, &significand(st0_ptr));
- shr_Xsig(&argSqrd, 2*(-1-exponent));
- argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
- argTo4.lsw = argSqrd.lsw;
- mul_Xsig_Xsig(&argTo4, &argTo4);
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+ N_COEFF_N - 1);
+ mul_Xsig_Xsig(&accumulator, &argSqrd);
+ negate_Xsig(&accumulator);
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
- N_COEFF_N-1);
- mul_Xsig_Xsig(&accumulator, &argSqrd);
- negate_Xsig(&accumulator);
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+ N_COEFF_P - 1);
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
- N_COEFF_P-1);
+ shr_Xsig(&accumulator, 2); /* Divide by four */
+ accumulator.msw |= 0x80000000; /* Add 1.0 */
- shr_Xsig(&accumulator, 2); /* Divide by four */
- accumulator.msw |= 0x80000000; /* Add 1.0 */
+ mul64_Xsig(&accumulator, &significand(st0_ptr));
+ mul64_Xsig(&accumulator, &significand(st0_ptr));
+ mul64_Xsig(&accumulator, &significand(st0_ptr));
- mul64_Xsig(&accumulator, &significand(st0_ptr));
- mul64_Xsig(&accumulator, &significand(st0_ptr));
- mul64_Xsig(&accumulator, &significand(st0_ptr));
+ /* Divide by four, FPU_REG compatible, etc */
+ exponent = 3 * exponent;
- /* Divide by four, FPU_REG compatible, etc */
- exponent = 3*exponent;
+ /* The minimum exponent difference is 3 */
+ shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
- /* The minimum exponent difference is 3 */
- shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
+ negate_Xsig(&accumulator);
+ XSIG_LL(accumulator) += significand(st0_ptr);
- negate_Xsig(&accumulator);
- XSIG_LL(accumulator) += significand(st0_ptr);
+ echange = round_Xsig(&accumulator);
- echange = round_Xsig(&accumulator);
+ setexponentpos(&result, exponent(st0_ptr) + echange);
+ } else {
+ /* The argument is > 0.88309101259 */
+ /* We use sin(st(0)) = cos(pi/2-st(0)) */
- setexponentpos(&result, exponent(st0_ptr) + echange);
- }
- else
- {
- /* The argument is > 0.88309101259 */
- /* We use sin(st(0)) = cos(pi/2-st(0)) */
+ fixed_arg = significand(st0_ptr);
- fixed_arg = significand(st0_ptr);
+ if (exponent == 0) {
+ /* The argument is >= 1.0 */
- if ( exponent == 0 )
- {
- /* The argument is >= 1.0 */
+ /* Put the binary point at the left. */
+ fixed_arg <<= 1;
+ }
+ /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+ fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+ /* There is a special case which arises due to rounding, to fix here. */
+ if (fixed_arg == 0xffffffffffffffffLL)
+ fixed_arg = 0;
- /* Put the binary point at the left. */
- fixed_arg <<= 1;
- }
- /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
- fixed_arg = 0x921fb54442d18469LL - fixed_arg;
- /* There is a special case which arises due to rounding, to fix here. */
- if ( fixed_arg == 0xffffffffffffffffLL )
- fixed_arg = 0;
+ XSIG_LL(argSqrd) = fixed_arg;
+ argSqrd.lsw = 0;
+ mul64_Xsig(&argSqrd, &fixed_arg);
- XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
- mul64_Xsig(&argSqrd, &fixed_arg);
+ XSIG_LL(argTo4) = XSIG_LL(argSqrd);
+ argTo4.lsw = argSqrd.lsw;
+ mul_Xsig_Xsig(&argTo4, &argTo4);
- XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
- mul_Xsig_Xsig(&argTo4, &argTo4);
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+ N_COEFF_NH - 1);
+ mul_Xsig_Xsig(&accumulator, &argSqrd);
+ negate_Xsig(&accumulator);
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
- N_COEFF_NH-1);
- mul_Xsig_Xsig(&accumulator, &argSqrd);
- negate_Xsig(&accumulator);
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+ N_COEFF_PH - 1);
+ negate_Xsig(&accumulator);
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
- N_COEFF_PH-1);
- negate_Xsig(&accumulator);
+ mul64_Xsig(&accumulator, &fixed_arg);
+ mul64_Xsig(&accumulator, &fixed_arg);
- mul64_Xsig(&accumulator, &fixed_arg);
- mul64_Xsig(&accumulator, &fixed_arg);
+ shr_Xsig(&accumulator, 3);
+ negate_Xsig(&accumulator);
- shr_Xsig(&accumulator, 3);
- negate_Xsig(&accumulator);
+ add_Xsig_Xsig(&accumulator, &argSqrd);
- add_Xsig_Xsig(&accumulator, &argSqrd);
+ shr_Xsig(&accumulator, 1);
- shr_Xsig(&accumulator, 1);
+ accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
+ negate_Xsig(&accumulator);
- accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
- negate_Xsig(&accumulator);
+ /* The basic computation is complete. Now fix the answer to
+ compensate for the error due to the approximation used for
+ pi/2
+ */
- /* The basic computation is complete. Now fix the answer to
- compensate for the error due to the approximation used for
- pi/2
- */
+ /* This has an exponent of -65 */
+ fix_up = 0x898cc517;
+ /* The fix-up needs to be improved for larger args */
+ if (argSqrd.msw & 0xffc00000) {
+ /* Get about 32 bit precision in these: */
+ fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
+ }
+ fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
- /* This has an exponent of -65 */
- fix_up = 0x898cc517;
- /* The fix-up needs to be improved for larger args */
- if ( argSqrd.msw & 0xffc00000 )
- {
- /* Get about 32 bit precision in these: */
- fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
- }
- fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
+ adj = accumulator.lsw; /* temp save */
+ accumulator.lsw -= fix_up;
+ if (accumulator.lsw > adj)
+ XSIG_LL(accumulator)--;
- adj = accumulator.lsw; /* temp save */
- accumulator.lsw -= fix_up;
- if ( accumulator.lsw > adj )
- XSIG_LL(accumulator) --;
+ echange = round_Xsig(&accumulator);
- echange = round_Xsig(&accumulator);
-
- setexponentpos(&result, echange - 1);
- }
+ setexponentpos(&result, echange - 1);
+ }
- significand(&result) = XSIG_LL(accumulator);
- setsign(&result, getsign(st0_ptr));
- FPU_copy_to_reg0(&result, TAG_Valid);
+ significand(&result) = XSIG_LL(accumulator);
+ setsign(&result, getsign(st0_ptr));
+ FPU_copy_to_reg0(&result, TAG_Valid);
#ifdef PARANOID
- if ( (exponent(&result) >= 0)
- && (significand(&result) > 0x8000000000000000LL) )
- {
- EXCEPTION(EX_INTERNAL|0x150);
- }
+ if ((exponent(&result) >= 0)
+ && (significand(&result) > 0x8000000000000000LL)) {
+ EXCEPTION(EX_INTERNAL | 0x150);
+ }
#endif /* PARANOID */
}
-
-
/*--- poly_cos() ------------------------------------------------------------+
| |
+---------------------------------------------------------------------------*/
-void poly_cos(FPU_REG *st0_ptr)
+void poly_cos(FPU_REG *st0_ptr)
{
- FPU_REG result;
- long int exponent, exp2, echange;
- Xsig accumulator, argSqrd, fix_up, argTo4;
- unsigned long long fixed_arg;
+ FPU_REG result;
+ long int exponent, exp2, echange;
+ Xsig accumulator, argSqrd, fix_up, argTo4;
+ unsigned long long fixed_arg;
#ifdef PARANOID
- if ( (exponent(st0_ptr) > 0)
- || ((exponent(st0_ptr) == 0)
- && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
- {
- EXCEPTION(EX_Invalid);
- FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
- return;
- }
-#endif /* PARANOID */
-
- exponent = exponent(st0_ptr);
-
- accumulator.lsw = accumulator.midw = accumulator.msw = 0;
-
- if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
- {
- /* arg is < 0.687705 */
-
- argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
- argSqrd.lsw = 0;
- mul64_Xsig(&argSqrd, &significand(st0_ptr));
-
- if ( exponent < -1 )
- {
- /* shift the argument right by the required places */
- shr_Xsig(&argSqrd, 2*(-1-exponent));
- }
-
- argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
- argTo4.lsw = argSqrd.lsw;
- mul_Xsig_Xsig(&argTo4, &argTo4);
-
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
- N_COEFF_NH-1);
- mul_Xsig_Xsig(&accumulator, &argSqrd);
- negate_Xsig(&accumulator);
-
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
- N_COEFF_PH-1);
- negate_Xsig(&accumulator);
-
- mul64_Xsig(&accumulator, &significand(st0_ptr));
- mul64_Xsig(&accumulator, &significand(st0_ptr));
- shr_Xsig(&accumulator, -2*(1+exponent));
-
- shr_Xsig(&accumulator, 3);
- negate_Xsig(&accumulator);
-
- add_Xsig_Xsig(&accumulator, &argSqrd);
-
- shr_Xsig(&accumulator, 1);
-
- /* It doesn't matter if accumulator is all zero here, the
- following code will work ok */
- negate_Xsig(&accumulator);
-
- if ( accumulator.lsw & 0x80000000 )
- XSIG_LL(accumulator) ++;
- if ( accumulator.msw == 0 )
- {
- /* The result is 1.0 */
- FPU_copy_to_reg0(&CONST_1, TAG_Valid);
- return;
- }
- else
- {
- significand(&result) = XSIG_LL(accumulator);
-
- /* will be a valid positive nr with expon = -1 */
- setexponentpos(&result, -1);
- }
- }
- else
- {
- fixed_arg = significand(st0_ptr);
-
- if ( exponent == 0 )
- {
- /* The argument is >= 1.0 */
-
- /* Put the binary point at the left. */
- fixed_arg <<= 1;
- }
- /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
- fixed_arg = 0x921fb54442d18469LL - fixed_arg;
- /* There is a special case which arises due to rounding, to fix here. */
- if ( fixed_arg == 0xffffffffffffffffLL )
- fixed_arg = 0;
-
- exponent = -1;
- exp2 = -1;
-
- /* A shift is needed here only for a narrow range of arguments,
- i.e. for fixed_arg approx 2^-32, but we pick up more... */
- if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
- {
- fixed_arg <<= 16;
- exponent -= 16;
- exp2 -= 16;
+ if ((exponent(st0_ptr) > 0)
+ || ((exponent(st0_ptr) == 0)
+ && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
+ EXCEPTION(EX_Invalid);
+ FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
+ return;
}
+#endif /* PARANOID */
- XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
- mul64_Xsig(&argSqrd, &fixed_arg);
-
- if ( exponent < -1 )
- {
- /* shift the argument right by the required places */
- shr_Xsig(&argSqrd, 2*(-1-exponent));
- }
-
- argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
- argTo4.lsw = argSqrd.lsw;
- mul_Xsig_Xsig(&argTo4, &argTo4);
-
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
- N_COEFF_N-1);
- mul_Xsig_Xsig(&accumulator, &argSqrd);
- negate_Xsig(&accumulator);
-
- polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
- N_COEFF_P-1);
-
- shr_Xsig(&accumulator, 2); /* Divide by four */
- accumulator.msw |= 0x80000000; /* Add 1.0 */
-
- mul64_Xsig(&accumulator, &fixed_arg);
- mul64_Xsig(&accumulator, &fixed_arg);
- mul64_Xsig(&accumulator, &fixed_arg);
-
- /* Divide by four, FPU_REG compatible, etc */
- exponent = 3*exponent;
-
- /* The minimum exponent difference is 3 */
- shr_Xsig(&accumulator, exp2 - exponent);
-
- negate_Xsig(&accumulator);
- XSIG_LL(accumulator) += fixed_arg;
-
- /* The basic computation is complete. Now fix the answer to
- compensate for the error due to the approximation used for
- pi/2
- */
-
- /* This has an exponent of -65 */
- XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
- fix_up.lsw = 0;
-
- /* The fix-up needs to be improved for larger args */
- if ( argSqrd.msw & 0xffc00000 )
- {
- /* Get about 32 bit precision in these: */
- fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
- fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
+ exponent = exponent(st0_ptr);
+
+ accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+
+ if ((exponent < -1)
+ || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
+ /* arg is < 0.687705 */
+
+ argSqrd.msw = st0_ptr->sigh;
+ argSqrd.midw = st0_ptr->sigl;
+ argSqrd.lsw = 0;
+ mul64_Xsig(&argSqrd, &significand(st0_ptr));
+
+ if (exponent < -1) {
+ /* shift the argument right by the required places */
+ shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+ }
+
+ argTo4.msw = argSqrd.msw;
+ argTo4.midw = argSqrd.midw;
+ argTo4.lsw = argSqrd.lsw;
+ mul_Xsig_Xsig(&argTo4, &argTo4);
+
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+ N_COEFF_NH - 1);
+ mul_Xsig_Xsig(&accumulator, &argSqrd);
+ negate_Xsig(&accumulator);
+
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+ N_COEFF_PH - 1);
+ negate_Xsig(&accumulator);
+
+ mul64_Xsig(&accumulator, &significand(st0_ptr));
+ mul64_Xsig(&accumulator, &significand(st0_ptr));
+ shr_Xsig(&accumulator, -2 * (1 + exponent));
+
+ shr_Xsig(&accumulator, 3);
+ negate_Xsig(&accumulator);
+
+ add_Xsig_Xsig(&accumulator, &argSqrd);
+
+ shr_Xsig(&accumulator, 1);
+
+ /* It doesn't matter if accumulator is all zero here, the
+ following code will work ok */
+ negate_Xsig(&accumulator);
+
+ if (accumulator.lsw & 0x80000000)
+ XSIG_LL(accumulator)++;
+ if (accumulator.msw == 0) {
+ /* The result is 1.0 */
+ FPU_copy_to_reg0(&CONST_1, TAG_Valid);
+ return;
+ } else {
+ significand(&result) = XSIG_LL(accumulator);
+
+ /* will be a valid positive nr with expon = -1 */
+ setexponentpos(&result, -1);
+ }
+ } else {
+ fixed_arg = significand(st0_ptr);
+
+ if (exponent == 0) {
+ /* The argument is >= 1.0 */
+
+ /* Put the binary point at the left. */
+ fixed_arg <<= 1;
+ }
+ /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+ fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+ /* There is a special case which arises due to rounding, to fix here. */
+ if (fixed_arg == 0xffffffffffffffffLL)
+ fixed_arg = 0;
+
+ exponent = -1;
+ exp2 = -1;
+
+ /* A shift is needed here only for a narrow range of arguments,
+ i.e. for fixed_arg approx 2^-32, but we pick up more... */
+ if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
+ fixed_arg <<= 16;
+ exponent -= 16;
+ exp2 -= 16;
+ }
+
+ XSIG_LL(argSqrd) = fixed_arg;
+ argSqrd.lsw = 0;
+ mul64_Xsig(&argSqrd, &fixed_arg);
+
+ if (exponent < -1) {
+ /* shift the argument right by the required places */
+ shr_Xsig(&argSqrd, 2 * (-1 - exponent));
+ }
+
+ argTo4.msw = argSqrd.msw;
+ argTo4.midw = argSqrd.midw;
+ argTo4.lsw = argSqrd.lsw;
+ mul_Xsig_Xsig(&argTo4, &argTo4);
+
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+ N_COEFF_N - 1);
+ mul_Xsig_Xsig(&accumulator, &argSqrd);
+ negate_Xsig(&accumulator);
+
+ polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+ N_COEFF_P - 1);
+
+ shr_Xsig(&accumulator, 2); /* Divide by four */
+ accumulator.msw |= 0x80000000; /* Add 1.0 */
+
+ mul64_Xsig(&accumulator, &fixed_arg);
+ mul64_Xsig(&accumulator, &fixed_arg);
+ mul64_Xsig(&accumulator, &fixed_arg);
+
+ /* Divide by four, FPU_REG compatible, etc */
+ exponent = 3 * exponent;
+
+ /* The minimum exponent difference is 3 */
+ shr_Xsig(&accumulator, exp2 - exponent);
+
+ negate_Xsig(&accumulator);
+ XSIG_LL(accumulator) += fixed_arg;
+
+ /* The basic computation is complete. Now fix the answer to
+ compensate for the error due to the approximation used for
+ pi/2
+ */
+
+ /* This has an exponent of -65 */
+ XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
+ fix_up.lsw = 0;
+
+ /* The fix-up needs to be improved for larger args */
+ if (argSqrd.msw & 0xffc00000) {
+ /* Get about 32 bit precision in these: */
+ fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
+ fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
+ }
+
+ exp2 += norm_Xsig(&accumulator);
+ shr_Xsig(&accumulator, 1); /* Prevent overflow */
+ exp2++;
+ shr_Xsig(&fix_up, 65 + exp2);
+
+ add_Xsig_Xsig(&accumulator, &fix_up);
+
+ echange = round_Xsig(&accumulator);
+
+ setexponentpos(&result, exp2 + echange);
+ significand(&result) = XSIG_LL(accumulator);
}
- exp2 += norm_Xsig(&accumulator);
- shr_Xsig(&accumulator, 1); /* Prevent overflow */
- exp2++;
- shr_Xsig(&fix_up, 65 + exp2);
-
- add_Xsig_Xsig(&accumulator, &fix_up);
-
- echange = round_Xsig(&accumulator);
-
- setexponentpos(&result, exp2 + echange);
- significand(&result) = XSIG_LL(accumulator);
- }
-
- FPU_copy_to_reg0(&result, TAG_Valid);
+ FPU_copy_to_reg0(&result, TAG_Valid);
#ifdef PARANOID
- if ( (exponent(&result) >= 0)
- && (significand(&result) > 0x8000000000000000LL) )
- {
- EXCEPTION(EX_INTERNAL|0x151);
- }
+ if ((exponent(&result) >= 0)
+ && (significand(&result) > 0x8000000000000000LL)) {
+ EXCEPTION(EX_INTERNAL | 0x151);
+ }
#endif /* PARANOID */
}
diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c
index 8df3e03b6e6..1875763e0c0 100644
--- a/arch/x86/math-emu/poly_tan.c
+++ b/arch/x86/math-emu/poly_tan.c
@@ -17,206 +17,196 @@
#include "control_w.h"
#include "poly.h"
-
#define HiPOWERop 3 /* odd poly, positive terms */
-static const unsigned long long oddplterm[HiPOWERop] =
-{
- 0x0000000000000000LL,
- 0x0051a1cf08fca228LL,
- 0x0000000071284ff7LL
+static const unsigned long long oddplterm[HiPOWERop] = {
+ 0x0000000000000000LL,
+ 0x0051a1cf08fca228LL,
+ 0x0000000071284ff7LL
};
#define HiPOWERon 2 /* odd poly, negative terms */
-static const unsigned long long oddnegterm[HiPOWERon] =
-{
- 0x1291a9a184244e80LL,
- 0x0000583245819c21LL
+static const unsigned long long oddnegterm[HiPOWERon] = {
+ 0x1291a9a184244e80LL,
+ 0x0000583245819c21LL
};
#define HiPOWERep 2 /* even poly, positive terms */
-static const unsigned long long evenplterm[HiPOWERep] =
-{
- 0x0e848884b539e888LL,
- 0x00003c7f18b887daLL
+static const unsigned long long evenplterm[HiPOWERep] = {
+ 0x0e848884b539e888LL,
+ 0x00003c7f18b887daLL
};
#define HiPOWERen 2 /* even poly, negative terms */
-static const unsigned long long evennegterm[HiPOWERen] =
-{
- 0xf1f0200fd51569ccLL,
- 0x003afb46105c4432LL
+static const unsigned long long evennegterm[HiPOWERen] = {
+ 0xf1f0200fd51569ccLL,
+ 0x003afb46105c4432LL
};
static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
-
/*--- poly_tan() ------------------------------------------------------------+
| |
+---------------------------------------------------------------------------*/
-void poly_tan(FPU_REG *st0_ptr)
+void poly_tan(FPU_REG *st0_ptr)
{
- long int exponent;
- int invert;
- Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
- argSignif, fix_up;
- unsigned long adj;
+ long int exponent;
+ int invert;
+ Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
+ argSignif, fix_up;
+ unsigned long adj;
- exponent = exponent(st0_ptr);
+ exponent = exponent(st0_ptr);
#ifdef PARANOID
- if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */
- { arith_invalid(0); return; } /* Need a positive number */
+ if (signnegative(st0_ptr)) { /* Can't hack a number < 0.0 */
+ arith_invalid(0);
+ return;
+ } /* Need a positive number */
#endif /* PARANOID */
- /* Split the problem into two domains, smaller and larger than pi/4 */
- if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
- {
- /* The argument is greater than (approx) pi/4 */
- invert = 1;
- accum.lsw = 0;
- XSIG_LL(accum) = significand(st0_ptr);
-
- if ( exponent == 0 )
- {
- /* The argument is >= 1.0 */
- /* Put the binary point at the left. */
- XSIG_LL(accum) <<= 1;
- }
- /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
- XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
- /* This is a special case which arises due to rounding. */
- if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
- {
- FPU_settag0(TAG_Valid);
- significand(st0_ptr) = 0x8a51e04daabda360LL;
- setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
- return;
+ /* Split the problem into two domains, smaller and larger than pi/4 */
+ if ((exponent == 0)
+ || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2))) {
+ /* The argument is greater than (approx) pi/4 */
+ invert = 1;
+ accum.lsw = 0;
+ XSIG_LL(accum) = significand(st0_ptr);
+
+ if (exponent == 0) {
+ /* The argument is >= 1.0 */
+ /* Put the binary point at the left. */
+ XSIG_LL(accum) <<= 1;
+ }
+ /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+ XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
+ /* This is a special case which arises due to rounding. */
+ if (XSIG_LL(accum) == 0xffffffffffffffffLL) {
+ FPU_settag0(TAG_Valid);
+ significand(st0_ptr) = 0x8a51e04daabda360LL;
+ setexponent16(st0_ptr,
+ (0x41 + EXTENDED_Ebias) | SIGN_Negative);
+ return;
+ }
+
+ argSignif.lsw = accum.lsw;
+ XSIG_LL(argSignif) = XSIG_LL(accum);
+ exponent = -1 + norm_Xsig(&argSignif);
+ } else {
+ invert = 0;
+ argSignif.lsw = 0;
+ XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
+
+ if (exponent < -1) {
+ /* shift the argument right by the required places */
+ if (FPU_shrx(&XSIG_LL(accum), -1 - exponent) >=
+ 0x80000000U)
+ XSIG_LL(accum)++; /* round up */
+ }
}
- argSignif.lsw = accum.lsw;
- XSIG_LL(argSignif) = XSIG_LL(accum);
- exponent = -1 + norm_Xsig(&argSignif);
- }
- else
- {
- invert = 0;
- argSignif.lsw = 0;
- XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
-
- if ( exponent < -1 )
- {
- /* shift the argument right by the required places */
- if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
- XSIG_LL(accum) ++; /* round up */
- }
- }
-
- XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
- mul_Xsig_Xsig(&argSq, &argSq);
- XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
- mul_Xsig_Xsig(&argSqSq, &argSqSq);
-
- /* Compute the negative terms for the numerator polynomial */
- accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
- polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
- mul_Xsig_Xsig(&accumulatoro, &argSq);
- negate_Xsig(&accumulatoro);
- /* Add the positive terms */
- polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
-
-
- /* Compute the positive terms for the denominator polynomial */
- accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
- polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
- mul_Xsig_Xsig(&accumulatore, &argSq);
- negate_Xsig(&accumulatore);
- /* Add the negative terms */
- polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
- /* Multiply by arg^2 */
- mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
- mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
- /* de-normalize and divide by 2 */
- shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
- negate_Xsig(&accumulatore); /* This does 1 - accumulator */
-
- /* Now find the ratio. */
- if ( accumulatore.msw == 0 )
- {
- /* accumulatoro must contain 1.0 here, (actually, 0) but it
- really doesn't matter what value we use because it will
- have negligible effect in later calculations
- */
- XSIG_LL(accum) = 0x8000000000000000LL;
- accum.lsw = 0;
- }
- else
- {
- div_Xsig(&accumulatoro, &accumulatore, &accum);
- }
-
- /* Multiply by 1/3 * arg^3 */
- mul64_Xsig(&accum, &XSIG_LL(argSignif));
- mul64_Xsig(&accum, &XSIG_LL(argSignif));
- mul64_Xsig(&accum, &XSIG_LL(argSignif));
- mul64_Xsig(&accum, &twothirds);
- shr_Xsig(&accum, -2*(exponent+1));
-
- /* tan(arg) = arg + accum */
- add_two_Xsig(&accum, &argSignif, &exponent);
-
- if ( invert )
- {
- /* We now have the value of tan(pi_2 - arg) where pi_2 is an
- approximation for pi/2
- */
- /* The next step is to fix the answer to compensate for the
- error due to the approximation used for pi/2
- */
-
- /* This is (approx) delta, the error in our approx for pi/2
- (see above). It has an exponent of -65
- */
- XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
- fix_up.lsw = 0;
-
- if ( exponent == 0 )
- adj = 0xffffffff; /* We want approx 1.0 here, but
- this is close enough. */
- else if ( exponent > -30 )
- {
- adj = accum.msw >> -(exponent+1); /* tan */
- adj = mul_32_32(adj, adj); /* tan^2 */
+ XSIG_LL(argSq) = XSIG_LL(accum);
+ argSq.lsw = accum.lsw;
+ mul_Xsig_Xsig(&argSq, &argSq);
+ XSIG_LL(argSqSq) = XSIG_LL(argSq);
+ argSqSq.lsw = argSq.lsw;
+ mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+ /* Compute the negative terms for the numerator polynomial */
+ accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
+ polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm,
+ HiPOWERon - 1);
+ mul_Xsig_Xsig(&accumulatoro, &argSq);
+ negate_Xsig(&accumulatoro);
+ /* Add the positive terms */
+ polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm,
+ HiPOWERop - 1);
+
+ /* Compute the positive terms for the denominator polynomial */
+ accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
+ polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm,
+ HiPOWERep - 1);
+ mul_Xsig_Xsig(&accumulatore, &argSq);
+ negate_Xsig(&accumulatore);
+ /* Add the negative terms */
+ polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm,
+ HiPOWERen - 1);
+ /* Multiply by arg^2 */
+ mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+ mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+ /* de-normalize and divide by 2 */
+ shr_Xsig(&accumulatore, -2 * (1 + exponent) + 1);
+ negate_Xsig(&accumulatore); /* This does 1 - accumulator */
+
+ /* Now find the ratio. */
+ if (accumulatore.msw == 0) {
+ /* accumulatoro must contain 1.0 here, (actually, 0) but it
+ really doesn't matter what value we use because it will
+ have negligible effect in later calculations
+ */
+ XSIG_LL(accum) = 0x8000000000000000LL;
+ accum.lsw = 0;
+ } else {
+ div_Xsig(&accumulatoro, &accumulatore, &accum);
}
- else
- adj = 0;
- adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */
-
- fix_up.msw += adj;
- if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */
- {
- /* Yes, we need to add an msb */
- shr_Xsig(&fix_up, 1);
- fix_up.msw |= 0x80000000;
- shr_Xsig(&fix_up, 64 + exponent);
+
+ /* Multiply by 1/3 * arg^3 */
+ mul64_Xsig(&accum, &XSIG_LL(argSignif));
+ mul64_Xsig(&accum, &XSIG_LL(argSignif));
+ mul64_Xsig(&accum, &XSIG_LL(argSignif));
+ mul64_Xsig(&accum, &twothirds);
+ shr_Xsig(&accum, -2 * (exponent + 1));
+
+ /* tan(arg) = arg + accum */
+ add_two_Xsig(&accum, &argSignif, &exponent);
+
+ if (invert) {
+ /* We now have the value of tan(pi_2 - arg) where pi_2 is an
+ approximation for pi/2
+ */
+ /* The next step is to fix the answer to compensate for the
+ error due to the approximation used for pi/2
+ */
+
+ /* This is (approx) delta, the error in our approx for pi/2
+ (see above). It has an exponent of -65
+ */
+ XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
+ fix_up.lsw = 0;
+
+ if (exponent == 0)
+ adj = 0xffffffff; /* We want approx 1.0 here, but
+ this is close enough. */
+ else if (exponent > -30) {
+ adj = accum.msw >> -(exponent + 1); /* tan */
+ adj = mul_32_32(adj, adj); /* tan^2 */
+ } else
+ adj = 0;
+ adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */
+
+ fix_up.msw += adj;
+ if (!(fix_up.msw & 0x80000000)) { /* did fix_up overflow ? */
+ /* Yes, we need to add an msb */
+ shr_Xsig(&fix_up, 1);
+ fix_up.msw |= 0x80000000;
+ shr_Xsig(&fix_up, 64 + exponent);
+ } else
+ shr_Xsig(&fix_up, 65 + exponent);
+
+ add_two_Xsig(&accum, &fix_up, &exponent);
+
+ /* accum now contains tan(pi/2 - arg).
+ Use tan(arg) = 1.0 / tan(pi/2 - arg)
+ */
+ accumulatoro.lsw = accumulatoro.midw = 0;
+ accumulatoro.msw = 0x80000000;
+ div_Xsig(&accumulatoro, &accum, &accum);
+ exponent = -exponent - 1;
}
- else
- shr_Xsig(&fix_up, 65 + exponent);
-
- add_two_Xsig(&accum, &fix_up, &exponent);
-
- /* accum now contains tan(pi/2 - arg).
- Use tan(arg) = 1.0 / tan(pi/2 - arg)
- */
- accumulatoro.lsw = accumulatoro.midw = 0;
- accumulatoro.msw = 0x80000000;
- div_Xsig(&accumulatoro, &accum, &accum);
- exponent = - exponent - 1;
- }
-
- /* Transfer the result */
- round_Xsig(&accum);
- FPU_settag0(TAG_Valid);
- significand(st0_ptr) = XSIG_LL(accum);
- setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */
+
+ /* Transfer the result */
+ round_Xsig(&accum);
+ FPU_settag0(TAG_Valid);
+ significand(st0_ptr) = XSIG_LL(accum);
+ setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */
}
diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c
index 7cd3b37ac08..deea48b9f13 100644
--- a/arch/x86/math-emu/reg_add_sub.c
+++ b/arch/x86/math-emu/reg_add_sub.c
@@ -27,7 +27,7 @@
static
int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
FPU_REG const *b, u_char tagb, u_char signb,
- FPU_REG *dest, int deststnr, int control_w);
+ FPU_REG * dest, int deststnr, int control_w);
/*
Operates on st(0) and st(n), or on st(0) and temporary data.
@@ -35,340 +35,299 @@ int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
*/
int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
{
- FPU_REG *a = &st(0);
- FPU_REG *dest = &st(deststnr);
- u_char signb = getsign(b);
- u_char taga = FPU_gettag0();
- u_char signa = getsign(a);
- u_char saved_sign = getsign(dest);
- int diff, tag, expa, expb;
-
- if ( !(taga | tagb) )
- {
- expa = exponent(a);
- expb = exponent(b);
-
- valid_add:
- /* Both registers are valid */
- if (!(signa ^ signb))
- {
- /* signs are the same */
- tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
- }
- else
- {
- /* The signs are different, so do a subtraction */
- diff = expa - expb;
- if (!diff)
- {
- diff = a->sigh - b->sigh; /* This works only if the ms bits
- are identical. */
- if (!diff)
- {
- diff = a->sigl > b->sigl;
- if (!diff)
- diff = -(a->sigl < b->sigl);
+ FPU_REG *a = &st(0);
+ FPU_REG *dest = &st(deststnr);
+ u_char signb = getsign(b);
+ u_char taga = FPU_gettag0();
+ u_char signa = getsign(a);
+ u_char saved_sign = getsign(dest);
+ int diff, tag, expa, expb;
+
+ if (!(taga | tagb)) {
+ expa = exponent(a);
+ expb = exponent(b);
+
+ valid_add:
+ /* Both registers are valid */
+ if (!(signa ^ signb)) {
+ /* signs are the same */
+ tag =
+ FPU_u_add(a, b, dest, control_w, signa, expa, expb);
+ } else {
+ /* The signs are different, so do a subtraction */
+ diff = expa - expb;
+ if (!diff) {
+ diff = a->sigh - b->sigh; /* This works only if the ms bits
+ are identical. */
+ if (!diff) {
+ diff = a->sigl > b->sigl;
+ if (!diff)
+ diff = -(a->sigl < b->sigl);
+ }
+ }
+
+ if (diff > 0) {
+ tag =
+ FPU_u_sub(a, b, dest, control_w, signa,
+ expa, expb);
+ } else if (diff < 0) {
+ tag =
+ FPU_u_sub(b, a, dest, control_w, signb,
+ expb, expa);
+ } else {
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+ /* sign depends upon rounding mode */
+ setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+ ? SIGN_POS : SIGN_NEG);
+ return TAG_Zero;
+ }
}
- }
-
- if (diff > 0)
- {
- tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
- }
- else if ( diff < 0 )
- {
- tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
- }
- else
- {
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
- /* sign depends upon rounding mode */
- setsign(dest, ((control_w & CW_RC) != RC_DOWN)
- ? SIGN_POS : SIGN_NEG);
- return TAG_Zero;
- }
- }
- if ( tag < 0 )
- {
- setsign(dest, saved_sign);
- return tag;
+ if (tag < 0) {
+ setsign(dest, saved_sign);
+ return tag;
+ }
+ FPU_settagi(deststnr, tag);
+ return tag;
}
- FPU_settagi(deststnr, tag);
- return tag;
- }
- if ( taga == TAG_Special )
- taga = FPU_Special(a);
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
+ if (taga == TAG_Special)
+ taga = FPU_Special(a);
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
- if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+ if (((taga == TAG_Valid) && (tagb == TW_Denormal))
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
- || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
- {
- FPU_REG x, y;
+ || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+ FPU_REG x, y;
+
+ if (denormal_operand() < 0)
+ return FPU_Exception;
+
+ FPU_to_exp16(a, &x);
+ FPU_to_exp16(b, &y);
+ a = &x;
+ b = &y;
+ expa = exponent16(a);
+ expb = exponent16(b);
+ goto valid_add;
+ }
- if ( denormal_operand() < 0 )
- return FPU_Exception;
+ if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+ if (deststnr == 0)
+ return real_2op_NaN(b, tagb, deststnr, a);
+ else
+ return real_2op_NaN(a, taga, deststnr, a);
+ }
- FPU_to_exp16(a, &x);
- FPU_to_exp16(b, &y);
- a = &x;
- b = &y;
- expa = exponent16(a);
- expb = exponent16(b);
- goto valid_add;
- }
-
- if ( (taga == TW_NaN) || (tagb == TW_NaN) )
- {
- if ( deststnr == 0 )
- return real_2op_NaN(b, tagb, deststnr, a);
- else
- return real_2op_NaN(a, taga, deststnr, a);
- }
-
- return add_sub_specials(a, taga, signa, b, tagb, signb,
- dest, deststnr, control_w);
+ return add_sub_specials(a, taga, signa, b, tagb, signb,
+ dest, deststnr, control_w);
}
-
/* Subtract b from a. (a-b) -> dest */
int FPU_sub(int flags, int rm, int control_w)
{
- FPU_REG const *a, *b;
- FPU_REG *dest;
- u_char taga, tagb, signa, signb, saved_sign, sign;
- int diff, tag = 0, expa, expb, deststnr;
-
- a = &st(0);
- taga = FPU_gettag0();
-
- deststnr = 0;
- if ( flags & LOADED )
- {
- b = (FPU_REG *)rm;
- tagb = flags & 0x0f;
- }
- else
- {
- b = &st(rm);
- tagb = FPU_gettagi(rm);
-
- if ( flags & DEST_RM )
- deststnr = rm;
- }
-
- signa = getsign(a);
- signb = getsign(b);
-
- if ( flags & REV )
- {
- signa ^= SIGN_NEG;
- signb ^= SIGN_NEG;
- }
-
- dest = &st(deststnr);
- saved_sign = getsign(dest);
-
- if ( !(taga | tagb) )
- {
- expa = exponent(a);
- expb = exponent(b);
-
- valid_subtract:
- /* Both registers are valid */
-
- diff = expa - expb;
-
- if (!diff)
- {
- diff = a->sigh - b->sigh; /* Works only if ms bits are identical */
- if (!diff)
- {
- diff = a->sigl > b->sigl;
- if (!diff)
- diff = -(a->sigl < b->sigl);
- }
+ FPU_REG const *a, *b;
+ FPU_REG *dest;
+ u_char taga, tagb, signa, signb, saved_sign, sign;
+ int diff, tag = 0, expa, expb, deststnr;
+
+ a = &st(0);
+ taga = FPU_gettag0();
+
+ deststnr = 0;
+ if (flags & LOADED) {
+ b = (FPU_REG *) rm;
+ tagb = flags & 0x0f;
+ } else {
+ b = &st(rm);
+ tagb = FPU_gettagi(rm);
+
+ if (flags & DEST_RM)
+ deststnr = rm;
}
- switch ( (((int)signa)*2 + signb) / SIGN_NEG )
- {
- case 0: /* P - P */
- case 3: /* N - N */
- if (diff > 0)
- {
- /* |a| > |b| */
- tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
- }
- else if ( diff == 0 )
- {
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-
- /* sign depends upon rounding mode */
- setsign(dest, ((control_w & CW_RC) != RC_DOWN)
- ? SIGN_POS : SIGN_NEG);
- return TAG_Zero;
- }
- else
- {
- sign = signa ^ SIGN_NEG;
- tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
- }
- break;
- case 1: /* P - N */
- tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
- break;
- case 2: /* N - P */
- tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
- break;
+ signa = getsign(a);
+ signb = getsign(b);
+
+ if (flags & REV) {
+ signa ^= SIGN_NEG;
+ signb ^= SIGN_NEG;
+ }
+
+ dest = &st(deststnr);
+ saved_sign = getsign(dest);
+
+ if (!(taga | tagb)) {
+ expa = exponent(a);
+ expb = exponent(b);
+
+ valid_subtract:
+ /* Both registers are valid */
+
+ diff = expa - expb;
+
+ if (!diff) {
+ diff = a->sigh - b->sigh; /* Works only if ms bits are identical */
+ if (!diff) {
+ diff = a->sigl > b->sigl;
+ if (!diff)
+ diff = -(a->sigl < b->sigl);
+ }
+ }
+
+ switch ((((int)signa) * 2 + signb) / SIGN_NEG) {
+ case 0: /* P - P */
+ case 3: /* N - N */
+ if (diff > 0) {
+ /* |a| > |b| */
+ tag =
+ FPU_u_sub(a, b, dest, control_w, signa,
+ expa, expb);
+ } else if (diff == 0) {
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+
+ /* sign depends upon rounding mode */
+ setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+ ? SIGN_POS : SIGN_NEG);
+ return TAG_Zero;
+ } else {
+ sign = signa ^ SIGN_NEG;
+ tag =
+ FPU_u_sub(b, a, dest, control_w, sign, expb,
+ expa);
+ }
+ break;
+ case 1: /* P - N */
+ tag =
+ FPU_u_add(a, b, dest, control_w, SIGN_POS, expa,
+ expb);
+ break;
+ case 2: /* N - P */
+ tag =
+ FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa,
+ expb);
+ break;
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x111);
- return -1;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x111);
+ return -1;
#endif
+ }
+ if (tag < 0) {
+ setsign(dest, saved_sign);
+ return tag;
+ }
+ FPU_settagi(deststnr, tag);
+ return tag;
}
- if ( tag < 0 )
- {
- setsign(dest, saved_sign);
- return tag;
- }
- FPU_settagi(deststnr, tag);
- return tag;
- }
- if ( taga == TAG_Special )
- taga = FPU_Special(a);
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
+ if (taga == TAG_Special)
+ taga = FPU_Special(a);
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
- if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+ if (((taga == TAG_Valid) && (tagb == TW_Denormal))
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
- || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
- {
- FPU_REG x, y;
+ || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+ FPU_REG x, y;
- if ( denormal_operand() < 0 )
- return FPU_Exception;
+ if (denormal_operand() < 0)
+ return FPU_Exception;
+
+ FPU_to_exp16(a, &x);
+ FPU_to_exp16(b, &y);
+ a = &x;
+ b = &y;
+ expa = exponent16(a);
+ expb = exponent16(b);
- FPU_to_exp16(a, &x);
- FPU_to_exp16(b, &y);
- a = &x;
- b = &y;
- expa = exponent16(a);
- expb = exponent16(b);
-
- goto valid_subtract;
- }
-
- if ( (taga == TW_NaN) || (tagb == TW_NaN) )
- {
- FPU_REG const *d1, *d2;
- if ( flags & REV )
- {
- d1 = b;
- d2 = a;
+ goto valid_subtract;
}
- else
- {
- d1 = a;
- d2 = b;
+
+ if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+ FPU_REG const *d1, *d2;
+ if (flags & REV) {
+ d1 = b;
+ d2 = a;
+ } else {
+ d1 = a;
+ d2 = b;
+ }
+ if (flags & LOADED)
+ return real_2op_NaN(b, tagb, deststnr, d1);
+ if (flags & DEST_RM)
+ return real_2op_NaN(a, taga, deststnr, d2);
+ else
+ return real_2op_NaN(b, tagb, deststnr, d2);
}
- if ( flags & LOADED )
- return real_2op_NaN(b, tagb, deststnr, d1);
- if ( flags & DEST_RM )
- return real_2op_NaN(a, taga, deststnr, d2);
- else
- return real_2op_NaN(b, tagb, deststnr, d2);
- }
-
- return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
- dest, deststnr, control_w);
-}
+ return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
+ dest, deststnr, control_w);
+}
static
int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
FPU_REG const *b, u_char tagb, u_char signb,
- FPU_REG *dest, int deststnr, int control_w)
+ FPU_REG * dest, int deststnr, int control_w)
{
- if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
- && (denormal_operand() < 0) )
- return FPU_Exception;
-
- if (taga == TAG_Zero)
- {
- if (tagb == TAG_Zero)
- {
- /* Both are zero, result will be zero. */
- u_char different_signs = signa ^ signb;
-
- FPU_copy_to_regi(a, TAG_Zero, deststnr);
- if ( different_signs )
- {
- /* Signs are different. */
- /* Sign of answer depends upon rounding mode. */
- setsign(dest, ((control_w & CW_RC) != RC_DOWN)
- ? SIGN_POS : SIGN_NEG);
- }
- else
- setsign(dest, signa); /* signa may differ from the sign of a. */
- return TAG_Zero;
- }
- else
- {
- reg_copy(b, dest);
- if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
- {
- /* A pseudoDenormal, convert it. */
- addexponent(dest, 1);
- tagb = TAG_Valid;
- }
- else if ( tagb > TAG_Empty )
- tagb = TAG_Special;
- setsign(dest, signb); /* signb may differ from the sign of b. */
- FPU_settagi(deststnr, tagb);
- return tagb;
- }
- }
- else if (tagb == TAG_Zero)
- {
- reg_copy(a, dest);
- if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
- {
- /* A pseudoDenormal */
- addexponent(dest, 1);
- taga = TAG_Valid;
- }
- else if ( taga > TAG_Empty )
- taga = TAG_Special;
- setsign(dest, signa); /* signa may differ from the sign of a. */
- FPU_settagi(deststnr, taga);
- return taga;
- }
- else if (taga == TW_Infinity)
- {
- if ( (tagb != TW_Infinity) || (signa == signb) )
- {
- FPU_copy_to_regi(a, TAG_Special, deststnr);
- setsign(dest, signa); /* signa may differ from the sign of a. */
- return taga;
+ if (((taga == TW_Denormal) || (tagb == TW_Denormal))
+ && (denormal_operand() < 0))
+ return FPU_Exception;
+
+ if (taga == TAG_Zero) {
+ if (tagb == TAG_Zero) {
+ /* Both are zero, result will be zero. */
+ u_char different_signs = signa ^ signb;
+
+ FPU_copy_to_regi(a, TAG_Zero, deststnr);
+ if (different_signs) {
+ /* Signs are different. */
+ /* Sign of answer depends upon rounding mode. */
+ setsign(dest, ((control_w & CW_RC) != RC_DOWN)
+ ? SIGN_POS : SIGN_NEG);
+ } else
+ setsign(dest, signa); /* signa may differ from the sign of a. */
+ return TAG_Zero;
+ } else {
+ reg_copy(b, dest);
+ if ((tagb == TW_Denormal) && (b->sigh & 0x80000000)) {
+ /* A pseudoDenormal, convert it. */
+ addexponent(dest, 1);
+ tagb = TAG_Valid;
+ } else if (tagb > TAG_Empty)
+ tagb = TAG_Special;
+ setsign(dest, signb); /* signb may differ from the sign of b. */
+ FPU_settagi(deststnr, tagb);
+ return tagb;
+ }
+ } else if (tagb == TAG_Zero) {
+ reg_copy(a, dest);
+ if ((taga == TW_Denormal) && (a->sigh & 0x80000000)) {
+ /* A pseudoDenormal */
+ addexponent(dest, 1);
+ taga = TAG_Valid;
+ } else if (taga > TAG_Empty)
+ taga = TAG_Special;
+ setsign(dest, signa); /* signa may differ from the sign of a. */
+ FPU_settagi(deststnr, taga);
+ return taga;
+ } else if (taga == TW_Infinity) {
+ if ((tagb != TW_Infinity) || (signa == signb)) {
+ FPU_copy_to_regi(a, TAG_Special, deststnr);
+ setsign(dest, signa); /* signa may differ from the sign of a. */
+ return taga;
+ }
+ /* Infinity-Infinity is undefined. */
+ return arith_invalid(deststnr);
+ } else if (tagb == TW_Infinity) {
+ FPU_copy_to_regi(b, TAG_Special, deststnr);
+ setsign(dest, signb); /* signb may differ from the sign of b. */
+ return tagb;
}
- /* Infinity-Infinity is undefined. */
- return arith_invalid(deststnr);
- }
- else if (tagb == TW_Infinity)
- {
- FPU_copy_to_regi(b, TAG_Special, deststnr);
- setsign(dest, signb); /* signb may differ from the sign of b. */
- return tagb;
- }
-
#ifdef PARANOID
- EXCEPTION(EX_INTERNAL|0x101);
+ EXCEPTION(EX_INTERNAL | 0x101);
#endif
- return FPU_Exception;
+ return FPU_Exception;
}
-
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index f37c5b5a35a..ecce55fc2e2 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -20,362 +20,331 @@
#include "control_w.h"
#include "status_w.h"
-
static int compare(FPU_REG const *b, int tagb)
{
- int diff, exp0, expb;
- u_char st0_tag;
- FPU_REG *st0_ptr;
- FPU_REG x, y;
- u_char st0_sign, signb = getsign(b);
-
- st0_ptr = &st(0);
- st0_tag = FPU_gettag0();
- st0_sign = getsign(st0_ptr);
-
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
- if ( st0_tag == TAG_Special )
- st0_tag = FPU_Special(st0_ptr);
-
- if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
- || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
- {
- if ( st0_tag == TAG_Zero )
- {
- if ( tagb == TAG_Zero ) return COMP_A_eq_B;
- if ( tagb == TAG_Valid )
- return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
- if ( tagb == TW_Denormal )
- return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
- | COMP_Denormal;
- }
- else if ( tagb == TAG_Zero )
- {
- if ( st0_tag == TAG_Valid )
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
- if ( st0_tag == TW_Denormal )
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
- | COMP_Denormal;
+ int diff, exp0, expb;
+ u_char st0_tag;
+ FPU_REG *st0_ptr;
+ FPU_REG x, y;
+ u_char st0_sign, signb = getsign(b);
+
+ st0_ptr = &st(0);
+ st0_tag = FPU_gettag0();
+ st0_sign = getsign(st0_ptr);
+
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
+ if (st0_tag == TAG_Special)
+ st0_tag = FPU_Special(st0_ptr);
+
+ if (((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
+ || ((tagb != TAG_Valid) && (tagb != TW_Denormal))) {
+ if (st0_tag == TAG_Zero) {
+ if (tagb == TAG_Zero)
+ return COMP_A_eq_B;
+ if (tagb == TAG_Valid)
+ return ((signb ==
+ SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+ if (tagb == TW_Denormal)
+ return ((signb ==
+ SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+ | COMP_Denormal;
+ } else if (tagb == TAG_Zero) {
+ if (st0_tag == TAG_Valid)
+ return ((st0_sign ==
+ SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+ if (st0_tag == TW_Denormal)
+ return ((st0_sign ==
+ SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+ | COMP_Denormal;
+ }
+
+ if (st0_tag == TW_Infinity) {
+ if ((tagb == TAG_Valid) || (tagb == TAG_Zero))
+ return ((st0_sign ==
+ SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+ else if (tagb == TW_Denormal)
+ return ((st0_sign ==
+ SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+ | COMP_Denormal;
+ else if (tagb == TW_Infinity) {
+ /* The 80486 book says that infinities can be equal! */
+ return (st0_sign == signb) ? COMP_A_eq_B :
+ ((st0_sign ==
+ SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+ }
+ /* Fall through to the NaN code */
+ } else if (tagb == TW_Infinity) {
+ if ((st0_tag == TAG_Valid) || (st0_tag == TAG_Zero))
+ return ((signb ==
+ SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
+ if (st0_tag == TW_Denormal)
+ return ((signb ==
+ SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+ | COMP_Denormal;
+ /* Fall through to the NaN code */
+ }
+
+ /* The only possibility now should be that one of the arguments
+ is a NaN */
+ if ((st0_tag == TW_NaN) || (tagb == TW_NaN)) {
+ int signalling = 0, unsupported = 0;
+ if (st0_tag == TW_NaN) {
+ signalling =
+ (st0_ptr->sigh & 0xc0000000) == 0x80000000;
+ unsupported = !((exponent(st0_ptr) == EXP_OVER)
+ && (st0_ptr->
+ sigh & 0x80000000));
+ }
+ if (tagb == TW_NaN) {
+ signalling |=
+ (b->sigh & 0xc0000000) == 0x80000000;
+ unsupported |= !((exponent(b) == EXP_OVER)
+ && (b->sigh & 0x80000000));
+ }
+ if (signalling || unsupported)
+ return COMP_No_Comp | COMP_SNaN | COMP_NaN;
+ else
+ /* Neither is a signaling NaN */
+ return COMP_No_Comp | COMP_NaN;
+ }
+
+ EXCEPTION(EX_Invalid);
}
- if ( st0_tag == TW_Infinity )
- {
- if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
- else if ( tagb == TW_Denormal )
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
- | COMP_Denormal;
- else if ( tagb == TW_Infinity )
- {
- /* The 80486 book says that infinities can be equal! */
- return (st0_sign == signb) ? COMP_A_eq_B :
- ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
- }
- /* Fall through to the NaN code */
- }
- else if ( tagb == TW_Infinity )
- {
- if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
- return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
- if ( st0_tag == TW_Denormal )
- return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
- | COMP_Denormal;
- /* Fall through to the NaN code */
+ if (st0_sign != signb) {
+ return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+ | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+ COMP_Denormal : 0);
}
- /* The only possibility now should be that one of the arguments
- is a NaN */
- if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
- {
- int signalling = 0, unsupported = 0;
- if ( st0_tag == TW_NaN )
- {
- signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
- unsupported = !((exponent(st0_ptr) == EXP_OVER)
- && (st0_ptr->sigh & 0x80000000));
- }
- if ( tagb == TW_NaN )
- {
- signalling |= (b->sigh & 0xc0000000) == 0x80000000;
- unsupported |= !((exponent(b) == EXP_OVER)
- && (b->sigh & 0x80000000));
- }
- if ( signalling || unsupported )
- return COMP_No_Comp | COMP_SNaN | COMP_NaN;
- else
- /* Neither is a signaling NaN */
- return COMP_No_Comp | COMP_NaN;
+ if ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) {
+ FPU_to_exp16(st0_ptr, &x);
+ FPU_to_exp16(b, &y);
+ st0_ptr = &x;
+ b = &y;
+ exp0 = exponent16(st0_ptr);
+ expb = exponent16(b);
+ } else {
+ exp0 = exponent(st0_ptr);
+ expb = exponent(b);
}
-
- EXCEPTION(EX_Invalid);
- }
-
- if (st0_sign != signb)
- {
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
- | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
- COMP_Denormal : 0);
- }
-
- if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
- {
- FPU_to_exp16(st0_ptr, &x);
- FPU_to_exp16(b, &y);
- st0_ptr = &x;
- b = &y;
- exp0 = exponent16(st0_ptr);
- expb = exponent16(b);
- }
- else
- {
- exp0 = exponent(st0_ptr);
- expb = exponent(b);
- }
#ifdef PARANOID
- if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
- if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+ if (!(st0_ptr->sigh & 0x80000000))
+ EXCEPTION(EX_Invalid);
+ if (!(b->sigh & 0x80000000))
+ EXCEPTION(EX_Invalid);
#endif /* PARANOID */
- diff = exp0 - expb;
- if ( diff == 0 )
- {
- diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
- identical */
- if ( diff == 0 )
- {
- diff = st0_ptr->sigl > b->sigl;
- if ( diff == 0 )
- diff = -(st0_ptr->sigl < b->sigl);
+ diff = exp0 - expb;
+ if (diff == 0) {
+ diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
+ identical */
+ if (diff == 0) {
+ diff = st0_ptr->sigl > b->sigl;
+ if (diff == 0)
+ diff = -(st0_ptr->sigl < b->sigl);
+ }
}
- }
-
- if ( diff > 0 )
- {
- return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
- | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
- COMP_Denormal : 0);
- }
- if ( diff < 0 )
- {
- return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
- | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
- COMP_Denormal : 0);
- }
-
- return COMP_A_eq_B
- | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
- COMP_Denormal : 0);
-}
+ if (diff > 0) {
+ return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+ | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+ COMP_Denormal : 0);
+ }
+ if (diff < 0) {
+ return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+ | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+ COMP_Denormal : 0);
+ }
+ return COMP_A_eq_B
+ | (((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
+ COMP_Denormal : 0);
+
+}
/* This function requires that st(0) is not empty */
int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
{
- int f = 0, c;
-
- c = compare(loaded_data, loaded_tag);
-
- if (c & COMP_NaN)
- {
- EXCEPTION(EX_Invalid);
- f = SW_C3 | SW_C2 | SW_C0;
- }
- else
- switch (c & 7)
- {
- case COMP_A_lt_B:
- f = SW_C0;
- break;
- case COMP_A_eq_B:
- f = SW_C3;
- break;
- case COMP_A_gt_B:
- f = 0;
- break;
- case COMP_No_Comp:
- f = SW_C3 | SW_C2 | SW_C0;
- break;
+ int f = 0, c;
+
+ c = compare(loaded_data, loaded_tag);
+
+ if (c & COMP_NaN) {
+ EXCEPTION(EX_Invalid);
+ f = SW_C3 | SW_C2 | SW_C0;
+ } else
+ switch (c & 7) {
+ case COMP_A_lt_B:
+ f = SW_C0;
+ break;
+ case COMP_A_eq_B:
+ f = SW_C3;
+ break;
+ case COMP_A_gt_B:
+ f = 0;
+ break;
+ case COMP_No_Comp:
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x121);
- f = SW_C3 | SW_C2 | SW_C0;
- break;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x121);
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
#endif /* PARANOID */
- }
- setcc(f);
- if (c & COMP_Denormal)
- {
- return denormal_operand() < 0;
- }
- return 0;
+ }
+ setcc(f);
+ if (c & COMP_Denormal) {
+ return denormal_operand() < 0;
+ }
+ return 0;
}
-
static int compare_st_st(int nr)
{
- int f = 0, c;
- FPU_REG *st_ptr;
-
- if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
- {
- setcc(SW_C3 | SW_C2 | SW_C0);
- /* Stack fault */
- EXCEPTION(EX_StackUnder);
- return !(control_word & CW_Invalid);
- }
-
- st_ptr = &st(nr);
- c = compare(st_ptr, FPU_gettagi(nr));
- if (c & COMP_NaN)
- {
- setcc(SW_C3 | SW_C2 | SW_C0);
- EXCEPTION(EX_Invalid);
- return !(control_word & CW_Invalid);
- }
- else
- switch (c & 7)
- {
- case COMP_A_lt_B:
- f = SW_C0;
- break;
- case COMP_A_eq_B:
- f = SW_C3;
- break;
- case COMP_A_gt_B:
- f = 0;
- break;
- case COMP_No_Comp:
- f = SW_C3 | SW_C2 | SW_C0;
- break;
+ int f = 0, c;
+ FPU_REG *st_ptr;
+
+ if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ /* Stack fault */
+ EXCEPTION(EX_StackUnder);
+ return !(control_word & CW_Invalid);
+ }
+
+ st_ptr = &st(nr);
+ c = compare(st_ptr, FPU_gettagi(nr));
+ if (c & COMP_NaN) {
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ EXCEPTION(EX_Invalid);
+ return !(control_word & CW_Invalid);
+ } else
+ switch (c & 7) {
+ case COMP_A_lt_B:
+ f = SW_C0;
+ break;
+ case COMP_A_eq_B:
+ f = SW_C3;
+ break;
+ case COMP_A_gt_B:
+ f = 0;
+ break;
+ case COMP_No_Comp:
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x122);
- f = SW_C3 | SW_C2 | SW_C0;
- break;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x122);
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
#endif /* PARANOID */
- }
- setcc(f);
- if (c & COMP_Denormal)
- {
- return denormal_operand() < 0;
- }
- return 0;
+ }
+ setcc(f);
+ if (c & COMP_Denormal) {
+ return denormal_operand() < 0;
+ }
+ return 0;
}
-
static int compare_u_st_st(int nr)
{
- int f = 0, c;
- FPU_REG *st_ptr;
-
- if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
- {
- setcc(SW_C3 | SW_C2 | SW_C0);
- /* Stack fault */
- EXCEPTION(EX_StackUnder);
- return !(control_word & CW_Invalid);
- }
-
- st_ptr = &st(nr);
- c = compare(st_ptr, FPU_gettagi(nr));
- if (c & COMP_NaN)
- {
- setcc(SW_C3 | SW_C2 | SW_C0);
- if (c & COMP_SNaN) /* This is the only difference between
- un-ordered and ordinary comparisons */
- {
- EXCEPTION(EX_Invalid);
- return !(control_word & CW_Invalid);
+ int f = 0, c;
+ FPU_REG *st_ptr;
+
+ if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ /* Stack fault */
+ EXCEPTION(EX_StackUnder);
+ return !(control_word & CW_Invalid);
}
- return 0;
- }
- else
- switch (c & 7)
- {
- case COMP_A_lt_B:
- f = SW_C0;
- break;
- case COMP_A_eq_B:
- f = SW_C3;
- break;
- case COMP_A_gt_B:
- f = 0;
- break;
- case COMP_No_Comp:
- f = SW_C3 | SW_C2 | SW_C0;
- break;
+
+ st_ptr = &st(nr);
+ c = compare(st_ptr, FPU_gettagi(nr));
+ if (c & COMP_NaN) {
+ setcc(SW_C3 | SW_C2 | SW_C0);
+ if (c & COMP_SNaN) { /* This is the only difference between
+ un-ordered and ordinary comparisons */
+ EXCEPTION(EX_Invalid);
+ return !(control_word & CW_Invalid);
+ }
+ return 0;
+ } else
+ switch (c & 7) {
+ case COMP_A_lt_B:
+ f = SW_C0;
+ break;
+ case COMP_A_eq_B:
+ f = SW_C3;
+ break;
+ case COMP_A_gt_B:
+ f = 0;
+ break;
+ case COMP_No_Comp:
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
#ifdef PARANOID
- default:
- EXCEPTION(EX_INTERNAL|0x123);
- f = SW_C3 | SW_C2 | SW_C0;
- break;
-#endif /* PARANOID */
- }
- setcc(f);
- if (c & COMP_Denormal)
- {
- return denormal_operand() < 0;
- }
- return 0;
+ default:
+ EXCEPTION(EX_INTERNAL | 0x123);
+ f = SW_C3 | SW_C2 | SW_C0;
+ break;
+#endif /* PARANOID */
+ }
+ setcc(f);
+ if (c & COMP_Denormal) {
+ return denormal_operand() < 0;
+ }
+ return 0;
}
/*---------------------------------------------------------------------------*/
void fcom_st(void)
{
- /* fcom st(i) */
- compare_st_st(FPU_rm);
+ /* fcom st(i) */
+ compare_st_st(FPU_rm);
}
-
void fcompst(void)
{
- /* fcomp st(i) */
- if ( !compare_st_st(FPU_rm) )
- FPU_pop();
+ /* fcomp st(i) */
+ if (!compare_st_st(FPU_rm))
+ FPU_pop();
}
-
void fcompp(void)
{
- /* fcompp */
- if (FPU_rm != 1)
- {
- FPU_illegal();
- return;
- }
- if ( !compare_st_st(1) )
- poppop();
+ /* fcompp */
+ if (FPU_rm != 1) {
+ FPU_illegal();
+ return;
+ }
+ if (!compare_st_st(1))
+ poppop();
}
-
void fucom_(void)
{
- /* fucom st(i) */
- compare_u_st_st(FPU_rm);
+ /* fucom st(i) */
+ compare_u_st_st(FPU_rm);
}
-
void fucomp(void)
{
- /* fucomp st(i) */
- if ( !compare_u_st_st(FPU_rm) )
- FPU_pop();
+ /* fucomp st(i) */
+ if (!compare_u_st_st(FPU_rm))
+ FPU_pop();
}
-
void fucompp(void)
{
- /* fucompp */
- if (FPU_rm == 1)
- {
- if ( !compare_u_st_st(1) )
- poppop();
- }
- else
- FPU_illegal();
+ /* fucompp */
+ if (FPU_rm == 1) {
+ if (!compare_u_st_st(1))
+ poppop();
+ } else
+ FPU_illegal();
}
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index a8501580196..04869e64b18 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -16,29 +16,28 @@
#include "reg_constant.h"
#include "control_w.h"
-
#define MAKE_REG(s,e,l,h) { l, h, \
((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
-FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
+FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
#if 0
-FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
+FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
-#endif /* 0 */
-static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
-static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
-FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
-static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
-static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
+#endif /* 0 */
+static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
+static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
+FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
+FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
+static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
+static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
/* Extra bits to take pi/2 to more than 128 bits precision. */
FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
- 0xfc8f8cbb, 0xece675d1);
+ 0xfc8f8cbb, 0xece675d1);
/* Only the sign (and tag) is used in internal zeroes */
-FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
+FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
/* Only the sign and significand (and tag) are used in internal NaNs */
/* The 80486 never generates one of these
@@ -48,24 +47,22 @@ FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
/* Only the sign (and tag) is used in internal infinities */
-FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
-
+FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
static void fld_const(FPU_REG const *c, int adj, u_char tag)
{
- FPU_REG *st_new_ptr;
-
- if ( STACK_OVERFLOW )
- {
- FPU_stack_overflow();
- return;
- }
- push();
- reg_copy(c, st_new_ptr);
- st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
- borrow or carry. */
- FPU_settag0(tag);
- clear_C1();
+ FPU_REG *st_new_ptr;
+
+ if (STACK_OVERFLOW) {
+ FPU_stack_overflow();
+ return;
+ }
+ push();
+ reg_copy(c, st_new_ptr);
+ st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
+ borrow or carry. */
+ FPU_settag0(tag);
+ clear_C1();
}
/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
@@ -75,46 +72,46 @@ static void fld_const(FPU_REG const *c, int adj, u_char tag)
static void fld1(int rc)
{
- fld_const(&CONST_1, 0, TAG_Valid);
+ fld_const(&CONST_1, 0, TAG_Valid);
}
static void fldl2t(int rc)
{
- fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
+ fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
}
static void fldl2e(int rc)
{
- fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+ fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
}
static void fldpi(int rc)
{
- fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+ fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
}
static void fldlg2(int rc)
{
- fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+ fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
}
static void fldln2(int rc)
{
- fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
+ fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
}
static void fldz(int rc)
{
- fld_const(&CONST_Z, 0, TAG_Zero);
+ fld_const(&CONST_Z, 0, TAG_Zero);
}
-typedef void (*FUNC_RC)(int);
+typedef void (*FUNC_RC) (int);
static FUNC_RC constants_table[] = {
- fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
+ fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC) FPU_illegal
};
void fconst(void)
{
- (constants_table[FPU_rm])(control_word & CW_RC);
+ (constants_table[FPU_rm]) (control_word & CW_RC);
}
diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c
index 45a25875270..10806077997 100644
--- a/arch/x86/math-emu/reg_convert.c
+++ b/arch/x86/math-emu/reg_convert.c
@@ -13,41 +13,34 @@
#include "exception.h"
#include "fpu_emu.h"
-
int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
{
- int sign = getsign(a);
-
- *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
-
- /* Set up the exponent as a 16 bit quantity. */
- setexponent16(x, exponent(a));
-
- if ( exponent16(x) == EXP_UNDER )
- {
- /* The number is a de-normal or pseudodenormal. */
- /* We only deal with the significand and exponent. */
-
- if (x->sigh & 0x80000000)
- {
- /* Is a pseudodenormal. */
- /* This is non-80486 behaviour because the number
- loses its 'denormal' identity. */
- addexponent(x, 1);
- }
- else
- {
- /* Is a denormal. */
- addexponent(x, 1);
- FPU_normalize_nuo(x);
+ int sign = getsign(a);
+
+ *(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
+
+ /* Set up the exponent as a 16 bit quantity. */
+ setexponent16(x, exponent(a));
+
+ if (exponent16(x) == EXP_UNDER) {
+ /* The number is a de-normal or pseudodenormal. */
+ /* We only deal with the significand and exponent. */
+
+ if (x->sigh & 0x80000000) {
+ /* Is a pseudodenormal. */
+ /* This is non-80486 behaviour because the number
+ loses its 'denormal' identity. */
+ addexponent(x, 1);
+ } else {
+ /* Is a denormal. */
+ addexponent(x, 1);
+ FPU_normalize_nuo(x);
+ }
}
- }
- if ( !(x->sigh & 0x80000000) )
- {
- EXCEPTION(EX_INTERNAL | 0x180);
- }
+ if (!(x->sigh & 0x80000000)) {
+ EXCEPTION(EX_INTERNAL | 0x180);
+ }
- return sign;
+ return sign;
}
-
diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c
index 5cee7ff920d..6827012db34 100644
--- a/arch/x86/math-emu/reg_divide.c
+++ b/arch/x86/math-emu/reg_divide.c
@@ -26,182 +26,157 @@
*/
int FPU_div(int flags, int rm, int control_w)
{
- FPU_REG x, y;
- FPU_REG const *a, *b, *st0_ptr, *st_ptr;
- FPU_REG *dest;
- u_char taga, tagb, signa, signb, sign, saved_sign;
- int tag, deststnr;
-
- if ( flags & DEST_RM )
- deststnr = rm;
- else
- deststnr = 0;
-
- if ( flags & REV )
- {
- b = &st(0);
- st0_ptr = b;
- tagb = FPU_gettag0();
- if ( flags & LOADED )
- {
- a = (FPU_REG *)rm;
- taga = flags & 0x0f;
+ FPU_REG x, y;
+ FPU_REG const *a, *b, *st0_ptr, *st_ptr;
+ FPU_REG *dest;
+ u_char taga, tagb, signa, signb, sign, saved_sign;
+ int tag, deststnr;
+
+ if (flags & DEST_RM)
+ deststnr = rm;
+ else
+ deststnr = 0;
+
+ if (flags & REV) {
+ b = &st(0);
+ st0_ptr = b;
+ tagb = FPU_gettag0();
+ if (flags & LOADED) {
+ a = (FPU_REG *) rm;
+ taga = flags & 0x0f;
+ } else {
+ a = &st(rm);
+ st_ptr = a;
+ taga = FPU_gettagi(rm);
+ }
+ } else {
+ a = &st(0);
+ st0_ptr = a;
+ taga = FPU_gettag0();
+ if (flags & LOADED) {
+ b = (FPU_REG *) rm;
+ tagb = flags & 0x0f;
+ } else {
+ b = &st(rm);
+ st_ptr = b;
+ tagb = FPU_gettagi(rm);
+ }
}
- else
- {
- a = &st(rm);
- st_ptr = a;
- taga = FPU_gettagi(rm);
- }
- }
- else
- {
- a = &st(0);
- st0_ptr = a;
- taga = FPU_gettag0();
- if ( flags & LOADED )
- {
- b = (FPU_REG *)rm;
- tagb = flags & 0x0f;
- }
- else
- {
- b = &st(rm);
- st_ptr = b;
- tagb = FPU_gettagi(rm);
- }
- }
- signa = getsign(a);
- signb = getsign(b);
+ signa = getsign(a);
+ signb = getsign(b);
- sign = signa ^ signb;
+ sign = signa ^ signb;
- dest = &st(deststnr);
- saved_sign = getsign(dest);
+ dest = &st(deststnr);
+ saved_sign = getsign(dest);
- if ( !(taga | tagb) )
- {
- /* Both regs Valid, this should be the most common case. */
- reg_copy(a, &x);
- reg_copy(b, &y);
- setpositive(&x);
- setpositive(&y);
- tag = FPU_u_div(&x, &y, dest, control_w, sign);
+ if (!(taga | tagb)) {
+ /* Both regs Valid, this should be the most common case. */
+ reg_copy(a, &x);
+ reg_copy(b, &y);
+ setpositive(&x);
+ setpositive(&y);
+ tag = FPU_u_div(&x, &y, dest, control_w, sign);
- if ( tag < 0 )
- return tag;
+ if (tag < 0)
+ return tag;
- FPU_settagi(deststnr, tag);
- return tag;
- }
+ FPU_settagi(deststnr, tag);
+ return tag;
+ }
- if ( taga == TAG_Special )
- taga = FPU_Special(a);
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
+ if (taga == TAG_Special)
+ taga = FPU_Special(a);
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
- if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+ if (((taga == TAG_Valid) && (tagb == TW_Denormal))
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
- || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
- {
- if ( denormal_operand() < 0 )
- return FPU_Exception;
-
- FPU_to_exp16(a, &x);
- FPU_to_exp16(b, &y);
- tag = FPU_u_div(&x, &y, dest, control_w, sign);
- if ( tag < 0 )
- return tag;
-
- FPU_settagi(deststnr, tag);
- return tag;
- }
- else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
- {
- if ( tagb != TAG_Zero )
- {
- /* Want to find Zero/Valid */
- if ( tagb == TW_Denormal )
- {
- if ( denormal_operand() < 0 )
- return FPU_Exception;
- }
-
- /* The result is zero. */
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
- setsign(dest, sign);
- return TAG_Zero;
+ || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+ if (denormal_operand() < 0)
+ return FPU_Exception;
+
+ FPU_to_exp16(a, &x);
+ FPU_to_exp16(b, &y);
+ tag = FPU_u_div(&x, &y, dest, control_w, sign);
+ if (tag < 0)
+ return tag;
+
+ FPU_settagi(deststnr, tag);
+ return tag;
+ } else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) {
+ if (tagb != TAG_Zero) {
+ /* Want to find Zero/Valid */
+ if (tagb == TW_Denormal) {
+ if (denormal_operand() < 0)
+ return FPU_Exception;
+ }
+
+ /* The result is zero. */
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+ setsign(dest, sign);
+ return TAG_Zero;
+ }
+ /* We have an exception condition, either 0/0 or Valid/Zero. */
+ if (taga == TAG_Zero) {
+ /* 0/0 */
+ return arith_invalid(deststnr);
+ }
+ /* Valid/Zero */
+ return FPU_divide_by_zero(deststnr, sign);
}
- /* We have an exception condition, either 0/0 or Valid/Zero. */
- if ( taga == TAG_Zero )
- {
- /* 0/0 */
- return arith_invalid(deststnr);
+ /* Must have infinities, NaNs, etc */
+ else if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+ if (flags & LOADED)
+ return real_2op_NaN((FPU_REG *) rm, flags & 0x0f, 0,
+ st0_ptr);
+
+ if (flags & DEST_RM) {
+ int tag;
+ tag = FPU_gettag0();
+ if (tag == TAG_Special)
+ tag = FPU_Special(st0_ptr);
+ return real_2op_NaN(st0_ptr, tag, rm,
+ (flags & REV) ? st0_ptr : &st(rm));
+ } else {
+ int tag;
+ tag = FPU_gettagi(rm);
+ if (tag == TAG_Special)
+ tag = FPU_Special(&st(rm));
+ return real_2op_NaN(&st(rm), tag, 0,
+ (flags & REV) ? st0_ptr : &st(rm));
+ }
+ } else if (taga == TW_Infinity) {
+ if (tagb == TW_Infinity) {
+ /* infinity/infinity */
+ return arith_invalid(deststnr);
+ } else {
+ /* tagb must be Valid or Zero */
+ if ((tagb == TW_Denormal) && (denormal_operand() < 0))
+ return FPU_Exception;
+
+ /* Infinity divided by Zero or Valid does
+ not raise and exception, but returns Infinity */
+ FPU_copy_to_regi(a, TAG_Special, deststnr);
+ setsign(dest, sign);
+ return taga;
+ }
+ } else if (tagb == TW_Infinity) {
+ if ((taga == TW_Denormal) && (denormal_operand() < 0))
+ return FPU_Exception;
+
+ /* The result is zero. */
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+ setsign(dest, sign);
+ return TAG_Zero;
}
- /* Valid/Zero */
- return FPU_divide_by_zero(deststnr, sign);
- }
- /* Must have infinities, NaNs, etc */
- else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
- {
- if ( flags & LOADED )
- return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
-
- if ( flags & DEST_RM )
- {
- int tag;
- tag = FPU_gettag0();
- if ( tag == TAG_Special )
- tag = FPU_Special(st0_ptr);
- return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
- }
- else
- {
- int tag;
- tag = FPU_gettagi(rm);
- if ( tag == TAG_Special )
- tag = FPU_Special(&st(rm));
- return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
- }
- }
- else if (taga == TW_Infinity)
- {
- if (tagb == TW_Infinity)
- {
- /* infinity/infinity */
- return arith_invalid(deststnr);
- }
- else
- {
- /* tagb must be Valid or Zero */
- if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
- return FPU_Exception;
-
- /* Infinity divided by Zero or Valid does
- not raise and exception, but returns Infinity */
- FPU_copy_to_regi(a, TAG_Special, deststnr);
- setsign(dest, sign);
- return taga;
- }
- }
- else if (tagb == TW_Infinity)
- {
- if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
- return FPU_Exception;
-
- /* The result is zero. */
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
- setsign(dest, sign);
- return TAG_Zero;
- }
#ifdef PARANOID
- else
- {
- EXCEPTION(EX_INTERNAL|0x102);
- return FPU_Exception;
- }
-#endif /* PARANOID */
+ else {
+ EXCEPTION(EX_INTERNAL | 0x102);
+ return FPU_Exception;
+ }
+#endif /* PARANOID */
return 0;
}
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index e976caef649..799d4af5be6 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -27,1084 +27,938 @@
#include "control_w.h"
#include "status_w.h"
-
-#define DOUBLE_Emax 1023 /* largest valid exponent */
+#define DOUBLE_Emax 1023 /* largest valid exponent */
#define DOUBLE_Ebias 1023
-#define DOUBLE_Emin (-1022) /* smallest valid exponent */
+#define DOUBLE_Emin (-1022) /* smallest valid exponent */
-#define SINGLE_Emax 127 /* largest valid exponent */
+#define SINGLE_Emax 127 /* largest valid exponent */
#define SINGLE_Ebias 127
-#define SINGLE_Emin (-126) /* smallest valid exponent */
-
+#define SINGLE_Emin (-126) /* smallest valid exponent */
static u_char normalize_no_excep(FPU_REG *r, int exp, int sign)
{
- u_char tag;
+ u_char tag;
- setexponent16(r, exp);
+ setexponent16(r, exp);
- tag = FPU_normalize_nuo(r);
- stdexp(r);
- if ( sign )
- setnegative(r);
+ tag = FPU_normalize_nuo(r);
+ stdexp(r);
+ if (sign)
+ setnegative(r);
- return tag;
+ return tag;
}
-
int FPU_tagof(FPU_REG *ptr)
{
- int exp;
-
- exp = exponent16(ptr) & 0x7fff;
- if ( exp == 0 )
- {
- if ( !(ptr->sigh | ptr->sigl) )
- {
- return TAG_Zero;
+ int exp;
+
+ exp = exponent16(ptr) & 0x7fff;
+ if (exp == 0) {
+ if (!(ptr->sigh | ptr->sigl)) {
+ return TAG_Zero;
+ }
+ /* The number is a de-normal or pseudodenormal. */
+ return TAG_Special;
+ }
+
+ if (exp == 0x7fff) {
+ /* Is an Infinity, a NaN, or an unsupported data type. */
+ return TAG_Special;
}
- /* The number is a de-normal or pseudodenormal. */
- return TAG_Special;
- }
-
- if ( exp == 0x7fff )
- {
- /* Is an Infinity, a NaN, or an unsupported data type. */
- return TAG_Special;
- }
-
- if ( !(ptr->sigh & 0x80000000) )
- {
- /* Unsupported data type. */
- /* Valid numbers have the ms bit set to 1. */
- /* Unnormal. */
- return TAG_Special;
- }
-
- return TAG_Valid;
-}
+ if (!(ptr->sigh & 0x80000000)) {
+ /* Unsupported data type. */
+ /* Valid numbers have the ms bit set to 1. */
+ /* Unnormal. */
+ return TAG_Special;
+ }
+
+ return TAG_Valid;
+}
/* Get a long double from user memory */
int FPU_load_extended(long double __user *s, int stnr)
{
- FPU_REG *sti_ptr = &st(stnr);
+ FPU_REG *sti_ptr = &st(stnr);
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, s, 10);
- __copy_from_user(sti_ptr, s, 10);
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, s, 10);
+ __copy_from_user(sti_ptr, s, 10);
+ RE_ENTRANT_CHECK_ON;
- return FPU_tagof(sti_ptr);
+ return FPU_tagof(sti_ptr);
}
-
/* Get a double from user memory */
int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data)
{
- int exp, tag, negative;
- unsigned m64, l64;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, dfloat, 8);
- FPU_get_user(m64, 1 + (unsigned long __user *) dfloat);
- FPU_get_user(l64, (unsigned long __user *) dfloat);
- RE_ENTRANT_CHECK_ON;
-
- negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
- exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
- m64 &= 0xfffff;
- if ( exp > DOUBLE_Emax + EXTENDED_Ebias )
- {
- /* Infinity or NaN */
- if ((m64 == 0) && (l64 == 0))
- {
- /* +- infinity */
- loaded_data->sigh = 0x80000000;
- loaded_data->sigl = 0x00000000;
- exp = EXP_Infinity + EXTENDED_Ebias;
- tag = TAG_Special;
- }
- else
- {
- /* Must be a signaling or quiet NaN */
- exp = EXP_NaN + EXTENDED_Ebias;
- loaded_data->sigh = (m64 << 11) | 0x80000000;
- loaded_data->sigh |= l64 >> 21;
- loaded_data->sigl = l64 << 11;
- tag = TAG_Special; /* The calling function must look for NaNs */
- }
- }
- else if ( exp < DOUBLE_Emin + EXTENDED_Ebias )
- {
- /* Zero or de-normal */
- if ((m64 == 0) && (l64 == 0))
- {
- /* Zero */
- reg_copy(&CONST_Z, loaded_data);
- exp = 0;
- tag = TAG_Zero;
- }
- else
- {
- /* De-normal */
- loaded_data->sigh = m64 << 11;
- loaded_data->sigh |= l64 >> 21;
- loaded_data->sigl = l64 << 11;
-
- return normalize_no_excep(loaded_data, DOUBLE_Emin, negative)
- | (denormal_operand() < 0 ? FPU_Exception : 0);
- }
- }
- else
- {
- loaded_data->sigh = (m64 << 11) | 0x80000000;
- loaded_data->sigh |= l64 >> 21;
- loaded_data->sigl = l64 << 11;
+ int exp, tag, negative;
+ unsigned m64, l64;
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, dfloat, 8);
+ FPU_get_user(m64, 1 + (unsigned long __user *)dfloat);
+ FPU_get_user(l64, (unsigned long __user *)dfloat);
+ RE_ENTRANT_CHECK_ON;
+
+ negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+ exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias;
+ m64 &= 0xfffff;
+ if (exp > DOUBLE_Emax + EXTENDED_Ebias) {
+ /* Infinity or NaN */
+ if ((m64 == 0) && (l64 == 0)) {
+ /* +- infinity */
+ loaded_data->sigh = 0x80000000;
+ loaded_data->sigl = 0x00000000;
+ exp = EXP_Infinity + EXTENDED_Ebias;
+ tag = TAG_Special;
+ } else {
+ /* Must be a signaling or quiet NaN */
+ exp = EXP_NaN + EXTENDED_Ebias;
+ loaded_data->sigh = (m64 << 11) | 0x80000000;
+ loaded_data->sigh |= l64 >> 21;
+ loaded_data->sigl = l64 << 11;
+ tag = TAG_Special; /* The calling function must look for NaNs */
+ }
+ } else if (exp < DOUBLE_Emin + EXTENDED_Ebias) {
+ /* Zero or de-normal */
+ if ((m64 == 0) && (l64 == 0)) {
+ /* Zero */
+ reg_copy(&CONST_Z, loaded_data);
+ exp = 0;
+ tag = TAG_Zero;
+ } else {
+ /* De-normal */
+ loaded_data->sigh = m64 << 11;
+ loaded_data->sigh |= l64 >> 21;
+ loaded_data->sigl = l64 << 11;
+
+ return normalize_no_excep(loaded_data, DOUBLE_Emin,
+ negative)
+ | (denormal_operand() < 0 ? FPU_Exception : 0);
+ }
+ } else {
+ loaded_data->sigh = (m64 << 11) | 0x80000000;
+ loaded_data->sigh |= l64 >> 21;
+ loaded_data->sigl = l64 << 11;
- tag = TAG_Valid;
- }
+ tag = TAG_Valid;
+ }
- setexponent16(loaded_data, exp | negative);
+ setexponent16(loaded_data, exp | negative);
- return tag;
+ return tag;
}
-
/* Get a float from user memory */
int FPU_load_single(float __user *single, FPU_REG *loaded_data)
{
- unsigned m32;
- int exp, tag, negative;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, single, 4);
- FPU_get_user(m32, (unsigned long __user *) single);
- RE_ENTRANT_CHECK_ON;
-
- negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
-
- if (!(m32 & 0x7fffffff))
- {
- /* Zero */
- reg_copy(&CONST_Z, loaded_data);
- addexponent(loaded_data, negative);
- return TAG_Zero;
- }
- exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
- m32 = (m32 & 0x7fffff) << 8;
- if ( exp < SINGLE_Emin + EXTENDED_Ebias )
- {
- /* De-normals */
- loaded_data->sigh = m32;
- loaded_data->sigl = 0;
-
- return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
- | (denormal_operand() < 0 ? FPU_Exception : 0);
- }
- else if ( exp > SINGLE_Emax + EXTENDED_Ebias )
- {
- /* Infinity or NaN */
- if ( m32 == 0 )
- {
- /* +- infinity */
- loaded_data->sigh = 0x80000000;
- loaded_data->sigl = 0x00000000;
- exp = EXP_Infinity + EXTENDED_Ebias;
- tag = TAG_Special;
+ unsigned m32;
+ int exp, tag, negative;
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, single, 4);
+ FPU_get_user(m32, (unsigned long __user *)single);
+ RE_ENTRANT_CHECK_ON;
+
+ negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive;
+
+ if (!(m32 & 0x7fffffff)) {
+ /* Zero */
+ reg_copy(&CONST_Z, loaded_data);
+ addexponent(loaded_data, negative);
+ return TAG_Zero;
}
- else
- {
- /* Must be a signaling or quiet NaN */
- exp = EXP_NaN + EXTENDED_Ebias;
- loaded_data->sigh = m32 | 0x80000000;
- loaded_data->sigl = 0;
- tag = TAG_Special; /* The calling function must look for NaNs */
+ exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias;
+ m32 = (m32 & 0x7fffff) << 8;
+ if (exp < SINGLE_Emin + EXTENDED_Ebias) {
+ /* De-normals */
+ loaded_data->sigh = m32;
+ loaded_data->sigl = 0;
+
+ return normalize_no_excep(loaded_data, SINGLE_Emin, negative)
+ | (denormal_operand() < 0 ? FPU_Exception : 0);
+ } else if (exp > SINGLE_Emax + EXTENDED_Ebias) {
+ /* Infinity or NaN */
+ if (m32 == 0) {
+ /* +- infinity */
+ loaded_data->sigh = 0x80000000;
+ loaded_data->sigl = 0x00000000;
+ exp = EXP_Infinity + EXTENDED_Ebias;
+ tag = TAG_Special;
+ } else {
+ /* Must be a signaling or quiet NaN */
+ exp = EXP_NaN + EXTENDED_Ebias;
+ loaded_data->sigh = m32 | 0x80000000;
+ loaded_data->sigl = 0;
+ tag = TAG_Special; /* The calling function must look for NaNs */
+ }
+ } else {
+ loaded_data->sigh = m32 | 0x80000000;
+ loaded_data->sigl = 0;
+ tag = TAG_Valid;
}
- }
- else
- {
- loaded_data->sigh = m32 | 0x80000000;
- loaded_data->sigl = 0;
- tag = TAG_Valid;
- }
- setexponent16(loaded_data, exp | negative); /* Set the sign. */
+ setexponent16(loaded_data, exp | negative); /* Set the sign. */
- return tag;
+ return tag;
}
-
/* Get a long long from user memory */
int FPU_load_int64(long long __user *_s)
{
- long long s;
- int sign;
- FPU_REG *st0_ptr = &st(0);
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, _s, 8);
- if (copy_from_user(&s,_s,8))
- FPU_abort;
- RE_ENTRANT_CHECK_ON;
-
- if (s == 0)
- {
- reg_copy(&CONST_Z, st0_ptr);
- return TAG_Zero;
- }
-
- if (s > 0)
- sign = SIGN_Positive;
- else
- {
- s = -s;
- sign = SIGN_Negative;
- }
-
- significand(st0_ptr) = s;
-
- return normalize_no_excep(st0_ptr, 63, sign);
-}
+ long long s;
+ int sign;
+ FPU_REG *st0_ptr = &st(0);
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, _s, 8);
+ if (copy_from_user(&s, _s, 8))
+ FPU_abort;
+ RE_ENTRANT_CHECK_ON;
+
+ if (s == 0) {
+ reg_copy(&CONST_Z, st0_ptr);
+ return TAG_Zero;
+ }
+
+ if (s > 0)
+ sign = SIGN_Positive;
+ else {
+ s = -s;
+ sign = SIGN_Negative;
+ }
+ significand(st0_ptr) = s;
+
+ return normalize_no_excep(st0_ptr, 63, sign);
+}
/* Get a long from user memory */
int FPU_load_int32(long __user *_s, FPU_REG *loaded_data)
{
- long s;
- int negative;
+ long s;
+ int negative;
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, _s, 4);
- FPU_get_user(s, _s);
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, _s, 4);
+ FPU_get_user(s, _s);
+ RE_ENTRANT_CHECK_ON;
- if (s == 0)
- { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+ if (s == 0) {
+ reg_copy(&CONST_Z, loaded_data);
+ return TAG_Zero;
+ }
- if (s > 0)
- negative = SIGN_Positive;
- else
- {
- s = -s;
- negative = SIGN_Negative;
- }
+ if (s > 0)
+ negative = SIGN_Positive;
+ else {
+ s = -s;
+ negative = SIGN_Negative;
+ }
- loaded_data->sigh = s;
- loaded_data->sigl = 0;
+ loaded_data->sigh = s;
+ loaded_data->sigl = 0;
- return normalize_no_excep(loaded_data, 31, negative);
+ return normalize_no_excep(loaded_data, 31, negative);
}
-
/* Get a short from user memory */
int FPU_load_int16(short __user *_s, FPU_REG *loaded_data)
{
- int s, negative;
+ int s, negative;
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, _s, 2);
- /* Cast as short to get the sign extended. */
- FPU_get_user(s, _s);
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, _s, 2);
+ /* Cast as short to get the sign extended. */
+ FPU_get_user(s, _s);
+ RE_ENTRANT_CHECK_ON;
- if (s == 0)
- { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; }
+ if (s == 0) {
+ reg_copy(&CONST_Z, loaded_data);
+ return TAG_Zero;
+ }
- if (s > 0)
- negative = SIGN_Positive;
- else
- {
- s = -s;
- negative = SIGN_Negative;
- }
+ if (s > 0)
+ negative = SIGN_Positive;
+ else {
+ s = -s;
+ negative = SIGN_Negative;
+ }
- loaded_data->sigh = s << 16;
- loaded_data->sigl = 0;
+ loaded_data->sigh = s << 16;
+ loaded_data->sigl = 0;
- return normalize_no_excep(loaded_data, 15, negative);
+ return normalize_no_excep(loaded_data, 15, negative);
}
-
/* Get a packed bcd array from user memory */
int FPU_load_bcd(u_char __user *s)
{
- FPU_REG *st0_ptr = &st(0);
- int pos;
- u_char bcd;
- long long l=0;
- int sign;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, s, 10);
- RE_ENTRANT_CHECK_ON;
- for ( pos = 8; pos >= 0; pos--)
- {
- l *= 10;
- RE_ENTRANT_CHECK_OFF;
- FPU_get_user(bcd, s+pos);
- RE_ENTRANT_CHECK_ON;
- l += bcd >> 4;
- l *= 10;
- l += bcd & 0x0f;
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_get_user(sign, s+9);
- sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
- RE_ENTRANT_CHECK_ON;
-
- if ( l == 0 )
- {
- reg_copy(&CONST_Z, st0_ptr);
- addexponent(st0_ptr, sign); /* Set the sign. */
- return TAG_Zero;
- }
- else
- {
- significand(st0_ptr) = l;
- return normalize_no_excep(st0_ptr, 63, sign);
- }
+ FPU_REG *st0_ptr = &st(0);
+ int pos;
+ u_char bcd;
+ long long l = 0;
+ int sign;
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, s, 10);
+ RE_ENTRANT_CHECK_ON;
+ for (pos = 8; pos >= 0; pos--) {
+ l *= 10;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_get_user(bcd, s + pos);
+ RE_ENTRANT_CHECK_ON;
+ l += bcd >> 4;
+ l *= 10;
+ l += bcd & 0x0f;
+ }
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_get_user(sign, s + 9);
+ sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive;
+ RE_ENTRANT_CHECK_ON;
+
+ if (l == 0) {
+ reg_copy(&CONST_Z, st0_ptr);
+ addexponent(st0_ptr, sign); /* Set the sign. */
+ return TAG_Zero;
+ } else {
+ significand(st0_ptr) = l;
+ return normalize_no_excep(st0_ptr, 63, sign);
+ }
}
/*===========================================================================*/
/* Put a long double into user memory */
-int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d)
+int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
+ long double __user * d)
{
- /*
- The only exception raised by an attempt to store to an
- extended format is the Invalid Stack exception, i.e.
- attempting to store from an empty register.
- */
-
- if ( st0_tag != TAG_Empty )
- {
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE, d, 10);
-
- FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d);
- FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4));
- FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8));
- RE_ENTRANT_CHECK_ON;
-
- return 1;
- }
-
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- /* Put out the QNaN indefinite */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,10);
- FPU_put_user(0, (unsigned long __user *) d);
- FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d);
- FPU_put_user(0xffff, 4 + (short __user *) d);
- RE_ENTRANT_CHECK_ON;
- return 1;
- }
- else
- return 0;
+ /*
+ The only exception raised by an attempt to store to an
+ extended format is the Invalid Stack exception, i.e.
+ attempting to store from an empty register.
+ */
+
+ if (st0_tag != TAG_Empty) {
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 10);
+
+ FPU_put_user(st0_ptr->sigl, (unsigned long __user *)d);
+ FPU_put_user(st0_ptr->sigh,
+ (unsigned long __user *)((u_char __user *) d + 4));
+ FPU_put_user(exponent16(st0_ptr),
+ (unsigned short __user *)((u_char __user *) d +
+ 8));
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
+ }
-}
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ /* Put out the QNaN indefinite */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 10);
+ FPU_put_user(0, (unsigned long __user *)d);
+ FPU_put_user(0xc0000000, 1 + (unsigned long __user *)d);
+ FPU_put_user(0xffff, 4 + (short __user *)d);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ } else
+ return 0;
+}
/* Put a double into user memory */
int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
{
- unsigned long l[2];
- unsigned long increment = 0; /* avoid gcc warnings */
- int precision_loss;
- int exp;
- FPU_REG tmp;
+ unsigned long l[2];
+ unsigned long increment = 0; /* avoid gcc warnings */
+ int precision_loss;
+ int exp;
+ FPU_REG tmp;
- if ( st0_tag == TAG_Valid )
- {
- reg_copy(st0_ptr, &tmp);
- exp = exponent(&tmp);
+ if (st0_tag == TAG_Valid) {
+ reg_copy(st0_ptr, &tmp);
+ exp = exponent(&tmp);
- if ( exp < DOUBLE_Emin ) /* It may be a denormal */
- {
- addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */
+ if (exp < DOUBLE_Emin) { /* It may be a denormal */
+ addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */
- denormal_arg:
+ denormal_arg:
- if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
- {
+ if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) {
#ifdef PECULIAR_486
- /* Did it round to a non-denormal ? */
- /* This behaviour might be regarded as peculiar, it appears
- that the 80486 rounds to the dest precision, then
- converts to decide underflow. */
- if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
- (st0_ptr->sigl & 0x000007ff)) )
+ /* Did it round to a non-denormal ? */
+ /* This behaviour might be regarded as peculiar, it appears
+ that the 80486 rounds to the dest precision, then
+ converts to decide underflow. */
+ if (!
+ ((tmp.sigh == 0x00100000) && (tmp.sigl == 0)
+ && (st0_ptr->sigl & 0x000007ff)))
#endif /* PECULIAR_486 */
- {
- EXCEPTION(EX_Underflow);
- /* This is a special case: see sec 16.2.5.1 of
- the 80486 book */
- if ( !(control_word & CW_Underflow) )
- return 0;
- }
- EXCEPTION(precision_loss);
- if ( !(control_word & CW_Precision) )
- return 0;
- }
- l[0] = tmp.sigl;
- l[1] = tmp.sigh;
- }
- else
- {
- if ( tmp.sigl & 0x000007ff )
- {
- precision_loss = 1;
- switch (control_word & CW_RC)
- {
- case RC_RND:
- /* Rounding can get a little messy.. */
- increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */
- ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */
- break;
- case RC_DOWN: /* towards -infinity */
- increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff;
- break;
- case RC_UP: /* towards +infinity */
- increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0;
- break;
- case RC_CHOP:
- increment = 0;
- break;
- }
-
- /* Truncate the mantissa */
- tmp.sigl &= 0xfffff800;
-
- if ( increment )
- {
- if ( tmp.sigl >= 0xfffff800 )
- {
- /* the sigl part overflows */
- if ( tmp.sigh == 0xffffffff )
- {
- /* The sigh part overflows */
- tmp.sigh = 0x80000000;
- exp++;
- if (exp >= EXP_OVER)
- goto overflow;
+ {
+ EXCEPTION(EX_Underflow);
+ /* This is a special case: see sec 16.2.5.1 of
+ the 80486 book */
+ if (!(control_word & CW_Underflow))
+ return 0;
+ }
+ EXCEPTION(precision_loss);
+ if (!(control_word & CW_Precision))
+ return 0;
}
- else
- {
- tmp.sigh ++;
+ l[0] = tmp.sigl;
+ l[1] = tmp.sigh;
+ } else {
+ if (tmp.sigl & 0x000007ff) {
+ precision_loss = 1;
+ switch (control_word & CW_RC) {
+ case RC_RND:
+ /* Rounding can get a little messy.. */
+ increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */
+ ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */
+ break;
+ case RC_DOWN: /* towards -infinity */
+ increment =
+ signpositive(&tmp) ? 0 : tmp.
+ sigl & 0x7ff;
+ break;
+ case RC_UP: /* towards +infinity */
+ increment =
+ signpositive(&tmp) ? tmp.
+ sigl & 0x7ff : 0;
+ break;
+ case RC_CHOP:
+ increment = 0;
+ break;
+ }
+
+ /* Truncate the mantissa */
+ tmp.sigl &= 0xfffff800;
+
+ if (increment) {
+ if (tmp.sigl >= 0xfffff800) {
+ /* the sigl part overflows */
+ if (tmp.sigh == 0xffffffff) {
+ /* The sigh part overflows */
+ tmp.sigh = 0x80000000;
+ exp++;
+ if (exp >= EXP_OVER)
+ goto overflow;
+ } else {
+ tmp.sigh++;
+ }
+ tmp.sigl = 0x00000000;
+ } else {
+ /* We only need to increment sigl */
+ tmp.sigl += 0x00000800;
+ }
+ }
+ } else
+ precision_loss = 0;
+
+ l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
+ l[1] = ((tmp.sigh >> 11) & 0xfffff);
+
+ if (exp > DOUBLE_Emax) {
+ overflow:
+ EXCEPTION(EX_Overflow);
+ if (!(control_word & CW_Overflow))
+ return 0;
+ set_precision_flag_up();
+ if (!(control_word & CW_Precision))
+ return 0;
+
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+ /* Overflow to infinity */
+ l[0] = 0x00000000; /* Set to */
+ l[1] = 0x7ff00000; /* + INF */
+ } else {
+ if (precision_loss) {
+ if (increment)
+ set_precision_flag_up();
+ else
+ set_precision_flag_down();
+ }
+ /* Add the exponent */
+ l[1] |= (((exp + DOUBLE_Ebias) & 0x7ff) << 20);
}
- tmp.sigl = 0x00000000;
- }
- else
- {
- /* We only need to increment sigl */
- tmp.sigl += 0x00000800;
- }
- }
- }
- else
- precision_loss = 0;
-
- l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
- l[1] = ((tmp.sigh >> 11) & 0xfffff);
-
- if ( exp > DOUBLE_Emax )
- {
- overflow:
- EXCEPTION(EX_Overflow);
- if ( !(control_word & CW_Overflow) )
- return 0;
- set_precision_flag_up();
- if ( !(control_word & CW_Precision) )
- return 0;
-
- /* This is a special case: see sec 16.2.5.1 of the 80486 book */
- /* Overflow to infinity */
- l[0] = 0x00000000; /* Set to */
- l[1] = 0x7ff00000; /* + INF */
- }
- else
- {
- if ( precision_loss )
- {
- if ( increment )
- set_precision_flag_up();
- else
- set_precision_flag_down();
}
- /* Add the exponent */
- l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
- }
- }
- }
- else if (st0_tag == TAG_Zero)
- {
- /* Number is zero */
- l[0] = 0;
- l[1] = 0;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if ( st0_tag == TW_Denormal )
- {
- /* A denormal will always underflow. */
+ } else if (st0_tag == TAG_Zero) {
+ /* Number is zero */
+ l[0] = 0;
+ l[1] = 0;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if (st0_tag == TW_Denormal) {
+ /* A denormal will always underflow. */
#ifndef PECULIAR_486
- /* An 80486 is supposed to be able to generate
- a denormal exception here, but... */
- /* Underflow has priority. */
- if ( control_word & CW_Underflow )
- denormal_operand();
+ /* An 80486 is supposed to be able to generate
+ a denormal exception here, but... */
+ /* Underflow has priority. */
+ if (control_word & CW_Underflow)
+ denormal_operand();
#endif /* PECULIAR_486 */
- reg_copy(st0_ptr, &tmp);
- goto denormal_arg;
- }
- else if (st0_tag == TW_Infinity)
- {
- l[0] = 0;
- l[1] = 0x7ff00000;
- }
- else if (st0_tag == TW_NaN)
- {
- /* Is it really a NaN ? */
- if ( (exponent(st0_ptr) == EXP_OVER)
- && (st0_ptr->sigh & 0x80000000) )
- {
- /* See if we can get a valid NaN from the FPU_REG */
- l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
- l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
- if ( !(st0_ptr->sigh & 0x40000000) )
- {
- /* It is a signalling NaN */
- EXCEPTION(EX_Invalid);
- if ( !(control_word & CW_Invalid) )
- return 0;
- l[1] |= (0x40000000 >> 11);
+ reg_copy(st0_ptr, &tmp);
+ goto denormal_arg;
+ } else if (st0_tag == TW_Infinity) {
+ l[0] = 0;
+ l[1] = 0x7ff00000;
+ } else if (st0_tag == TW_NaN) {
+ /* Is it really a NaN ? */
+ if ((exponent(st0_ptr) == EXP_OVER)
+ && (st0_ptr->sigh & 0x80000000)) {
+ /* See if we can get a valid NaN from the FPU_REG */
+ l[0] =
+ (st0_ptr->sigl >> 11) | (st0_ptr->
+ sigh << 21);
+ l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
+ if (!(st0_ptr->sigh & 0x40000000)) {
+ /* It is a signalling NaN */
+ EXCEPTION(EX_Invalid);
+ if (!(control_word & CW_Invalid))
+ return 0;
+ l[1] |= (0x40000000 >> 11);
+ }
+ l[1] |= 0x7ff00000;
+ } else {
+ /* It is an unsupported data type */
+ EXCEPTION(EX_Invalid);
+ if (!(control_word & CW_Invalid))
+ return 0;
+ l[0] = 0;
+ l[1] = 0xfff80000;
+ }
}
- l[1] |= 0x7ff00000;
- }
- else
- {
- /* It is an unsupported data type */
- EXCEPTION(EX_Invalid);
- if ( !(control_word & CW_Invalid) )
- return 0;
- l[0] = 0;
- l[1] = 0xfff80000;
- }
+ } else if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ if (control_word & CW_Invalid) {
+ /* The masked response */
+ /* Put out the QNaN indefinite */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, dfloat, 8);
+ FPU_put_user(0, (unsigned long __user *)dfloat);
+ FPU_put_user(0xfff80000,
+ 1 + (unsigned long __user *)dfloat);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ } else
+ return 0;
}
- }
- else if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- if ( control_word & CW_Invalid )
- {
- /* The masked response */
- /* Put out the QNaN indefinite */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,dfloat,8);
- FPU_put_user(0, (unsigned long __user *) dfloat);
- FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat);
- RE_ENTRANT_CHECK_ON;
- return 1;
- }
- else
- return 0;
- }
- if ( getsign(st0_ptr) )
- l[1] |= 0x80000000;
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,dfloat,8);
- FPU_put_user(l[0], (unsigned long __user *)dfloat);
- FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
- RE_ENTRANT_CHECK_ON;
-
- return 1;
-}
+ if (getsign(st0_ptr))
+ l[1] |= 0x80000000;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, dfloat, 8);
+ FPU_put_user(l[0], (unsigned long __user *)dfloat);
+ FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat);
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
+}
/* Put a float into user memory */
int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single)
{
- long templ = 0;
- unsigned long increment = 0; /* avoid gcc warnings */
- int precision_loss;
- int exp;
- FPU_REG tmp;
+ long templ = 0;
+ unsigned long increment = 0; /* avoid gcc warnings */
+ int precision_loss;
+ int exp;
+ FPU_REG tmp;
- if ( st0_tag == TAG_Valid )
- {
+ if (st0_tag == TAG_Valid) {
- reg_copy(st0_ptr, &tmp);
- exp = exponent(&tmp);
+ reg_copy(st0_ptr, &tmp);
+ exp = exponent(&tmp);
- if ( exp < SINGLE_Emin )
- {
- addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */
+ if (exp < SINGLE_Emin) {
+ addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */
- denormal_arg:
+ denormal_arg:
- if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) )
- {
+ if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) {
#ifdef PECULIAR_486
- /* Did it round to a non-denormal ? */
- /* This behaviour might be regarded as peculiar, it appears
- that the 80486 rounds to the dest precision, then
- converts to decide underflow. */
- if ( !((tmp.sigl == 0x00800000) &&
- ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
+ /* Did it round to a non-denormal ? */
+ /* This behaviour might be regarded as peculiar, it appears
+ that the 80486 rounds to the dest precision, then
+ converts to decide underflow. */
+ if (!((tmp.sigl == 0x00800000) &&
+ ((st0_ptr->sigh & 0x000000ff)
+ || st0_ptr->sigl)))
#endif /* PECULIAR_486 */
- {
- EXCEPTION(EX_Underflow);
- /* This is a special case: see sec 16.2.5.1 of
- the 80486 book */
- if ( !(control_word & CW_Underflow) )
- return 0;
- }
- EXCEPTION(precision_loss);
- if ( !(control_word & CW_Precision) )
- return 0;
- }
- templ = tmp.sigl;
- }
- else
- {
- if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
- {
- unsigned long sigh = tmp.sigh;
- unsigned long sigl = tmp.sigl;
-
- precision_loss = 1;
- switch (control_word & CW_RC)
- {
- case RC_RND:
- increment = ((sigh & 0xff) > 0x80) /* more than half */
- || (((sigh & 0xff) == 0x80) && sigl) /* more than half */
- || ((sigh & 0x180) == 0x180); /* round to even */
- break;
- case RC_DOWN: /* towards -infinity */
- increment = signpositive(&tmp)
- ? 0 : (sigl | (sigh & 0xff));
- break;
- case RC_UP: /* towards +infinity */
- increment = signpositive(&tmp)
- ? (sigl | (sigh & 0xff)) : 0;
- break;
- case RC_CHOP:
- increment = 0;
- break;
- }
-
- /* Truncate part of the mantissa */
- tmp.sigl = 0;
-
- if (increment)
- {
- if ( sigh >= 0xffffff00 )
- {
- /* The sigh part overflows */
- tmp.sigh = 0x80000000;
- exp++;
- if ( exp >= EXP_OVER )
- goto overflow;
- }
- else
- {
- tmp.sigh &= 0xffffff00;
- tmp.sigh += 0x100;
- }
- }
- else
- {
- tmp.sigh &= 0xffffff00; /* Finish the truncation */
- }
- }
- else
- precision_loss = 0;
-
- templ = (tmp.sigh >> 8) & 0x007fffff;
-
- if ( exp > SINGLE_Emax )
- {
- overflow:
- EXCEPTION(EX_Overflow);
- if ( !(control_word & CW_Overflow) )
- return 0;
- set_precision_flag_up();
- if ( !(control_word & CW_Precision) )
- return 0;
-
- /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
- /* Masked response is overflow to infinity. */
- templ = 0x7f800000;
- }
- else
- {
- if ( precision_loss )
- {
- if ( increment )
- set_precision_flag_up();
- else
- set_precision_flag_down();
+ {
+ EXCEPTION(EX_Underflow);
+ /* This is a special case: see sec 16.2.5.1 of
+ the 80486 book */
+ if (!(control_word & CW_Underflow))
+ return 0;
+ }
+ EXCEPTION(precision_loss);
+ if (!(control_word & CW_Precision))
+ return 0;
+ }
+ templ = tmp.sigl;
+ } else {
+ if (tmp.sigl | (tmp.sigh & 0x000000ff)) {
+ unsigned long sigh = tmp.sigh;
+ unsigned long sigl = tmp.sigl;
+
+ precision_loss = 1;
+ switch (control_word & CW_RC) {
+ case RC_RND:
+ increment = ((sigh & 0xff) > 0x80) /* more than half */
+ ||(((sigh & 0xff) == 0x80) && sigl) /* more than half */
+ ||((sigh & 0x180) == 0x180); /* round to even */
+ break;
+ case RC_DOWN: /* towards -infinity */
+ increment = signpositive(&tmp)
+ ? 0 : (sigl | (sigh & 0xff));
+ break;
+ case RC_UP: /* towards +infinity */
+ increment = signpositive(&tmp)
+ ? (sigl | (sigh & 0xff)) : 0;
+ break;
+ case RC_CHOP:
+ increment = 0;
+ break;
+ }
+
+ /* Truncate part of the mantissa */
+ tmp.sigl = 0;
+
+ if (increment) {
+ if (sigh >= 0xffffff00) {
+ /* The sigh part overflows */
+ tmp.sigh = 0x80000000;
+ exp++;
+ if (exp >= EXP_OVER)
+ goto overflow;
+ } else {
+ tmp.sigh &= 0xffffff00;
+ tmp.sigh += 0x100;
+ }
+ } else {
+ tmp.sigh &= 0xffffff00; /* Finish the truncation */
+ }
+ } else
+ precision_loss = 0;
+
+ templ = (tmp.sigh >> 8) & 0x007fffff;
+
+ if (exp > SINGLE_Emax) {
+ overflow:
+ EXCEPTION(EX_Overflow);
+ if (!(control_word & CW_Overflow))
+ return 0;
+ set_precision_flag_up();
+ if (!(control_word & CW_Precision))
+ return 0;
+
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
+ /* Masked response is overflow to infinity. */
+ templ = 0x7f800000;
+ } else {
+ if (precision_loss) {
+ if (increment)
+ set_precision_flag_up();
+ else
+ set_precision_flag_down();
+ }
+ /* Add the exponent */
+ templ |= ((exp + SINGLE_Ebias) & 0xff) << 23;
+ }
}
- /* Add the exponent */
- templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
- }
- }
- }
- else if (st0_tag == TAG_Zero)
- {
- templ = 0;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if (st0_tag == TW_Denormal)
- {
- reg_copy(st0_ptr, &tmp);
-
- /* A denormal will always underflow. */
+ } else if (st0_tag == TAG_Zero) {
+ templ = 0;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if (st0_tag == TW_Denormal) {
+ reg_copy(st0_ptr, &tmp);
+
+ /* A denormal will always underflow. */
#ifndef PECULIAR_486
- /* An 80486 is supposed to be able to generate
- a denormal exception here, but... */
- /* Underflow has priority. */
- if ( control_word & CW_Underflow )
- denormal_operand();
-#endif /* PECULIAR_486 */
- goto denormal_arg;
- }
- else if (st0_tag == TW_Infinity)
- {
- templ = 0x7f800000;
- }
- else if (st0_tag == TW_NaN)
- {
- /* Is it really a NaN ? */
- if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) )
- {
- /* See if we can get a valid NaN from the FPU_REG */
- templ = st0_ptr->sigh >> 8;
- if ( !(st0_ptr->sigh & 0x40000000) )
- {
- /* It is a signalling NaN */
- EXCEPTION(EX_Invalid);
- if ( !(control_word & CW_Invalid) )
- return 0;
- templ |= (0x40000000 >> 8);
+ /* An 80486 is supposed to be able to generate
+ a denormal exception here, but... */
+ /* Underflow has priority. */
+ if (control_word & CW_Underflow)
+ denormal_operand();
+#endif /* PECULIAR_486 */
+ goto denormal_arg;
+ } else if (st0_tag == TW_Infinity) {
+ templ = 0x7f800000;
+ } else if (st0_tag == TW_NaN) {
+ /* Is it really a NaN ? */
+ if ((exponent(st0_ptr) == EXP_OVER)
+ && (st0_ptr->sigh & 0x80000000)) {
+ /* See if we can get a valid NaN from the FPU_REG */
+ templ = st0_ptr->sigh >> 8;
+ if (!(st0_ptr->sigh & 0x40000000)) {
+ /* It is a signalling NaN */
+ EXCEPTION(EX_Invalid);
+ if (!(control_word & CW_Invalid))
+ return 0;
+ templ |= (0x40000000 >> 8);
+ }
+ templ |= 0x7f800000;
+ } else {
+ /* It is an unsupported data type */
+ EXCEPTION(EX_Invalid);
+ if (!(control_word & CW_Invalid))
+ return 0;
+ templ = 0xffc00000;
+ }
}
- templ |= 0x7f800000;
- }
- else
- {
- /* It is an unsupported data type */
- EXCEPTION(EX_Invalid);
- if ( !(control_word & CW_Invalid) )
- return 0;
- templ = 0xffc00000;
- }
- }
#ifdef PARANOID
- else
- {
- EXCEPTION(EX_INTERNAL|0x164);
- return 0;
- }
+ else {
+ EXCEPTION(EX_INTERNAL | 0x164);
+ return 0;
+ }
#endif
- }
- else if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- if ( control_word & EX_Invalid )
- {
- /* The masked response */
- /* Put out the QNaN indefinite */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,single,4);
- FPU_put_user(0xffc00000, (unsigned long __user *) single);
- RE_ENTRANT_CHECK_ON;
- return 1;
+ } else if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ if (control_word & EX_Invalid) {
+ /* The masked response */
+ /* Put out the QNaN indefinite */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, single, 4);
+ FPU_put_user(0xffc00000,
+ (unsigned long __user *)single);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ } else
+ return 0;
}
- else
- return 0;
- }
#ifdef PARANOID
- else
- {
- EXCEPTION(EX_INTERNAL|0x163);
- return 0;
- }
+ else {
+ EXCEPTION(EX_INTERNAL | 0x163);
+ return 0;
+ }
#endif
- if ( getsign(st0_ptr) )
- templ |= 0x80000000;
+ if (getsign(st0_ptr))
+ templ |= 0x80000000;
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,single,4);
- FPU_put_user(templ,(unsigned long __user *) single);
- RE_ENTRANT_CHECK_ON;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, single, 4);
+ FPU_put_user(templ, (unsigned long __user *)single);
+ RE_ENTRANT_CHECK_ON;
- return 1;
+ return 1;
}
-
/* Put a long long into user memory */
int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d)
{
- FPU_REG t;
- long long tll;
- int precision_loss;
-
- if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- goto invalid_operand;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if ( (st0_tag == TW_Infinity) ||
- (st0_tag == TW_NaN) )
- {
- EXCEPTION(EX_Invalid);
- goto invalid_operand;
+ FPU_REG t;
+ long long tll;
+ int precision_loss;
+
+ if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ goto invalid_operand;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+ EXCEPTION(EX_Invalid);
+ goto invalid_operand;
+ }
}
- }
-
- reg_copy(st0_ptr, &t);
- precision_loss = FPU_round_to_int(&t, st0_tag);
- ((long *)&tll)[0] = t.sigl;
- ((long *)&tll)[1] = t.sigh;
- if ( (precision_loss == 1) ||
- ((t.sigh & 0x80000000) &&
- !((t.sigh == 0x80000000) && (t.sigl == 0) &&
- signnegative(&t))) )
- {
- EXCEPTION(EX_Invalid);
- /* This is a special case: see sec 16.2.5.1 of the 80486 book */
- invalid_operand:
- if ( control_word & EX_Invalid )
- {
- /* Produce something like QNaN "indefinite" */
- tll = 0x8000000000000000LL;
+
+ reg_copy(st0_ptr, &t);
+ precision_loss = FPU_round_to_int(&t, st0_tag);
+ ((long *)&tll)[0] = t.sigl;
+ ((long *)&tll)[1] = t.sigh;
+ if ((precision_loss == 1) ||
+ ((t.sigh & 0x80000000) &&
+ !((t.sigh == 0x80000000) && (t.sigl == 0) && signnegative(&t)))) {
+ EXCEPTION(EX_Invalid);
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+ invalid_operand:
+ if (control_word & EX_Invalid) {
+ /* Produce something like QNaN "indefinite" */
+ tll = 0x8000000000000000LL;
+ } else
+ return 0;
+ } else {
+ if (precision_loss)
+ set_precision_flag(precision_loss);
+ if (signnegative(&t))
+ tll = -tll;
}
- else
- return 0;
- }
- else
- {
- if ( precision_loss )
- set_precision_flag(precision_loss);
- if ( signnegative(&t) )
- tll = - tll;
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,8);
- if (copy_to_user(d, &tll, 8))
- FPU_abort;
- RE_ENTRANT_CHECK_ON;
-
- return 1;
-}
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 8);
+ if (copy_to_user(d, &tll, 8))
+ FPU_abort;
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
+}
/* Put a long into user memory */
int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d)
{
- FPU_REG t;
- int precision_loss;
-
- if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- goto invalid_operand;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if ( (st0_tag == TW_Infinity) ||
- (st0_tag == TW_NaN) )
- {
- EXCEPTION(EX_Invalid);
- goto invalid_operand;
+ FPU_REG t;
+ int precision_loss;
+
+ if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ goto invalid_operand;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+ EXCEPTION(EX_Invalid);
+ goto invalid_operand;
+ }
}
- }
-
- reg_copy(st0_ptr, &t);
- precision_loss = FPU_round_to_int(&t, st0_tag);
- if (t.sigh ||
- ((t.sigl & 0x80000000) &&
- !((t.sigl == 0x80000000) && signnegative(&t))) )
- {
- EXCEPTION(EX_Invalid);
- /* This is a special case: see sec 16.2.5.1 of the 80486 book */
- invalid_operand:
- if ( control_word & EX_Invalid )
- {
- /* Produce something like QNaN "indefinite" */
- t.sigl = 0x80000000;
+
+ reg_copy(st0_ptr, &t);
+ precision_loss = FPU_round_to_int(&t, st0_tag);
+ if (t.sigh ||
+ ((t.sigl & 0x80000000) &&
+ !((t.sigl == 0x80000000) && signnegative(&t)))) {
+ EXCEPTION(EX_Invalid);
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+ invalid_operand:
+ if (control_word & EX_Invalid) {
+ /* Produce something like QNaN "indefinite" */
+ t.sigl = 0x80000000;
+ } else
+ return 0;
+ } else {
+ if (precision_loss)
+ set_precision_flag(precision_loss);
+ if (signnegative(&t))
+ t.sigl = -(long)t.sigl;
}
- else
- return 0;
- }
- else
- {
- if ( precision_loss )
- set_precision_flag(precision_loss);
- if ( signnegative(&t) )
- t.sigl = -(long)t.sigl;
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,4);
- FPU_put_user(t.sigl, (unsigned long __user *) d);
- RE_ENTRANT_CHECK_ON;
-
- return 1;
-}
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 4);
+ FPU_put_user(t.sigl, (unsigned long __user *)d);
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
+}
/* Put a short into user memory */
int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d)
{
- FPU_REG t;
- int precision_loss;
-
- if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- goto invalid_operand;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if ( (st0_tag == TW_Infinity) ||
- (st0_tag == TW_NaN) )
- {
- EXCEPTION(EX_Invalid);
- goto invalid_operand;
+ FPU_REG t;
+ int precision_loss;
+
+ if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ goto invalid_operand;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+ EXCEPTION(EX_Invalid);
+ goto invalid_operand;
+ }
}
- }
-
- reg_copy(st0_ptr, &t);
- precision_loss = FPU_round_to_int(&t, st0_tag);
- if (t.sigh ||
- ((t.sigl & 0xffff8000) &&
- !((t.sigl == 0x8000) && signnegative(&t))) )
- {
- EXCEPTION(EX_Invalid);
- /* This is a special case: see sec 16.2.5.1 of the 80486 book */
- invalid_operand:
- if ( control_word & EX_Invalid )
- {
- /* Produce something like QNaN "indefinite" */
- t.sigl = 0x8000;
+
+ reg_copy(st0_ptr, &t);
+ precision_loss = FPU_round_to_int(&t, st0_tag);
+ if (t.sigh ||
+ ((t.sigl & 0xffff8000) &&
+ !((t.sigl == 0x8000) && signnegative(&t)))) {
+ EXCEPTION(EX_Invalid);
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+ invalid_operand:
+ if (control_word & EX_Invalid) {
+ /* Produce something like QNaN "indefinite" */
+ t.sigl = 0x8000;
+ } else
+ return 0;
+ } else {
+ if (precision_loss)
+ set_precision_flag(precision_loss);
+ if (signnegative(&t))
+ t.sigl = -t.sigl;
}
- else
- return 0;
- }
- else
- {
- if ( precision_loss )
- set_precision_flag(precision_loss);
- if ( signnegative(&t) )
- t.sigl = -t.sigl;
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,2);
- FPU_put_user((short)t.sigl, d);
- RE_ENTRANT_CHECK_ON;
-
- return 1;
-}
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 2);
+ FPU_put_user((short)t.sigl, d);
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
+}
/* Put a packed bcd array into user memory */
int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
{
- FPU_REG t;
- unsigned long long ll;
- u_char b;
- int i, precision_loss;
- u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
-
- if ( st0_tag == TAG_Empty )
- {
- /* Empty register (stack underflow) */
- EXCEPTION(EX_StackUnder);
- goto invalid_operand;
- }
- else if ( st0_tag == TAG_Special )
- {
- st0_tag = FPU_Special(st0_ptr);
- if ( (st0_tag == TW_Infinity) ||
- (st0_tag == TW_NaN) )
- {
- EXCEPTION(EX_Invalid);
- goto invalid_operand;
+ FPU_REG t;
+ unsigned long long ll;
+ u_char b;
+ int i, precision_loss;
+ u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0;
+
+ if (st0_tag == TAG_Empty) {
+ /* Empty register (stack underflow) */
+ EXCEPTION(EX_StackUnder);
+ goto invalid_operand;
+ } else if (st0_tag == TAG_Special) {
+ st0_tag = FPU_Special(st0_ptr);
+ if ((st0_tag == TW_Infinity) || (st0_tag == TW_NaN)) {
+ EXCEPTION(EX_Invalid);
+ goto invalid_operand;
+ }
+ }
+
+ reg_copy(st0_ptr, &t);
+ precision_loss = FPU_round_to_int(&t, st0_tag);
+ ll = significand(&t);
+
+ /* Check for overflow, by comparing with 999999999999999999 decimal. */
+ if ((t.sigh > 0x0de0b6b3) ||
+ ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff))) {
+ EXCEPTION(EX_Invalid);
+ /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+ invalid_operand:
+ if (control_word & CW_Invalid) {
+ /* Produce the QNaN "indefinite" */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 10);
+ for (i = 0; i < 7; i++)
+ FPU_put_user(0, d + i); /* These bytes "undefined" */
+ FPU_put_user(0xc0, d + 7); /* This byte "undefined" */
+ FPU_put_user(0xff, d + 8);
+ FPU_put_user(0xff, d + 9);
+ RE_ENTRANT_CHECK_ON;
+ return 1;
+ } else
+ return 0;
+ } else if (precision_loss) {
+ /* Precision loss doesn't stop the data transfer */
+ set_precision_flag(precision_loss);
}
- }
-
- reg_copy(st0_ptr, &t);
- precision_loss = FPU_round_to_int(&t, st0_tag);
- ll = significand(&t);
-
- /* Check for overflow, by comparing with 999999999999999999 decimal. */
- if ( (t.sigh > 0x0de0b6b3) ||
- ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
- {
- EXCEPTION(EX_Invalid);
- /* This is a special case: see sec 16.2.5.1 of the 80486 book */
- invalid_operand:
- if ( control_word & CW_Invalid )
- {
- /* Produce the QNaN "indefinite" */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,10);
- for ( i = 0; i < 7; i++)
- FPU_put_user(0, d+i); /* These bytes "undefined" */
- FPU_put_user(0xc0, d+7); /* This byte "undefined" */
- FPU_put_user(0xff, d+8);
- FPU_put_user(0xff, d+9);
- RE_ENTRANT_CHECK_ON;
- return 1;
+
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 10);
+ RE_ENTRANT_CHECK_ON;
+ for (i = 0; i < 9; i++) {
+ b = FPU_div_small(&ll, 10);
+ b |= (FPU_div_small(&ll, 10)) << 4;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_put_user(b, d + i);
+ RE_ENTRANT_CHECK_ON;
}
- else
- return 0;
- }
- else if ( precision_loss )
- {
- /* Precision loss doesn't stop the data transfer */
- set_precision_flag(precision_loss);
- }
-
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,10);
- RE_ENTRANT_CHECK_ON;
- for ( i = 0; i < 9; i++)
- {
- b = FPU_div_small(&ll, 10);
- b |= (FPU_div_small(&ll, 10)) << 4;
- RE_ENTRANT_CHECK_OFF;
- FPU_put_user(b, d+i);
- RE_ENTRANT_CHECK_ON;
- }
- RE_ENTRANT_CHECK_OFF;
- FPU_put_user(sign, d+9);
- RE_ENTRANT_CHECK_ON;
-
- return 1;
+ RE_ENTRANT_CHECK_OFF;
+ FPU_put_user(sign, d + 9);
+ RE_ENTRANT_CHECK_ON;
+
+ return 1;
}
/*===========================================================================*/
@@ -1119,59 +973,56 @@ int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
largest possible value */
int FPU_round_to_int(FPU_REG *r, u_char tag)
{
- u_char very_big;
- unsigned eax;
-
- if (tag == TAG_Zero)
- {
- /* Make sure that zero is returned */
- significand(r) = 0;
- return 0; /* o.k. */
- }
-
- if (exponent(r) > 63)
- {
- r->sigl = r->sigh = ~0; /* The largest representable number */
- return 1; /* overflow */
- }
-
- eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
- very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */
+ u_char very_big;
+ unsigned eax;
+
+ if (tag == TAG_Zero) {
+ /* Make sure that zero is returned */
+ significand(r) = 0;
+ return 0; /* o.k. */
+ }
+
+ if (exponent(r) > 63) {
+ r->sigl = r->sigh = ~0; /* The largest representable number */
+ return 1; /* overflow */
+ }
+
+ eax = FPU_shrxs(&r->sigl, 63 - exponent(r));
+ very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */
#define half_or_more (eax & 0x80000000)
#define frac_part (eax)
#define more_than_half ((eax & 0x80000001) == 0x80000001)
- switch (control_word & CW_RC)
- {
- case RC_RND:
- if ( more_than_half /* nearest */
- || (half_or_more && (r->sigl & 1)) ) /* odd -> even */
- {
- if ( very_big ) return 1; /* overflow */
- significand(r) ++;
- return PRECISION_LOST_UP;
- }
- break;
- case RC_DOWN:
- if (frac_part && getsign(r))
- {
- if ( very_big ) return 1; /* overflow */
- significand(r) ++;
- return PRECISION_LOST_UP;
- }
- break;
- case RC_UP:
- if (frac_part && !getsign(r))
- {
- if ( very_big ) return 1; /* overflow */
- significand(r) ++;
- return PRECISION_LOST_UP;
+ switch (control_word & CW_RC) {
+ case RC_RND:
+ if (more_than_half /* nearest */
+ || (half_or_more && (r->sigl & 1))) { /* odd -> even */
+ if (very_big)
+ return 1; /* overflow */
+ significand(r)++;
+ return PRECISION_LOST_UP;
+ }
+ break;
+ case RC_DOWN:
+ if (frac_part && getsign(r)) {
+ if (very_big)
+ return 1; /* overflow */
+ significand(r)++;
+ return PRECISION_LOST_UP;
+ }
+ break;
+ case RC_UP:
+ if (frac_part && !getsign(r)) {
+ if (very_big)
+ return 1; /* overflow */
+ significand(r)++;
+ return PRECISION_LOST_UP;
+ }
+ break;
+ case RC_CHOP:
+ break;
}
- break;
- case RC_CHOP:
- break;
- }
- return eax ? PRECISION_LOST_DOWN : 0;
+ return eax ? PRECISION_LOST_DOWN : 0;
}
@@ -1179,197 +1030,195 @@ int FPU_round_to_int(FPU_REG *r, u_char tag)
u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
{
- unsigned short tag_word = 0;
- u_char tag;
- int i;
-
- if ( (addr_modes.default_mode == VM86) ||
- ((addr_modes.default_mode == PM16)
- ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
- {
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, s, 0x0e);
- FPU_get_user(control_word, (unsigned short __user *) s);
- FPU_get_user(partial_status, (unsigned short __user *) (s+2));
- FPU_get_user(tag_word, (unsigned short __user *) (s+4));
- FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6));
- FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8));
- FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a));
- FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c));
- RE_ENTRANT_CHECK_ON;
- s += 0x0e;
- if ( addr_modes.default_mode == VM86 )
- {
- instruction_address.offset
- += (instruction_address.selector & 0xf000) << 4;
- operand_address.offset += (operand_address.selector & 0xf000) << 4;
+ unsigned short tag_word = 0;
+ u_char tag;
+ int i;
+
+ if ((addr_modes.default_mode == VM86) ||
+ ((addr_modes.default_mode == PM16)
+ ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) {
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, s, 0x0e);
+ FPU_get_user(control_word, (unsigned short __user *)s);
+ FPU_get_user(partial_status, (unsigned short __user *)(s + 2));
+ FPU_get_user(tag_word, (unsigned short __user *)(s + 4));
+ FPU_get_user(instruction_address.offset,
+ (unsigned short __user *)(s + 6));
+ FPU_get_user(instruction_address.selector,
+ (unsigned short __user *)(s + 8));
+ FPU_get_user(operand_address.offset,
+ (unsigned short __user *)(s + 0x0a));
+ FPU_get_user(operand_address.selector,
+ (unsigned short __user *)(s + 0x0c));
+ RE_ENTRANT_CHECK_ON;
+ s += 0x0e;
+ if (addr_modes.default_mode == VM86) {
+ instruction_address.offset
+ += (instruction_address.selector & 0xf000) << 4;
+ operand_address.offset +=
+ (operand_address.selector & 0xf000) << 4;
+ }
+ } else {
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, s, 0x1c);
+ FPU_get_user(control_word, (unsigned short __user *)s);
+ FPU_get_user(partial_status, (unsigned short __user *)(s + 4));
+ FPU_get_user(tag_word, (unsigned short __user *)(s + 8));
+ FPU_get_user(instruction_address.offset,
+ (unsigned long __user *)(s + 0x0c));
+ FPU_get_user(instruction_address.selector,
+ (unsigned short __user *)(s + 0x10));
+ FPU_get_user(instruction_address.opcode,
+ (unsigned short __user *)(s + 0x12));
+ FPU_get_user(operand_address.offset,
+ (unsigned long __user *)(s + 0x14));
+ FPU_get_user(operand_address.selector,
+ (unsigned long __user *)(s + 0x18));
+ RE_ENTRANT_CHECK_ON;
+ s += 0x1c;
}
- }
- else
- {
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ, s, 0x1c);
- FPU_get_user(control_word, (unsigned short __user *) s);
- FPU_get_user(partial_status, (unsigned short __user *) (s+4));
- FPU_get_user(tag_word, (unsigned short __user *) (s+8));
- FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c));
- FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10));
- FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12));
- FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14));
- FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18));
- RE_ENTRANT_CHECK_ON;
- s += 0x1c;
- }
#ifdef PECULIAR_486
- control_word &= ~0xe080;
-#endif /* PECULIAR_486 */
-
- top = (partial_status >> SW_Top_Shift) & 7;
-
- if ( partial_status & ~control_word & CW_Exceptions )
- partial_status |= (SW_Summary | SW_Backward);
- else
- partial_status &= ~(SW_Summary | SW_Backward);
-
- for ( i = 0; i < 8; i++ )
- {
- tag = tag_word & 3;
- tag_word >>= 2;
-
- if ( tag == TAG_Empty )
- /* New tag is empty. Accept it */
- FPU_settag(i, TAG_Empty);
- else if ( FPU_gettag(i) == TAG_Empty )
- {
- /* Old tag is empty and new tag is not empty. New tag is determined
- by old reg contents */
- if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias )
- {
- if ( !(fpu_register(i).sigl | fpu_register(i).sigh) )
- FPU_settag(i, TAG_Zero);
- else
- FPU_settag(i, TAG_Special);
- }
- else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias )
- {
- FPU_settag(i, TAG_Special);
- }
- else if ( fpu_register(i).sigh & 0x80000000 )
- FPU_settag(i, TAG_Valid);
- else
- FPU_settag(i, TAG_Special); /* An Un-normal */
- }
- /* Else old tag is not empty and new tag is not empty. Old tag
- remains correct */
- }
-
- return s;
-}
+ control_word &= ~0xe080;
+#endif /* PECULIAR_486 */
+
+ top = (partial_status >> SW_Top_Shift) & 7;
+
+ if (partial_status & ~control_word & CW_Exceptions)
+ partial_status |= (SW_Summary | SW_Backward);
+ else
+ partial_status &= ~(SW_Summary | SW_Backward);
+
+ for (i = 0; i < 8; i++) {
+ tag = tag_word & 3;
+ tag_word >>= 2;
+
+ if (tag == TAG_Empty)
+ /* New tag is empty. Accept it */
+ FPU_settag(i, TAG_Empty);
+ else if (FPU_gettag(i) == TAG_Empty) {
+ /* Old tag is empty and new tag is not empty. New tag is determined
+ by old reg contents */
+ if (exponent(&fpu_register(i)) == -EXTENDED_Ebias) {
+ if (!
+ (fpu_register(i).sigl | fpu_register(i).
+ sigh))
+ FPU_settag(i, TAG_Zero);
+ else
+ FPU_settag(i, TAG_Special);
+ } else if (exponent(&fpu_register(i)) ==
+ 0x7fff - EXTENDED_Ebias) {
+ FPU_settag(i, TAG_Special);
+ } else if (fpu_register(i).sigh & 0x80000000)
+ FPU_settag(i, TAG_Valid);
+ else
+ FPU_settag(i, TAG_Special); /* An Un-normal */
+ }
+ /* Else old tag is not empty and new tag is not empty. Old tag
+ remains correct */
+ }
+ return s;
+}
void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
{
- int i, regnr;
- u_char __user *s = fldenv(addr_modes, data_address);
- int offset = (top & 7) * 10, other = 80 - offset;
-
- /* Copy all registers in stack order. */
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_READ,s,80);
- __copy_from_user(register_base+offset, s, other);
- if ( offset )
- __copy_from_user(register_base, s+other, offset);
- RE_ENTRANT_CHECK_ON;
-
- for ( i = 0; i < 8; i++ )
- {
- regnr = (i+top) & 7;
- if ( FPU_gettag(regnr) != TAG_Empty )
- /* The loaded data over-rides all other cases. */
- FPU_settag(regnr, FPU_tagof(&st(i)));
- }
+ int i, regnr;
+ u_char __user *s = fldenv(addr_modes, data_address);
+ int offset = (top & 7) * 10, other = 80 - offset;
+
+ /* Copy all registers in stack order. */
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_READ, s, 80);
+ __copy_from_user(register_base + offset, s, other);
+ if (offset)
+ __copy_from_user(register_base, s + other, offset);
+ RE_ENTRANT_CHECK_ON;
+
+ for (i = 0; i < 8; i++) {
+ regnr = (i + top) & 7;
+ if (FPU_gettag(regnr) != TAG_Empty)
+ /* The loaded data over-rides all other cases. */
+ FPU_settag(regnr, FPU_tagof(&st(i)));
+ }
}
-
u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
{
- if ( (addr_modes.default_mode == VM86) ||
- ((addr_modes.default_mode == PM16)
- ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
- {
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,14);
+ if ((addr_modes.default_mode == VM86) ||
+ ((addr_modes.default_mode == PM16)
+ ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX))) {
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 14);
#ifdef PECULIAR_486
- FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d);
+ FPU_put_user(control_word & ~0xe080, (unsigned long __user *)d);
#else
- FPU_put_user(control_word, (unsigned short __user *) d);
+ FPU_put_user(control_word, (unsigned short __user *)d);
#endif /* PECULIAR_486 */
- FPU_put_user(status_word(), (unsigned short __user *) (d+2));
- FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4));
- FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6));
- FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a));
- if ( addr_modes.default_mode == VM86 )
- {
- FPU_put_user((instruction_address.offset & 0xf0000) >> 4,
- (unsigned short __user *) (d+8));
- FPU_put_user((operand_address.offset & 0xf0000) >> 4,
- (unsigned short __user *) (d+0x0c));
- }
- else
- {
- FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8));
- FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c));
- }
- RE_ENTRANT_CHECK_ON;
- d += 0x0e;
- }
- else
- {
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE, d, 7*4);
+ FPU_put_user(status_word(), (unsigned short __user *)(d + 2));
+ FPU_put_user(fpu_tag_word, (unsigned short __user *)(d + 4));
+ FPU_put_user(instruction_address.offset,
+ (unsigned short __user *)(d + 6));
+ FPU_put_user(operand_address.offset,
+ (unsigned short __user *)(d + 0x0a));
+ if (addr_modes.default_mode == VM86) {
+ FPU_put_user((instruction_address.
+ offset & 0xf0000) >> 4,
+ (unsigned short __user *)(d + 8));
+ FPU_put_user((operand_address.offset & 0xf0000) >> 4,
+ (unsigned short __user *)(d + 0x0c));
+ } else {
+ FPU_put_user(instruction_address.selector,
+ (unsigned short __user *)(d + 8));
+ FPU_put_user(operand_address.selector,
+ (unsigned short __user *)(d + 0x0c));
+ }
+ RE_ENTRANT_CHECK_ON;
+ d += 0x0e;
+ } else {
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 7 * 4);
#ifdef PECULIAR_486
- control_word &= ~0xe080;
- /* An 80486 sets nearly all of the reserved bits to 1. */
- control_word |= 0xffff0040;
- partial_status = status_word() | 0xffff0000;
- fpu_tag_word |= 0xffff0000;
- I387.soft.fcs &= ~0xf8000000;
- I387.soft.fos |= 0xffff0000;
+ control_word &= ~0xe080;
+ /* An 80486 sets nearly all of the reserved bits to 1. */
+ control_word |= 0xffff0040;
+ partial_status = status_word() | 0xffff0000;
+ fpu_tag_word |= 0xffff0000;
+ I387.soft.fcs &= ~0xf8000000;
+ I387.soft.fos |= 0xffff0000;
#endif /* PECULIAR_486 */
- if (__copy_to_user(d, &control_word, 7*4))
- FPU_abort;
- RE_ENTRANT_CHECK_ON;
- d += 0x1c;
- }
-
- control_word |= CW_Exceptions;
- partial_status &= ~(SW_Summary | SW_Backward);
-
- return d;
-}
+ if (__copy_to_user(d, &control_word, 7 * 4))
+ FPU_abort;
+ RE_ENTRANT_CHECK_ON;
+ d += 0x1c;
+ }
+ control_word |= CW_Exceptions;
+ partial_status &= ~(SW_Summary | SW_Backward);
+
+ return d;
+}
void fsave(fpu_addr_modes addr_modes, u_char __user *data_address)
{
- u_char __user *d;
- int offset = (top & 7) * 10, other = 80 - offset;
+ u_char __user *d;
+ int offset = (top & 7) * 10, other = 80 - offset;
- d = fstenv(addr_modes, data_address);
+ d = fstenv(addr_modes, data_address);
- RE_ENTRANT_CHECK_OFF;
- FPU_access_ok(VERIFY_WRITE,d,80);
+ RE_ENTRANT_CHECK_OFF;
+ FPU_access_ok(VERIFY_WRITE, d, 80);
- /* Copy all registers in stack order. */
- if (__copy_to_user(d, register_base+offset, other))
- FPU_abort;
- if ( offset )
- if (__copy_to_user(d+other, register_base, offset))
- FPU_abort;
- RE_ENTRANT_CHECK_ON;
+ /* Copy all registers in stack order. */
+ if (__copy_to_user(d, register_base + offset, other))
+ FPU_abort;
+ if (offset)
+ if (__copy_to_user(d + other, register_base, offset))
+ FPU_abort;
+ RE_ENTRANT_CHECK_ON;
- finit();
+ finit();
}
/*===========================================================================*/
diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c
index 40f50b61bc6..36c37f71f71 100644
--- a/arch/x86/math-emu/reg_mul.c
+++ b/arch/x86/math-emu/reg_mul.c
@@ -20,7 +20,6 @@
#include "reg_constant.h"
#include "fpu_system.h"
-
/*
Multiply two registers to give a register result.
The sources are st(deststnr) and (b,tagb,signb).
@@ -29,104 +28,88 @@
/* This routine must be called with non-empty source registers */
int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
{
- FPU_REG *a = &st(deststnr);
- FPU_REG *dest = a;
- u_char taga = FPU_gettagi(deststnr);
- u_char saved_sign = getsign(dest);
- u_char sign = (getsign(a) ^ getsign(b));
- int tag;
-
+ FPU_REG *a = &st(deststnr);
+ FPU_REG *dest = a;
+ u_char taga = FPU_gettagi(deststnr);
+ u_char saved_sign = getsign(dest);
+ u_char sign = (getsign(a) ^ getsign(b));
+ int tag;
- if ( !(taga | tagb) )
- {
- /* Both regs Valid, this should be the most common case. */
+ if (!(taga | tagb)) {
+ /* Both regs Valid, this should be the most common case. */
- tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
- if ( tag < 0 )
- {
- setsign(dest, saved_sign);
- return tag;
+ tag =
+ FPU_u_mul(a, b, dest, control_w, sign,
+ exponent(a) + exponent(b));
+ if (tag < 0) {
+ setsign(dest, saved_sign);
+ return tag;
+ }
+ FPU_settagi(deststnr, tag);
+ return tag;
}
- FPU_settagi(deststnr, tag);
- return tag;
- }
- if ( taga == TAG_Special )
- taga = FPU_Special(a);
- if ( tagb == TAG_Special )
- tagb = FPU_Special(b);
+ if (taga == TAG_Special)
+ taga = FPU_Special(a);
+ if (tagb == TAG_Special)
+ tagb = FPU_Special(b);
- if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
+ if (((taga == TAG_Valid) && (tagb == TW_Denormal))
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
- || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
- {
- FPU_REG x, y;
- if ( denormal_operand() < 0 )
- return FPU_Exception;
-
- FPU_to_exp16(a, &x);
- FPU_to_exp16(b, &y);
- tag = FPU_u_mul(&x, &y, dest, control_w, sign,
- exponent16(&x) + exponent16(&y));
- if ( tag < 0 )
- {
- setsign(dest, saved_sign);
- return tag;
- }
- FPU_settagi(deststnr, tag);
- return tag;
- }
- else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
- {
- if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
- && (denormal_operand() < 0) )
- return FPU_Exception;
+ || ((taga == TW_Denormal) && (tagb == TW_Denormal))) {
+ FPU_REG x, y;
+ if (denormal_operand() < 0)
+ return FPU_Exception;
- /* Must have either both arguments == zero, or
- one valid and the other zero.
- The result is therefore zero. */
- FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
- /* The 80486 book says that the answer is +0, but a real
- 80486 behaves this way.
- IEEE-754 apparently says it should be this way. */
- setsign(dest, sign);
- return TAG_Zero;
- }
- /* Must have infinities, NaNs, etc */
- else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
- {
- return real_2op_NaN(b, tagb, deststnr, &st(0));
- }
- else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
- || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
- {
- return arith_invalid(deststnr); /* Zero*Infinity is invalid */
- }
- else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
- && (denormal_operand() < 0) )
- {
- return FPU_Exception;
- }
- else if (taga == TW_Infinity)
- {
- FPU_copy_to_regi(a, TAG_Special, deststnr);
- setsign(dest, sign);
- return TAG_Special;
- }
- else if (tagb == TW_Infinity)
- {
- FPU_copy_to_regi(b, TAG_Special, deststnr);
- setsign(dest, sign);
- return TAG_Special;
- }
+ FPU_to_exp16(a, &x);
+ FPU_to_exp16(b, &y);
+ tag = FPU_u_mul(&x, &y, dest, control_w, sign,
+ exponent16(&x) + exponent16(&y));
+ if (tag < 0) {
+ setsign(dest, saved_sign);
+ return tag;
+ }
+ FPU_settagi(deststnr, tag);
+ return tag;
+ } else if ((taga <= TW_Denormal) && (tagb <= TW_Denormal)) {
+ if (((tagb == TW_Denormal) || (taga == TW_Denormal))
+ && (denormal_operand() < 0))
+ return FPU_Exception;
+ /* Must have either both arguments == zero, or
+ one valid and the other zero.
+ The result is therefore zero. */
+ FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
+ /* The 80486 book says that the answer is +0, but a real
+ 80486 behaves this way.
+ IEEE-754 apparently says it should be this way. */
+ setsign(dest, sign);
+ return TAG_Zero;
+ }
+ /* Must have infinities, NaNs, etc */
+ else if ((taga == TW_NaN) || (tagb == TW_NaN)) {
+ return real_2op_NaN(b, tagb, deststnr, &st(0));
+ } else if (((taga == TW_Infinity) && (tagb == TAG_Zero))
+ || ((tagb == TW_Infinity) && (taga == TAG_Zero))) {
+ return arith_invalid(deststnr); /* Zero*Infinity is invalid */
+ } else if (((taga == TW_Denormal) || (tagb == TW_Denormal))
+ && (denormal_operand() < 0)) {
+ return FPU_Exception;
+ } else if (taga == TW_Infinity) {
+ FPU_copy_to_regi(a, TAG_Special, deststnr);
+ setsign(dest, sign);
+ return TAG_Special;
+ } else if (tagb == TW_Infinity) {
+ FPU_copy_to_regi(b, TAG_Special, deststnr);
+ setsign(dest, sign);
+ return TAG_Special;
+ }
#ifdef PARANOID
- else
- {
- EXCEPTION(EX_INTERNAL|0x102);
- return FPU_Exception;
- }
-#endif /* PARANOID */
+ else {
+ EXCEPTION(EX_INTERNAL | 0x102);
+ return FPU_Exception;
+ }
+#endif /* PARANOID */
return 0;
}
diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h
index 59e73302aa6..54a3f226982 100644
--- a/arch/x86/math-emu/status_w.h
+++ b/arch/x86/math-emu/status_w.h
@@ -10,7 +10,7 @@
#ifndef _STATUS_H_
#define _STATUS_H_
-#include "fpu_emu.h" /* for definition of PECULIAR_486 */
+#include "fpu_emu.h" /* for definition of PECULIAR_486 */
#ifdef __ASSEMBLY__
#define Const__(x) $##x
@@ -34,7 +34,7 @@
#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */
#define SW_Invalid Const__(0x0001) /* invalid operation */
-#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
+#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
#ifndef __ASSEMBLY__
@@ -50,8 +50,8 @@
((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
static inline void setcc(int cc)
{
- partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3);
- partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3);
+ partial_status &= ~(SW_C0 | SW_C1 | SW_C2 | SW_C3);
+ partial_status |= (cc) & (SW_C0 | SW_C1 | SW_C2 | SW_C3);
}
#ifdef PECULIAR_486
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
index 362b4ad082d..c36ae88bb54 100644
--- a/arch/x86/mm/Makefile_32
+++ b/arch/x86/mm/Makefile_32
@@ -2,9 +2,8 @@
# Makefile for the linux i386-specific parts of the memory manager.
#
-obj-y := init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap_32.o
+obj-y := init_32.o pgtable_32.o fault.o ioremap.o extable.o pageattr.o mmap.o
obj-$(CONFIG_NUMA) += discontig_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
index 6bcb47945b8..688c8c28ac8 100644
--- a/arch/x86/mm/Makefile_64
+++ b/arch/x86/mm/Makefile_64
@@ -2,9 +2,8 @@
# Makefile for the linux x86_64-specific parts of the memory manager.
#
-obj-y := init_64.o fault_64.o ioremap_64.o extable_64.o pageattr_64.o mmap_64.o
+obj-y := init_64.o fault.o ioremap.o extable.o pageattr.o mmap.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NUMA) += numa_64.o
obj-$(CONFIG_K8_NUMA) += k8topology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_64.o
-
diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c
deleted file mode 100644
index f14da2a53ec..00000000000
--- a/arch/x86/mm/boot_ioremap_32.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * arch/i386/mm/boot_ioremap.c
- *
- * Re-map functions for early boot-time before paging_init() when the
- * boot-time pagetables are still in use
- *
- * Written by Dave Hansen <haveblue@us.ibm.com>
- */
-
-
-/*
- * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE
- * keeps that from happening. If anyone has a better way, I'm listening.
- *
- * boot_pte_t is defined only if this all works correctly
- */
-
-#undef CONFIG_X86_PAE
-#undef CONFIG_PARAVIRT
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <linux/init.h>
-#include <linux/stddef.h>
-
-/*
- * I'm cheating here. It is known that the two boot PTE pages are
- * allocated next to each other. I'm pretending that they're just
- * one big array.
- */
-
-#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
-
-static unsigned long boot_pte_index(unsigned long vaddr)
-{
- return __pa(vaddr) >> PAGE_SHIFT;
-}
-
-static inline boot_pte_t* boot_vaddr_to_pte(void *address)
-{
- boot_pte_t* boot_pg = (boot_pte_t*)pg0;
- return &boot_pg[boot_pte_index((unsigned long)address)];
-}
-
-/*
- * This is only for a caller who is clever enough to page-align
- * phys_addr and virtual_source, and who also has a preference
- * about which virtual address from which to steal ptes
- */
-static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
- void* virtual_source)
-{
- boot_pte_t* pte;
- int i;
- char *vaddr = virtual_source;
-
- pte = boot_vaddr_to_pte(virtual_source);
- for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
- set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
- __flush_tlb_one(&vaddr[i*PAGE_SIZE]);
- }
-}
-
-/* the virtual space we're going to remap comes from this array */
-#define BOOT_IOREMAP_PAGES 4
-#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE)
-static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
- __attribute__ ((aligned (PAGE_SIZE)));
-
-/*
- * This only applies to things which need to ioremap before paging_init()
- * bt_ioremap() and plain ioremap() are both useless at this point.
- *
- * When used, we're still using the boot-time pagetables, which only
- * have 2 PTE pages mapping the first 8MB
- *
- * There is no unmap. The boot-time PTE pages aren't used after boot.
- * If you really want the space back, just remap it yourself.
- * boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE)
- */
-__init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr, offset;
- unsigned int nrpages;
-
- last_addr = phys_addr + size - 1;
-
- /* page align the requested address */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > BOOT_IOREMAP_PAGES)
- return NULL;
-
- __boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
-
- return &boot_ioremap_space[offset];
-}
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 13a474d3c6e..04b1d20e261 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -32,6 +32,7 @@
#include <linux/kexec.h>
#include <linux/pfn.h>
#include <linux/swap.h>
+#include <linux/acpi.h>
#include <asm/e820.h>
#include <asm/setup.h>
@@ -103,14 +104,10 @@ extern unsigned long highend_pfn, highstart_pfn;
#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
unsigned long node_remap_size[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
static void *node_remap_start_vaddr[MAX_NUMNODES];
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
/*
@@ -167,6 +164,22 @@ static void __init allocate_pgdat(int nid)
}
}
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * In the discontig memory model, a portion of the kernel virtual area (KVA)
+ * is reserved and portions of nodes are mapped using it. This is to allow
+ * node-local memory to be allocated for structures that would normally require
+ * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers
+ * should be prepared to allocate from the bootmem allocator instead. This KVA
+ * mechanism is incompatible with SPARSEMEM as it makes assumptions about the
+ * layout of memory that are broken if alloc_remap() succeeds for some of the
+ * map and fails for others
+ */
+static unsigned long node_remap_start_pfn[MAX_NUMNODES];
+static void *node_remap_end_vaddr[MAX_NUMNODES];
+static void *node_remap_alloc_vaddr[MAX_NUMNODES];
+static unsigned long node_remap_offset[MAX_NUMNODES];
+
void *alloc_remap(int nid, unsigned long size)
{
void *allocation = node_remap_alloc_vaddr[nid];
@@ -263,11 +276,46 @@ static unsigned long calculate_numa_remap_pages(void)
return reserve_pages;
}
+static void init_remap_allocator(int nid)
+{
+ node_remap_start_vaddr[nid] = pfn_to_kaddr(
+ kva_start_pfn + node_remap_offset[nid]);
+ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+ (node_remap_size[nid] * PAGE_SIZE);
+ node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+ ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
+ printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+ (ulong) node_remap_start_vaddr[nid],
+ (ulong) pfn_to_kaddr(highstart_pfn
+ + node_remap_offset[nid] + node_remap_size[nid]));
+}
+#else
+void *alloc_remap(int nid, unsigned long size)
+{
+ return NULL;
+}
+
+static unsigned long calculate_numa_remap_pages(void)
+{
+ return 0;
+}
+
+static void init_remap_allocator(int nid)
+{
+}
+
+void __init remap_numa_kva(void)
+{
+}
+#endif /* CONFIG_DISCONTIGMEM */
+
extern void setup_bootmem_allocator(void);
unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
+ unsigned long wasted_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -288,11 +336,18 @@ unsigned long __init setup_memory(void)
#ifdef CONFIG_BLK_DEV_INITRD
/* Numa kva area is below the initrd */
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image)
- kva_start_pfn = PFN_DOWN(boot_params.hdr.ramdisk_image)
+ if (initrd_start)
+ kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
- kva_pages;
#endif
- kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+ /*
+ * We waste pages past at the end of the KVA for no good reason other
+ * than how it is located. This is bad.
+ */
+ wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
+ kva_start_pfn -= wasted_pages;
+ kva_pages += wasted_pages;
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
@@ -318,19 +373,9 @@ unsigned long __init setup_memory(void)
printk("Low memory ends at vaddr %08lx\n",
(ulong) pfn_to_kaddr(max_low_pfn));
for_each_online_node(nid) {
- node_remap_start_vaddr[nid] = pfn_to_kaddr(
- kva_start_pfn + node_remap_offset[nid]);
- /* Init the node remap allocator */
- node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
- (node_remap_size[nid] * PAGE_SIZE);
- node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
- ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+ init_remap_allocator(nid);
allocate_pgdat(nid);
- printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
- (ulong) node_remap_start_vaddr[nid],
- (ulong) pfn_to_kaddr(highstart_pfn
- + node_remap_offset[nid] + node_remap_size[nid]));
}
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
@@ -345,7 +390,8 @@ unsigned long __init setup_memory(void)
void __init numa_kva_reserve(void)
{
- reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
+ if (kva_pages)
+ reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
}
void __init zone_sizes_init(void)
@@ -430,3 +476,29 @@ int memory_add_physaddr_to_nid(u64 addr)
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
+
+#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
+/*
+ * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
+ *
+ * These stub functions are needed to compile 32-bit NUMA when SRAT is
+ * not set. There are functions in srat_64.c for parsing this table
+ * and it may be possible to make them common functions.
+ */
+void acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+ printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
+}
+
+void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
+{
+}
+
+void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
+{
+}
+
+void acpi_numa_arch_fixup(void)
+{
+}
+#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
new file mode 100644
index 00000000000..7e8db53528a
--- /dev/null
+++ b/arch/x86/mm/extable.c
@@ -0,0 +1,62 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+
+int fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+#ifdef CONFIG_PNPBIOS
+ if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
+ extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
+ extern u32 pnp_bios_is_utter_crap;
+ pnp_bios_is_utter_crap = 1;
+ printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
+ __asm__ volatile(
+ "movl %0, %%esp\n\t"
+ "jmp *%1\n\t"
+ : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
+ panic("do_trap: can't hit this");
+ }
+#endif
+
+ fixup = search_exception_tables(regs->ip);
+ if (fixup) {
+ regs->ip = fixup->fixup;
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Need to defined our own search_extable on X86_64 to work around
+ * a B stepping K8 bug.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ /* B stepping K8 bug */
+ if ((value >> 32) == 0)
+ value |= 0xffffffffUL << 32;
+
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ long diff;
+
+ mid = (last - first) / 2 + first;
+ diff = mid->insn - value;
+ if (diff == 0)
+ return mid;
+ else if (diff < 0)
+ first = mid+1;
+ else
+ last = mid-1;
+ }
+ return NULL;
+}
+#endif
diff --git a/arch/x86/mm/extable_32.c b/arch/x86/mm/extable_32.c
deleted file mode 100644
index 0ce4f22a263..00000000000
--- a/arch/x86/mm/extable_32.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * linux/arch/i386/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <asm/uaccess.h>
-
-int fixup_exception(struct pt_regs *regs)
-{
- const struct exception_table_entry *fixup;
-
-#ifdef CONFIG_PNPBIOS
- if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
- {
- extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
- extern u32 pnp_bios_is_utter_crap;
- pnp_bios_is_utter_crap = 1;
- printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
- __asm__ volatile(
- "movl %0, %%esp\n\t"
- "jmp *%1\n\t"
- : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
- panic("do_trap: can't hit this");
- }
-#endif
-
- fixup = search_exception_tables(regs->eip);
- if (fixup) {
- regs->eip = fixup->fixup;
- return 1;
- }
-
- return 0;
-}
diff --git a/arch/x86/mm/extable_64.c b/arch/x86/mm/extable_64.c
deleted file mode 100644
index 79ac6e7100a..00000000000
--- a/arch/x86/mm/extable_64.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * linux/arch/x86_64/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- /* Work around a B stepping K8 bug */
- if ((value >> 32) == 0)
- value |= 0xffffffffUL << 32;
-
- while (first <= last) {
- const struct exception_table_entry *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid;
- else if (diff < 0)
- first = mid+1;
- else
- last = mid-1;
- }
- return NULL;
-}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
new file mode 100644
index 00000000000..e28cc5277b1
--- /dev/null
+++ b/arch/x86/mm/fault.c
@@ -0,0 +1,986 @@
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/compiler.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h> /* for max_low_pfn */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+
+#include <asm/system.h>
+#include <asm/desc.h>
+#include <asm/segment.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/tlbflush.h>
+#include <asm/proto.h>
+#include <asm-generic/sections.h>
+
+/*
+ * Page fault error code bits
+ * bit 0 == 0 means no page found, 1 means protection fault
+ * bit 1 == 0 means read, 1 means write
+ * bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT (1<<0)
+#define PF_WRITE (1<<1)
+#define PF_USER (1<<2)
+#define PF_RSVD (1<<3)
+#define PF_INSTR (1<<4)
+
+static inline int notify_page_fault(struct pt_regs *regs)
+{
+#ifdef CONFIG_KPROBES
+ int ret = 0;
+
+ /* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
+ if (!user_mode_vm(regs)) {
+#else
+ if (!user_mode(regs)) {
+#endif
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
+ ret = 1;
+ preempt_enable();
+ }
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+/*
+ * X86_32
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
+ */
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+ unsigned long error_code)
+{
+ unsigned char *instr;
+ int scan_more = 1;
+ int prefetch = 0;
+ unsigned char *max_instr;
+
+#ifdef CONFIG_X86_32
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+#endif
+
+ /* If it was a exec fault on NX page, ignore */
+ if (error_code & PF_INSTR)
+ return 0;
+
+ instr = (unsigned char *)convert_ip_to_linear(current, regs);
+ max_instr = instr + 15;
+
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+ return 0;
+
+ while (scan_more && instr < max_instr) {
+ unsigned char opcode;
+ unsigned char instr_hi;
+ unsigned char instr_lo;
+
+ if (probe_kernel_address(instr, opcode))
+ break;
+
+ instr_hi = opcode & 0xf0;
+ instr_lo = opcode & 0x0f;
+ instr++;
+
+ switch (instr_hi) {
+ case 0x20:
+ case 0x30:
+ /*
+ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+ * In X86_64 long mode, the CPU will signal invalid
+ * opcode if some of these prefixes are present so
+ * X86_64 will never get here anyway
+ */
+ scan_more = ((instr_lo & 7) == 0x6);
+ break;
+#ifdef CONFIG_X86_64
+ case 0x40:
+ /*
+ * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+ * Need to figure out under what instruction mode the
+ * instruction was issued. Could check the LDT for lm,
+ * but for now it's good enough to assume that long
+ * mode only uses well known segments or kernel.
+ */
+ scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
+ break;
+#endif
+ case 0x60:
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
+ scan_more = (instr_lo & 0xC) == 0x4;
+ break;
+ case 0xF0:
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
+ scan_more = !instr_lo || (instr_lo>>1) == 1;
+ break;
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ scan_more = 0;
+
+ if (probe_kernel_address(instr, opcode))
+ break;
+ prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+ break;
+ default:
+ scan_more = 0;
+ break;
+ }
+ }
+ return prefetch;
+}
+
+static void force_sig_info_fault(int si_signo, int si_code,
+ unsigned long address, struct task_struct *tsk)
+{
+ siginfo_t info;
+
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+ force_sig_info(si_signo, &info, tsk);
+}
+
+#ifdef CONFIG_X86_64
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+#endif
+
+void dump_pagetable(unsigned long address)
+{
+#ifdef CONFIG_X86_32
+ __typeof__(pte_val(__pte(0))) page;
+
+ page = read_cr3();
+ page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+ printk("*pdpt = %016Lx ", page);
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && page & _PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+ & (PTRS_PER_PMD - 1)];
+ printk(KERN_CONT "*pde = %016Lx ", page);
+ page &= ~_PAGE_NX;
+ }
+#else
+ printk("*pde = %08lx ", page);
+#endif
+
+ /*
+ * We must not directly access the pte in the highpte
+ * case if the page table is located in highmem.
+ * And let's rather not kmap-atomic the pte, just in case
+ * it's allocated already.
+ */
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && (page & _PAGE_PRESENT)
+ && !(page & _PAGE_PSE)) {
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+ & (PTRS_PER_PTE - 1)];
+ printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
+ }
+
+ printk("\n");
+#else /* CONFIG_X86_64 */
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = (pgd_t *)read_cr3();
+
+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+ pgd += pgd_index(address);
+ if (bad_address(pgd)) goto bad;
+ printk("PGD %lx ", pgd_val(*pgd));
+ if (!pgd_present(*pgd)) goto ret;
+
+ pud = pud_offset(pgd, address);
+ if (bad_address(pud)) goto bad;
+ printk("PUD %lx ", pud_val(*pud));
+ if (!pud_present(*pud)) goto ret;
+
+ pmd = pmd_offset(pud, address);
+ if (bad_address(pmd)) goto bad;
+ printk("PMD %lx ", pmd_val(*pmd));
+ if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (bad_address(pte)) goto bad;
+ printk("PTE %lx", pte_val(*pte));
+ret:
+ printk("\n");
+ return;
+bad:
+ printk("BAD\n");
+#endif
+}
+
+#ifdef CONFIG_X86_32
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+ if (!pmd_present(*pmd)) {
+ set_pmd(pmd, *pmd_k);
+ arch_flush_lazy_mmu_mode();
+ } else
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ return pmd_k;
+}
+#endif
+
+#ifdef CONFIG_X86_64
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
+
+/* Workaround for K8 erratum #93 & buggy BIOS.
+ BIOS SMM functions are required to use a specific workaround
+ to avoid corruption of the 64bit RIP register on C stepping K8.
+ A lot of BIOS that didn't get tested properly miss this.
+ The OS sees this as a page fault with the upper 32bits of RIP cleared.
+ Try to work around it here.
+ Note we only handle faults in kernel here.
+ Does nothing for X86_32
+ */
+static int is_errata93(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_64
+ static int warned;
+ if (address != regs->ip)
+ return 0;
+ if ((address >> 32) != 0)
+ return 0;
+ address |= 0xffffffffUL << 32;
+ if ((address >= (u64)_stext && address <= (u64)_etext) ||
+ (address >= MODULES_VADDR && address <= MODULES_END)) {
+ if (!warned) {
+ printk(errata93_warning);
+ warned = 1;
+ }
+ regs->ip = address;
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
+ * addresses >4GB. We catch this in the page fault handler because these
+ * addresses are not reachable. Just detect this case and return. Any code
+ * segment in LDT is compatibility mode.
+ */
+static int is_errata100(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_64
+ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+ (address >> 32))
+ return 1;
+#endif
+ return 0;
+}
+
+void do_invalid_op(struct pt_regs *, unsigned long);
+
+static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
+{
+#ifdef CONFIG_X86_F00F_BUG
+ unsigned long nr;
+ /*
+ * Pentium F0 0F C7 C8 bug workaround.
+ */
+ if (boot_cpu_data.f00f_bug) {
+ nr = (address - idt_descr.address) >> 3;
+
+ if (nr == 6) {
+ do_invalid_op(regs, 0);
+ return 1;
+ }
+ }
+#endif
+ return 0;
+}
+
+static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+#ifdef CONFIG_X86_32
+ if (!oops_may_print())
+ return;
+#endif
+
+#ifdef CONFIG_X86_PAE
+ if (error_code & PF_INSTR) {
+ int level;
+ pte_t *pte = lookup_address(address, &level);
+
+ if (pte && pte_present(*pte) && !pte_exec(*pte))
+ printk(KERN_CRIT "kernel tried to execute "
+ "NX-protected page - exploit attempt? "
+ "(uid: %d)\n", current->uid);
+ }
+#endif
+
+ printk(KERN_ALERT "BUG: unable to handle kernel ");
+ if (address < PAGE_SIZE)
+ printk(KERN_CONT "NULL pointer dereference");
+ else
+ printk(KERN_CONT "paging request");
+#ifdef CONFIG_X86_32
+ printk(KERN_CONT " at %08lx\n", address);
+#else
+ printk(KERN_CONT " at %016lx\n", address);
+#endif
+ printk(KERN_ALERT "IP:");
+ printk_address(regs->ip, 1);
+ dump_pagetable(address);
+}
+
+#ifdef CONFIG_X86_64
+static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code)
+{
+ unsigned long flags = oops_begin();
+ struct task_struct *tsk;
+
+ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
+ current->comm, address);
+ dump_pagetable(address);
+ tsk = current;
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+ if (__die("Bad pagetable", regs, error_code))
+ regs = NULL;
+ oops_end(flags, regs, SIGKILL);
+}
+#endif
+
+/*
+ * Handle a spurious fault caused by a stale TLB entry. This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X). Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+ unsigned long error_code)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /* Reserved-bit violation or user access to kernel space? */
+ if (error_code & (PF_USER | PF_RSVD))
+ return 0;
+
+ pgd = init_mm.pgd + pgd_index(address);
+ if (!pgd_present(*pgd))
+ return 0;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return 0;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return 0;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(*pte))
+ return 0;
+
+ if ((error_code & PF_WRITE) && !pte_write(*pte))
+ return 0;
+ if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * X86_32
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * X86_64
+ * Handle a fault on the vmalloc area
+ *
+ * This assumes no large pages in there.
+ */
+static int vmalloc_fault(unsigned long address)
+{
+#ifdef CONFIG_X86_32
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+ return 0;
+#else
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Copy kernel mappings over when needed. This can also
+ happen within a race in page table update. In the later
+ case just flush. */
+
+ pgd = pgd_offset(current->mm ?: &init_mm, address);
+ pgd_ref = pgd_offset_k(address);
+ if (pgd_none(*pgd_ref))
+ return -1;
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ /* Below here mismatches are bugs because these lower tables
+ are shared */
+
+ pud = pud_offset(pgd, address);
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+ pte = pte_offset_kernel(pmd, address);
+ /* Don't use pte_page here, because the mappings can point
+ outside mem_map, and the NUMA hash lookup cannot handle
+ that. */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+ return 0;
+#endif
+}
+
+int show_unhandled_signals = 1;
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ int write, si_code;
+ int fault;
+#ifdef CONFIG_X86_64
+ unsigned long flags;
+#endif
+
+ /*
+ * We can fault from pretty much anywhere, with unknown IRQ state.
+ */
+ trace_hardirqs_fixup();
+
+ tsk = current;
+ mm = tsk->mm;
+ prefetchw(&mm->mmap_sem);
+
+ /* get the address */
+ address = read_cr2();
+
+ si_code = SEGV_MAPERR;
+
+ if (notify_page_fault(regs))
+ return;
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * This verifies that the fault happens in kernel space
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 9) == 0.
+ */
+#ifdef CONFIG_X86_32
+ if (unlikely(address >= TASK_SIZE)) {
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ vmalloc_fault(address) >= 0)
+ return;
+
+ /* Can handle a stale RO->RW TLB */
+ if (spurious_fault(address, error_code))
+ return;
+
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ goto bad_area_nosemaphore;
+ }
+
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
+ if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
+ local_irq_enable();
+
+ /*
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+ if (in_atomic() || !mm)
+ goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
+ if (unlikely(address >= TASK_SIZE64)) {
+ /*
+ * Don't check for the module range here: its PML4
+ * is always initialized because it's shared with the main
+ * kernel text. Only vmalloc may need PML4 syncups.
+ */
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+
+ /* Can handle a stale RO->RW TLB */
+ if (spurious_fault(address, error_code))
+ return;
+
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ goto bad_area_nosemaphore;
+ }
+ if (likely(regs->flags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(error_code & PF_RSVD))
+ pgtable_bad(address, regs, error_code);
+
+ /*
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+
+ /*
+ * User-mode registers count as a user access even for any
+ * potential system fault or CPU buglet.
+ */
+ if (user_mode_vm(regs))
+ error_code |= PF_USER;
+again:
+#endif
+ /* When running in the kernel we expect faults to occur only to
+ * addresses in user space. All other faults represent errors in the
+ * kernel and should generate an OOPS. Unfortunately, in the case of an
+ * erroneous fault occurring in a code path which already holds mmap_sem
+ * we will deadlock attempting to validate the fault against the
+ * address space. Luckily the kernel only validly references user
+ * space from well defined areas of code, which are listed in the
+ * exceptions table.
+ *
+ * As the vast majority of faults will be valid we will only perform
+ * the source reference check when there is a possibility of a deadlock.
+ * Attempt to lock the address space, if we cannot we then validate the
+ * source. If this is invalid we can skip the address space check,
+ * thus avoiding the deadlock.
+ */
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ if ((error_code & PF_USER) == 0 &&
+ !search_exception_tables(regs->ip))
+ goto bad_area_nosemaphore;
+ down_read(&mm->mmap_sem);
+ }
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (error_code & PF_USER) {
+ /*
+ * Accessing the stack below %sp is always a bug.
+ * The large cushion allows instructions like enter
+ * and pusha to work. ("enter $65535,$31" pushes
+ * 32 pointers and then decrements %sp by 65535.)
+ */
+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
+ goto bad_area;
+ }
+ if (expand_stack(vma, address))
+ goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ si_code = SEGV_ACCERR;
+ write = 0;
+ switch (error_code & (PF_PROT|PF_WRITE)) {
+ default: /* 3: write, present */
+ /* fall through */
+ case PF_WRITE: /* write, not present */
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ write++;
+ break;
+ case PF_PROT: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+ goto bad_area;
+ }
+
+#ifdef CONFIG_X86_32
+survive:
+#endif
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(mm, vma, address, write);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ if (fault & VM_FAULT_OOM)
+ goto out_of_memory;
+ else if (fault & VM_FAULT_SIGBUS)
+ goto do_sigbus;
+ BUG();
+ }
+ if (fault & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+
+#ifdef CONFIG_X86_32
+ /*
+ * Did it hit the DOS screen memory VA from vm86 mode?
+ */
+ if (v8086_mode(regs)) {
+ unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
+ }
+#endif
+ up_read(&mm->mmap_sem);
+ return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & PF_USER) {
+ /*
+ * It's possible to have interrupts off here.
+ */
+ local_irq_enable();
+
+ /*
+ * Valid to do another page fault here because this one came
+ * from user space.
+ */
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+ if (is_errata100(regs, address))
+ return;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(
+#ifdef CONFIG_X86_32
+ "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
+#else
+ "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
+#endif
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, task_pid_nr(tsk), address, regs->ip,
+ regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+
+ tsk->thread.cr2 = address;
+ /* Kernel addresses are always protection faults */
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ return;
+ }
+
+ if (is_f00f_bug(regs, address))
+ return;
+
+no_context:
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * X86_32
+ * Valid to do another page fault here, because if this fault
+ * had been triggered by is_prefetch fixup_exception would have
+ * handled it.
+ *
+ * X86_64
+ * Hall of shame of CPU/BIOS bugs.
+ */
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+ if (is_errata93(regs, address))
+ return;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+#ifdef CONFIG_X86_32
+ bust_spinlocks(1);
+#else
+ flags = oops_begin();
+#endif
+
+ show_fault_oops(regs, error_code, address);
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+
+#ifdef CONFIG_X86_32
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+#else
+ if (__die("Oops", regs, error_code))
+ regs = NULL;
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+ oops_end(flags, regs, SIGKILL);
+#endif
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ up_read(&mm->mmap_sem);
+ if (is_global_init(tsk)) {
+ yield();
+#ifdef CONFIG_X86_32
+ down_read(&mm->mmap_sem);
+ goto survive;
+#else
+ goto again;
+#endif
+ }
+
+ printk("VM: killing process %s\n", tsk->comm);
+ if (error_code & PF_USER)
+ do_group_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & PF_USER))
+ goto no_context;
+#ifdef CONFIG_X86_32
+ /* User space => ok to do another page fault */
+ if (is_prefetch(regs, address, error_code))
+ return;
+#endif
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+void vmalloc_sync_all(void)
+{
+#ifdef CONFIG_X86_32
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = TASK_SIZE;
+ unsigned long address;
+
+ if (SHARED_KERNEL_PMD)
+ return;
+
+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+ for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!vmalloc_sync_one(page_address(page),
+ address))
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ if (!page)
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start && test_bit(pgd_index(address), insync))
+ start = address + PGDIR_SIZE;
+ }
+#else /* CONFIG_X86_64 */
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long address;
+
+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
+ spin_unlock(&pgd_lock);
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start)
+ start = address + PGDIR_SIZE;
+ }
+ /* Check that there is no need to do the same for the modules area. */
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ (__START_KERNEL & PGDIR_MASK)));
+#endif
+}
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
deleted file mode 100644
index a2273d44aa2..00000000000
--- a/arch/x86/mm/fault_32.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * linux/arch/i386/mm/fault.c
- *
- * Copyright (C) 1995 Linus Torvalds
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
-#include <linux/highmem.h>
-#include <linux/bootmem.h> /* for max_low_pfn */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/kdebug.h>
-#include <linux/kprobes.h>
-
-#include <asm/system.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-
-extern void die(const char *,struct pt_regs *,long);
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode_vm(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 14))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- return 0;
-}
-#endif
-
-/*
- * Return EIP plus the CS segment base. The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-static inline unsigned long get_segment_eip(struct pt_regs *regs,
- unsigned long *eip_limit)
-{
- unsigned long eip = regs->eip;
- unsigned seg = regs->xcs & 0xffff;
- u32 seg_ar, seg_limit, base, *desc;
-
- /* Unlikely, but must come before segment checks. */
- if (unlikely(regs->eflags & VM_MASK)) {
- base = seg << 4;
- *eip_limit = base + 0xffff;
- return base + (eip & 0xffff);
- }
-
- /* The standard kernel/user address space limit. */
- *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
- /* By far the most common cases. */
- if (likely(SEGMENT_IS_FLAT_CODE(seg)))
- return eip;
-
- /* Check the segment exists, is within the current LDT/GDT size,
- that kernel/user (ring 0..3) has the appropriate privilege,
- that it's a code segment, and get the limit. */
- __asm__ ("larl %3,%0; lsll %3,%1"
- : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
- if ((~seg_ar & 0x9800) || eip > seg_limit) {
- *eip_limit = 0;
- return 1; /* So that returned eip > *eip_limit. */
- }
-
- /* Get the GDT/LDT descriptor base.
- When you look for races in this code remember that
- LDT and other horrors are only used in user space. */
- if (seg & (1<<2)) {
- /* Must lock the LDT while reading it. */
- mutex_lock(&current->mm->context.lock);
- desc = current->mm->context.ldt;
- desc = (void *)desc + (seg & ~7);
- } else {
- /* Must disable preemption while reading the GDT. */
- desc = (u32 *)get_cpu_gdt_table(get_cpu());
- desc = (void *)desc + (seg & ~7);
- }
-
- /* Decode the code segment base from the descriptor */
- base = get_desc_base((unsigned long *)desc);
-
- if (seg & (1<<2)) {
- mutex_unlock(&current->mm->context.lock);
- } else
- put_cpu();
-
- /* Adjust EIP and segment limit, and clamp at the kernel limit.
- It's legitimate for segments to wrap at 0xffffffff. */
- seg_limit += base;
- if (seg_limit < *eip_limit && seg_limit >= base)
- *eip_limit = seg_limit;
- return eip + base;
-}
-
-/*
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- * Check that here and ignore it.
- */
-static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long limit;
- unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
- int scan_more = 1;
- int prefetch = 0;
- int i;
-
- for (i = 0; scan_more && i < 15; i++) {
- unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
-
- if (instr > (unsigned char *)limit)
- break;
- if (probe_kernel_address(instr, opcode))
- break;
-
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
- instr++;
-
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
- if (instr > (unsigned char *)limit)
- break;
- if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
- }
- return prefetch;
-}
-
-static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
-{
- if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 >= 6)) {
- /* Catch an obscure case of prefetch inside an NX page. */
- if (nx_enabled && (error_code & 16))
- return 0;
- return __is_prefetch(regs, addr);
- }
- return 0;
-}
-
-static noinline void force_sig_info_fault(int si_signo, int si_code,
- unsigned long address, struct task_struct *tsk)
-{
- siginfo_t info;
-
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
- force_sig_info(si_signo, &info, tsk);
-}
-
-fastcall void do_invalid_op(struct pt_regs *, unsigned long);
-
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
- unsigned index = pgd_index(address);
- pgd_t *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
-
- pgd += index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- return NULL;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
-
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd)) {
- set_pmd(pmd, *pmd_k);
- arch_flush_lazy_mmu_mode();
- } else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- return pmd_k;
-}
-
-/*
- * Handle a fault on the vmalloc or module mapping area
- *
- * This assumes no large pages in there.
- */
-static inline int vmalloc_fault(unsigned long address)
-{
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
- return 0;
-}
-
-int show_unhandled_signals = 1;
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- *
- * error_code:
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means use of reserved bit detected
- * bit 4 == 1 means fault was an instruction fetch
- */
-fastcall void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- unsigned long address;
- int write, si_code;
- int fault;
-
- /*
- * We can fault from pretty much anywhere, with unknown IRQ state.
- */
- trace_hardirqs_fixup();
-
- /* get the address */
- address = read_cr2();
-
- tsk = current;
-
- si_code = SEGV_MAPERR;
-
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
- */
- if (unlikely(address >= TASK_SIZE)) {
- if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
- return;
- if (notify_page_fault(regs))
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
-
- if (notify_page_fault(regs))
- return;
-
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
- fault has been handled. */
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-
- mm = tsk->mm;
-
- /*
- * If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault..
- */
- if (in_atomic() || !mm)
- goto bad_area_nosemaphore;
-
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
- */
- if (!down_read_trylock(&mm->mmap_sem)) {
- if ((error_code & 4) == 0 &&
- !search_exception_tables(regs->eip))
- goto bad_area_nosemaphore;
- down_read(&mm->mmap_sem);
- }
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- /*
- * Accessing the stack below %esp is always a bug.
- * The large cushion allows instructions like enter
- * and pusha to work. ("enter $65535,$31" pushes
- * 32 pointers and then decrements %esp by 65535.)
- */
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & 3) {
- default: /* 3: write, present */
- /* fall through */
- case 2: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case 1: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-
- survive:
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- fault = handle_mm_fault(mm, vma, address, write);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
-
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
- if (regs->eflags & VM_MASK) {
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
- }
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & 4) {
- /*
- * It's possible to have interrupts off here.
- */
- local_irq_enable();
-
- /*
- * Valid to do another page fault here because this one came
- * from user space.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk("%s%s[%d]: segfault at %08lx eip %08lx "
- "esp %08lx error %lx\n",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address, regs->eip,
- regs->esp, error_code);
- }
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
- return;
- }
-
-#ifdef CONFIG_X86_F00F_BUG
- /*
- * Pentium F0 0F C7 C8 bug workaround.
- */
- if (boot_cpu_data.f00f_bug) {
- unsigned long nr;
-
- nr = (address - idt_descr.address) >> 3;
-
- if (nr == 6) {
- do_invalid_op(regs, 0);
- return;
- }
- }
-#endif
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * Valid to do another page fault here, because if this fault
- * had been triggered by is_prefetch fixup_exception would have
- * handled it.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-
- bust_spinlocks(1);
-
- if (oops_may_print()) {
- __typeof__(pte_val(__pte(0))) page;
-
-#ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
-
- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
- printk(KERN_CRIT "kernel tried to execute "
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current->uid);
- }
-#endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "BUG: unable to handle kernel NULL "
- "pointer dereference");
- else
- printk(KERN_ALERT "BUG: unable to handle kernel paging"
- " request");
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT "printing eip: %08lx ", regs->eip);
-
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
-#else
- printk("*pde = %08lx ", page);
-#endif
-
- /*
- * We must not directly access the pte in the highpte
- * case if the page table is located in highmem.
- * And let's rather not kmap-atomic the pte, just in case
- * it's allocated already.
- */
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
-
- printk("\n");
- }
-
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
- do_group_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & 4))
- goto no_context;
-
- /* User space => ok to do another page fault */
- if (is_prefetch(regs, address, error_code))
- return;
-
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-}
-
-void vmalloc_sync_all(void)
-{
- /*
- * Note that races in the updates of insync and start aren't
- * problematic: insync can only get set bits added, and updates to
- * start are only improving performance (without affecting correctness
- * if undone).
- */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = TASK_SIZE;
- unsigned long address;
-
- if (SHARED_KERNEL_PMD)
- return;
-
- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
- for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (page = pgd_list; page; page =
- (struct page *)page->index)
- if (!vmalloc_sync_one(page_address(page),
- address)) {
- BUG_ON(page != pgd_list);
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- if (!page)
- set_bit(pgd_index(address), insync);
- }
- if (address == start && test_bit(pgd_index(address), insync))
- start = address + PGDIR_SIZE;
- }
-}
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
deleted file mode 100644
index 0e26230669c..00000000000
--- a/arch/x86/mm/fault_64.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * linux/arch/x86-64/mm/fault.c
- *
- * Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
-#include <linux/compiler.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/kdebug.h>
-#include <linux/kprobes.h>
-
-#include <asm/system.h>
-#include <asm/pgalloc.h>
-#include <asm/smp.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm-generic/sections.h>
-
-/* Page fault error code bits */
-#define PF_PROT (1<<0) /* or no page found */
-#define PF_WRITE (1<<1)
-#define PF_USER (1<<2)
-#define PF_RSVD (1<<3)
-#define PF_INSTR (1<<4)
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 14))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- return 0;
-}
-#endif
-
-/* Sometimes the CPU reports invalid exceptions on prefetch.
- Check that here and ignore.
- Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
-{
- unsigned char *instr;
- int scan_more = 1;
- int prefetch = 0;
- unsigned char *max_instr;
-
- /* If it was a exec fault ignore */
- if (error_code & PF_INSTR)
- return 0;
-
- instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
- max_instr = instr + 15;
-
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
- return 0;
-
- while (scan_more && instr < max_instr) {
- unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
-
- if (probe_kernel_address(instr, opcode))
- break;
-
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
- instr++;
-
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /* Values 0x26,0x2E,0x36,0x3E are valid x86
- prefixes. In long mode, the CPU will signal
- invalid opcode if some of these prefixes are
- present so we will never get here anyway */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-
- case 0x40:
- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
- Need to figure out under what instruction mode the
- instruction was issued ... */
- /* Could check the LDT for lm, but for now it's good
- enough to assume that long mode only uses well known
- segments or kernel. */
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
- break;
-
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
- if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
- }
- return prefetch;
-}
-
-static int bad_address(void *p)
-{
- unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
-}
-
-void dump_pagetable(unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
- pgd += pgd_index(address);
- if (bad_address(pgd)) goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
- if (!pgd_present(*pgd)) goto ret;
-
- pud = pud_offset(pgd, address);
- if (bad_address(pud)) goto bad;
- printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
-
- pmd = pmd_offset(pud, address);
- if (bad_address(pmd)) goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
-
- pte = pte_offset_kernel(pmd, address);
- if (bad_address(pte)) goto bad;
- printk("PTE %lx", pte_val(*pte));
-ret:
- printk("\n");
- return;
-bad:
- printk("BAD\n");
-}
-
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-
-/* Workaround for K8 erratum #93 & buggy BIOS.
- BIOS SMM functions are required to use a specific workaround
- to avoid corruption of the 64bit RIP register on C stepping K8.
- A lot of BIOS that didn't get tested properly miss this.
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
- Try to work around it here.
- Note we only handle faults in kernel here. */
-
-static int is_errata93(struct pt_regs *regs, unsigned long address)
-{
- static int warned;
- if (address != regs->rip)
- return 0;
- if ((address >> 32) != 0)
- return 0;
- address |= 0xffffffffUL << 32;
- if ((address >= (u64)_stext && address <= (u64)_etext) ||
- (address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!warned) {
- printk(errata93_warning);
- warned = 1;
- }
- regs->rip = address;
- return 1;
- }
- return 0;
-}
-
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
-{
- unsigned long flags = oops_begin();
- struct task_struct *tsk;
-
- printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- current->comm, address);
- dump_pagetable(address);
- tsk = current;
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- __die("Bad pagetable", regs, error_code);
- oops_end(flags);
- do_exit(SIGKILL);
-}
-
-/*
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
-
- /* Copy kernel mappings over when needed. This can also
- happen within a race in page table update. In the later
- case just flush. */
-
- pgd = pgd_offset(current->mm ?: &init_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
- /* Below here mismatches are bugs because these lower tables
- are shared */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
- BUG();
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
- /* Don't use pte_page here, because the mappings can point
- outside mem_map, and the NUMA hash lookup cannot handle
- that. */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- return 0;
-}
-
-int show_unhandled_signals = 1;
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- unsigned long address;
- const struct exception_table_entry *fixup;
- int write, fault;
- unsigned long flags;
- siginfo_t info;
-
- /*
- * We can fault from pretty much anywhere, with unknown IRQ state.
- */
- trace_hardirqs_fixup();
-
- tsk = current;
- mm = tsk->mm;
- prefetchw(&mm->mmap_sem);
-
- /* get the address */
- address = read_cr2();
-
- info.si_code = SEGV_MAPERR;
-
-
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
- */
- if (unlikely(address >= TASK_SIZE64)) {
- /*
- * Don't check for the module range here: its PML4
- * is always initialized because it's shared with the main
- * kernel text. Only vmalloc may need PML4 syncups.
- */
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- ((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
- if (notify_page_fault(regs))
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
-
- if (notify_page_fault(regs))
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
-
- if (unlikely(error_code & PF_RSVD))
- pgtable_bad(address, regs, error_code);
-
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
-
- /*
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet.
- */
- if (user_mode_vm(regs))
- error_code |= PF_USER;
-
- again:
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
- */
- if (!down_read_trylock(&mm->mmap_sem)) {
- if ((error_code & PF_USER) == 0 &&
- !search_exception_tables(regs->rip))
- goto bad_area_nosemaphore;
- down_read(&mm->mmap_sem);
- }
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- /* Allow userspace just enough access below the stack pointer
- * to let the 'enter' instruction work.
- */
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- info.si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & (PF_PROT|PF_WRITE)) {
- default: /* 3: write, present */
- /* fall through */
- case PF_WRITE: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case PF_PROT: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- fault = handle_mm_fault(mm, vma, address, write);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
-
- /*
- * It's possible to have interrupts off here.
- */
- local_irq_enable();
-
- if (is_prefetch(regs, address, error_code))
- return;
-
- /* Work around K8 erratum #100 K8 in compat mode
- occasionally jumps to illegal addresses >4GB. We
- catch this here in the page fault handler because
- these addresses are not reachable. Just detect this
- case and return. Any code segment in LDT is
- compatibility mode. */
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
- (address >> 32))
- return;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(
- "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, tsk->pid, address, regs->rip,
- regs->rsp, error_code);
- }
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *)address;
- force_sig_info(SIGSEGV, &info, tsk);
- return;
- }
-
-no_context:
-
- /* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
-
- /*
- * Hall of shame of CPU/BIOS bugs.
- */
-
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata93(regs, address))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-
- flags = oops_begin();
-
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at %016lx RIP: \n" KERN_ALERT,address);
- printk_address(regs->rip);
- dump_pagetable(address);
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- __die("Oops", regs, error_code);
- /* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- goto again;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
- do_group_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & PF_USER))
- goto no_context;
-
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, tsk);
- return;
-}
-
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
-
-void vmalloc_sync_all(void)
-{
- /* Note that races in the updates of insync and start aren't
- problematic:
- insync can only get set bits added, and updates to start are only
- improving performance (without affecting correctness if undone). */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = VMALLOC_START & PGDIR_MASK;
- unsigned long address;
-
- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock(&pgd_lock);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- }
- spin_unlock(&pgd_lock);
- set_bit(pgd_index(address), insync);
- }
- if (address == start)
- start = address + PGDIR_SIZE;
- }
- /* Check that there is no need to do the same for the modules area. */
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
- (__START_KERNEL & PGDIR_MASK)));
-}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 1c3bf95f735..3d936f23270 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -18,6 +18,49 @@ void kunmap(struct page *page)
kunmap_high(page);
}
+static void debug_kmap_atomic_prot(enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+ static unsigned warn_count = 10;
+
+ if (unlikely(warn_count == 0))
+ return;
+
+ if (unlikely(in_interrupt())) {
+ if (in_irq()) {
+ if (type != KM_IRQ0 && type != KM_IRQ1 &&
+ type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
+ type != KM_BOUNCE_READ) {
+ WARN_ON(1);
+ warn_count--;
+ }
+ } else if (!irqs_disabled()) { /* softirq */
+ if (type != KM_IRQ0 && type != KM_IRQ1 &&
+ type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
+ type != KM_SKB_SUNRPC_DATA &&
+ type != KM_SKB_DATA_SOFTIRQ &&
+ type != KM_BOUNCE_READ) {
+ WARN_ON(1);
+ warn_count--;
+ }
+ }
+ }
+
+ if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
+ type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
+ if (!irqs_disabled()) {
+ WARN_ON(1);
+ warn_count--;
+ }
+ } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
+ if (irq_count() == 0 && !irqs_disabled()) {
+ WARN_ON(1);
+ warn_count--;
+ }
+ }
+#endif
+}
+
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
* no global lock is needed and because the kmap code must perform a global TLB
@@ -30,8 +73,10 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
-
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+
+ debug_kmap_atomic_prot(type);
+
pagefault_disable();
if (!PageHighMem(page))
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 6c06d9c0488..4fbafb4bc2f 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -15,6 +15,7 @@
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
@@ -88,7 +89,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
spin_lock(&mm->page_table_lock);
if (pud_none(*pud))
- pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK);
+ pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
else
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3c76d194fd2..da524fb2242 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -27,7 +27,6 @@
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
-#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
@@ -40,8 +39,10 @@
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
+#include <asm/bugs.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
@@ -50,7 +51,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
-static int noinline do_test_wp_bit(void);
+static noinline int do_test_wp_bit(void);
/*
* Creates a middle page table and puts a pointer to it in the
@@ -61,26 +62,26 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
pud_t *pud;
pmd_t *pmd_table;
-
+
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+ paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
- if (pmd_table != pmd_offset(pud, 0))
- BUG();
+ BUG_ON(pmd_table != pmd_offset(pud, 0));
}
#endif
pud = pud_offset(pgd, 0);
pmd_table = pmd_offset(pud, 0);
+
return pmd_table;
}
/*
* Create a page table and place a pointer to it in a middle page
- * directory entry.
+ * directory entry:
*/
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
@@ -90,9 +91,10 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
#ifdef CONFIG_DEBUG_PAGEALLOC
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
#endif
- if (!page_table)
+ if (!page_table) {
page_table =
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ }
paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -103,22 +105,21 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
}
/*
- * This function initializes a certain range of kernel virtual memory
+ * This function initializes a certain range of kernel virtual memory
* with new bootmem page tables, everywhere page tables are missing in
* the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space
- * so we can cache the place of the first one and move around without
+ *
+ * NOTE: The pagetables are allocated contiguous on the physical space
+ * so we can cache the place of the first one and move around without
* checking the pgd every time.
*/
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+static void __init
+page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
- pgd_t *pgd;
- pmd_t *pmd;
int pgd_idx, pmd_idx;
unsigned long vaddr;
+ pgd_t *pgd;
+ pmd_t *pmd;
vaddr = start;
pgd_idx = pgd_index(vaddr);
@@ -128,7 +129,8 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
pmd = one_md_table_init(pgd);
pmd = pmd + pmd_index(vaddr);
- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+ for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
+ pmd++, pmd_idx++) {
one_page_table_init(pmd);
vaddr += PMD_SIZE;
@@ -145,17 +147,17 @@ static inline int is_kernel_text(unsigned long addr)
}
/*
- * This maps the physical memory to kernel virtual address space, a total
- * of max_low_pfn pages, by creating page tables starting from address
- * PAGE_OFFSET.
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET:
*/
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
+ int pgd_idx, pmd_idx, pte_ofs;
unsigned long pfn;
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
- int pgd_idx, pmd_idx, pte_ofs;
pgd_idx = pgd_index(PAGE_OFFSET);
pgd = pgd_base + pgd_idx;
@@ -165,29 +167,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
pmd = one_md_table_init(pgd);
if (pfn >= max_low_pfn)
continue;
- for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
- unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
- /* Map with big pages if possible, otherwise create normal page tables. */
+ for (pmd_idx = 0;
+ pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+ pmd++, pmd_idx++) {
+ unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
+
+ /*
+ * Map with big pages if possible, otherwise
+ * create normal page tables:
+ */
if (cpu_has_pse) {
- unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
- if (is_kernel_text(address) || is_kernel_text(address2))
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
- else
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+ unsigned int addr2;
+ pgprot_t prot = PAGE_KERNEL_LARGE;
+
+ addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
+ PAGE_OFFSET + PAGE_SIZE-1;
+
+ if (is_kernel_text(addr) ||
+ is_kernel_text(addr2))
+ prot = PAGE_KERNEL_LARGE_EXEC;
+
+ set_pmd(pmd, pfn_pmd(pfn, prot));
pfn += PTRS_PER_PTE;
- } else {
- pte = one_page_table_init(pmd);
-
- for (pte_ofs = 0;
- pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
- pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
- if (is_kernel_text(address))
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
- else
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
- }
+ continue;
+ }
+ pte = one_page_table_init(pmd);
+
+ for (pte_ofs = 0;
+ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+ pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
+ pgprot_t prot = PAGE_KERNEL;
+
+ if (is_kernel_text(addr))
+ prot = PAGE_KERNEL_EXEC;
+
+ set_pte(pte, pfn_pte(pfn, prot));
}
}
}
@@ -200,57 +216,23 @@ static inline int page_kills_ppro(unsigned long pagenr)
return 0;
}
-int page_is_ram(unsigned long pagenr)
-{
- int i;
- unsigned long addr, end;
-
- if (efi_enabled) {
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if (!is_available_memory(md))
- continue;
- addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
-
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
- }
-
- for (i = 0; i < e820.nr_map; i++) {
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;
-#define kmap_get_fixmap_pte(vaddr) \
- pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
+static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
+{
+ return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
+ vaddr), vaddr), vaddr);
+}
static void __init kmap_init(void)
{
unsigned long kmap_vstart;
- /* cache the first kmap pte */
+ /*
+ * Cache the first kmap pte:
+ */
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
@@ -259,11 +241,11 @@ static void __init kmap_init(void)
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
+ unsigned long vaddr;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long vaddr;
vaddr = PKMAP_BASE;
page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
@@ -272,7 +254,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
- pkmap_page_table = pte;
+ pkmap_page_table = pte;
}
static void __meminit free_new_highpage(struct page *page)
@@ -291,7 +273,8 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
SetPageReserved(page);
}
-static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+static int __meminit
+add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
free_new_highpage(page);
totalram_pages++;
@@ -299,6 +282,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
max_mapnr = max(pfn, max_mapnr);
#endif
num_physpages++;
+
return 0;
}
@@ -306,7 +290,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
* Not currently handling the NUMA case.
* Assuming single node and all memory that
* has been added dynamically that would be
- * onlined here is in HIGHMEM
+ * onlined here is in HIGHMEM.
*/
void __meminit online_page(struct page *page)
{
@@ -314,13 +298,11 @@ void __meminit online_page(struct page *page)
add_one_highpage_hotplug(page, page_to_pfn(page));
}
-
-#ifdef CONFIG_NUMA
-extern void set_highmem_pages_init(int);
-#else
+#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
{
int pfn;
+
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
/*
* Holes under sparsemem might not have no mem_map[]:
@@ -330,23 +312,18 @@ static void __init set_highmem_pages_init(int bad_ppro)
}
totalram_pages += totalhigh_pages;
}
-#endif /* CONFIG_FLATMEM */
+#endif /* !CONFIG_NUMA */
#else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
-#define set_highmem_pages_init(bad_ppro) do { } while (0)
+# define kmap_init() do { } while (0)
+# define permanent_kmaps_init(pgd_base) do { } while (0)
+# define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */
-unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
EXPORT_SYMBOL(__PAGE_KERNEL);
-unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
-#ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
-#else
-#define remap_numa_kva() do {} while (0)
-#endif
+pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
void __init native_pagetable_setup_start(pgd_t *base)
{
@@ -372,7 +349,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
memset(&base[USER_PTRS_PER_PGD], 0,
KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
- paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
+ paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
#endif
}
@@ -410,10 +387,10 @@ void __init native_pagetable_setup_done(pgd_t *base)
* be partially populated, and so it avoids stomping on any existing
* mappings.
*/
-static void __init pagetable_init (void)
+static void __init pagetable_init(void)
{
- unsigned long vaddr, end;
pgd_t *pgd_base = swapper_pg_dir;
+ unsigned long vaddr, end;
paravirt_pagetable_setup_start(pgd_base);
@@ -435,9 +412,11 @@ static void __init pagetable_init (void)
* Fixed mappings, only the page table structure has to be
* created - mappings will be set by set_fixmap():
*/
+ early_ioremap_clear();
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
page_table_range_init(vaddr, end, pgd_base);
+ early_ioremap_reset();
permanent_kmaps_init(pgd_base);
@@ -450,7 +429,7 @@ static void __init pagetable_init (void)
* driver might have split up a kernel 4MB mapping.
*/
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
- __attribute__ ((aligned (PAGE_SIZE)));
+ __attribute__ ((aligned(PAGE_SIZE)));
static inline void save_pg_dir(void)
{
@@ -462,7 +441,7 @@ static inline void save_pg_dir(void)
}
#endif
-void zap_low_mappings (void)
+void zap_low_mappings(void)
{
int i;
@@ -474,22 +453,24 @@ void zap_low_mappings (void)
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ for (i = 0; i < USER_PTRS_PER_PGD; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
+ }
flush_tlb_all();
}
-int nx_enabled = 0;
+int nx_enabled;
+
+pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
#ifdef CONFIG_X86_PAE
-static int disable_nx __initdata = 0;
-u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
-EXPORT_SYMBOL_GPL(__supported_pte_mask);
+static int disable_nx __initdata;
/*
* noexec = on|off
@@ -506,11 +487,14 @@ static int __init noexec_setup(char *str)
__supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
}
- } else if (!strcmp(str,"off")) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- } else
- return -EINVAL;
+ } else {
+ if (!strcmp(str, "off")) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ } else {
+ return -EINVAL;
+ }
+ }
return 0;
}
@@ -522,6 +506,7 @@ static void __init set_nx(void)
if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
if ((v[3] & (1 << 20)) && !disable_nx) {
rdmsr(MSR_EFER, l, h);
l |= EFER_NX;
@@ -531,35 +516,6 @@ static void __init set_nx(void)
}
}
}
-
-/*
- * Enables/disables executability of a given kernel page and
- * returns the previous setting.
- */
-int __init set_kernel_exec(unsigned long vaddr, int enable)
-{
- pte_t *pte;
- int ret = 1;
-
- if (!nx_enabled)
- goto out;
-
- pte = lookup_address(vaddr);
- BUG_ON(!pte);
-
- if (!pte_exec_kernel(*pte))
- ret = 0;
-
- if (enable)
- pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
- else
- pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
- pte_update_defer(&init_mm, vaddr, pte);
- __flush_tlb_all();
-out:
- return ret;
-}
-
#endif
/*
@@ -574,9 +530,8 @@ void __init paging_init(void)
#ifdef CONFIG_X86_PAE
set_nx();
if (nx_enabled)
- printk("NX (Execute Disable) protection: active\n");
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
#endif
-
pagetable_init();
load_cr3(swapper_pg_dir);
@@ -600,10 +555,10 @@ void __init paging_init(void)
* used to involve black magic jumps to work around some nasty CPU bugs,
* but fortunately the switch to using exceptions got rid of all that.
*/
-
static void __init test_wp_bit(void)
{
- printk("Checking if this processor honours the WP bit even in supervisor mode... ");
+ printk(KERN_INFO
+ "Checking if this processor honours the WP bit even in supervisor mode...");
/* Any page-aligned address will do, the test is non-destructive */
__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
@@ -611,47 +566,46 @@ static void __init test_wp_bit(void)
clear_fixmap(FIX_WP_TEST);
if (!boot_cpu_data.wp_works_ok) {
- printk("No.\n");
+ printk(KERN_CONT "No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
- panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
+ panic(
+ "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
} else {
- printk("Ok.\n");
+ printk(KERN_CONT "Ok.\n");
}
}
-static struct kcore_list kcore_mem, kcore_vmalloc;
+static struct kcore_list kcore_mem, kcore_vmalloc;
void __init mem_init(void)
{
- extern int ppro_with_ram_bug(void);
int codesize, reservedpages, datasize, initsize;
- int tmp;
- int bad_ppro;
+ int tmp, bad_ppro;
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
-
bad_ppro = ppro_with_ram_bug();
#ifdef CONFIG_HIGHMEM
/* check that fixmap and pkmap do not overlap */
- if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
- printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+ if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+ printk(KERN_ERR
+ "fixmap and kmap areas overlap - this will crash\n");
printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
- PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+ PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
+ FIXADDR_START);
BUG();
}
#endif
-
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
reservedpages = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
/*
- * Only count reserved RAM pages
+ * Only count reserved RAM pages:
*/
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
@@ -662,11 +616,12 @@ void __init mem_init(void)
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+ kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+ kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
VMALLOC_END-VMALLOC_START);
- printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+ printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
+ "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
@@ -677,45 +632,46 @@ void __init mem_init(void)
);
#if 1 /* double-sanity-check paranoia */
- printk("virtual kernel memory layout:\n"
- " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ printk(KERN_INFO "virtual kernel memory layout:\n"
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
- " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
- " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
- " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
- " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
- " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
- " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
- FIXADDR_START, FIXADDR_TOP,
- (FIXADDR_TOP - FIXADDR_START) >> 10,
+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
+ FIXADDR_START, FIXADDR_TOP,
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
#ifdef CONFIG_HIGHMEM
- PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
- (LAST_PKMAP*PAGE_SIZE) >> 10,
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
#endif
- VMALLOC_START, VMALLOC_END,
- (VMALLOC_END - VMALLOC_START) >> 20,
+ VMALLOC_START, VMALLOC_END,
+ (VMALLOC_END - VMALLOC_START) >> 20,
- (unsigned long)__va(0), (unsigned long)high_memory,
- ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
+ (unsigned long)__va(0), (unsigned long)high_memory,
+ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
- (unsigned long)&__init_begin, (unsigned long)&__init_end,
- ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
+ ((unsigned long)&__init_end -
+ (unsigned long)&__init_begin) >> 10,
- (unsigned long)&_etext, (unsigned long)&_edata,
- ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+ (unsigned long)&_etext, (unsigned long)&_edata,
+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
- (unsigned long)&_text, (unsigned long)&_etext,
- ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+ (unsigned long)&_text, (unsigned long)&_etext,
+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
#ifdef CONFIG_HIGHMEM
- BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
- BUG_ON(VMALLOC_END > PKMAP_BASE);
+ BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+ BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
- BUG_ON(VMALLOC_START > VMALLOC_END);
- BUG_ON((unsigned long)high_memory > VMALLOC_START);
+ BUG_ON(VMALLOC_START > VMALLOC_END);
+ BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */
#ifdef CONFIG_X86_PAE
@@ -746,49 +702,38 @@ int arch_add_memory(int nid, u64 start, u64 size)
return __add_pages(zone, start_pfn, nr_pages);
}
-
#endif
-struct kmem_cache *pmd_cache;
-
-void __init pgtable_cache_init(void)
-{
- if (PTRS_PER_PMD > 1)
- pmd_cache = kmem_cache_create("pmd",
- PTRS_PER_PMD*sizeof(pmd_t),
- PTRS_PER_PMD*sizeof(pmd_t),
- SLAB_PANIC,
- pmd_ctor);
-}
-
/*
* This function cannot be __init, since exceptions don't work in that
* section. Put this after the callers, so that it cannot be inlined.
*/
-static int noinline do_test_wp_bit(void)
+static noinline int do_test_wp_bit(void)
{
char tmp_reg;
int flag;
__asm__ __volatile__(
- " movb %0,%1 \n"
- "1: movb %1,%0 \n"
- " xorl %2,%2 \n"
+ " movb %0, %1 \n"
+ "1: movb %1, %0 \n"
+ " xorl %2, %2 \n"
"2: \n"
- ".section __ex_table,\"a\"\n"
+ ".section __ex_table, \"a\"\n"
" .align 4 \n"
- " .long 1b,2b \n"
+ " .long 1b, 2b \n"
".previous \n"
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
:"2" (1)
:"memory");
-
+
return flag;
}
#ifdef CONFIG_DEBUG_RODATA
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
@@ -801,32 +746,58 @@ void mark_rodata_ro(void)
if (num_possible_cpus() <= 1)
#endif
{
- change_page_attr(virt_to_page(start),
- size >> PAGE_SHIFT, PAGE_KERNEL_RX);
- printk("Write protecting the kernel text: %luk\n", size >> 10);
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+ printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+ size >> 10);
+
+#ifdef CONFIG_CPA_DEBUG
+ printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+ start, start+size);
+ set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+
+ printk(KERN_INFO "Testing CPA: write protecting again\n");
+ set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+#endif
}
#endif
start += size;
size = (unsigned long)__end_rodata - start;
- change_page_attr(virt_to_page(start),
- size >> PAGE_SHIFT, PAGE_KERNEL_RO);
- printk("Write protecting the kernel read-only data: %luk\n",
- size >> 10);
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+ size >> 10);
+ rodata_test();
- /*
- * change_page_attr() requires a global_flush_tlb() call after it.
- * We do this after the printk so that if something went wrong in the
- * change, the printk gets out at least to give a better debug hint
- * of who is the culprit.
- */
- global_flush_tlb();
+#ifdef CONFIG_CPA_DEBUG
+ printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
+ set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
+
+ printk(KERN_INFO "Testing CPA: write protecting again\n");
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+#endif
}
#endif
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * If debugging page accesses then do not free this memory but
+ * mark them not present - any buggy init-section access will
+ * create a kernel page fault:
+ */
+ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+ begin, PAGE_ALIGN(end));
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
unsigned long addr;
+ /*
+ * We just marked the kernel text read only above, now that
+ * we are going to free part of that, we need to make that
+ * writeable first.
+ */
+ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+
for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
@@ -835,6 +806,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
totalram_pages++;
}
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+#endif
}
void free_initmem(void)
@@ -850,4 +822,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
free_init_pages("initrd memory", start, end);
}
#endif
-
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0f9c8c89065..cc50a13ce8d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -43,12 +43,10 @@
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
+#include <asm/kdebug.h>
+#include <asm/numa.h>
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
-const struct dma_mapping_ops* dma_ops;
+const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
static unsigned long dma_reserve __initdata;
@@ -65,22 +63,26 @@ void show_mem(void)
{
long i, total = 0, reserved = 0;
long shared = 0, cached = 0;
- pg_data_t *pgdat;
struct page *page;
+ pg_data_t *pgdat;
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n",
+ nr_swap_pages << (PAGE_SHIFT-10));
for_each_online_pgdat(pgdat) {
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- /* this loop can take a while with 256 GB and 4k pages
- so update the NMI watchdog */
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ /*
+ * This loop can take a while with 256 GB and
+ * 4k pages so defer the NMI watchdog:
+ */
+ if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
touch_nmi_watchdog();
- }
+
if (!pfn_valid(pgdat->node_start_pfn + i))
continue;
+
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
if (PageReserved(page))
@@ -89,51 +91,58 @@ void show_mem(void)
cached++;
else if (page_count(page))
shared += page_count(page) - 1;
- }
+ }
}
- printk(KERN_INFO "%lu pages of RAM\n", total);
- printk(KERN_INFO "%lu reserved pages\n",reserved);
- printk(KERN_INFO "%lu pages shared\n",shared);
- printk(KERN_INFO "%lu pages swap cached\n",cached);
+ printk(KERN_INFO "%lu pages of RAM\n", total);
+ printk(KERN_INFO "%lu reserved pages\n", reserved);
+ printk(KERN_INFO "%lu pages shared\n", shared);
+ printk(KERN_INFO "%lu pages swap cached\n", cached);
}
int after_bootmem;
static __init void *spp_getpage(void)
-{
+{
void *ptr;
+
if (after_bootmem)
- ptr = (void *) get_zeroed_page(GFP_ATOMIC);
+ ptr = (void *) get_zeroed_page(GFP_ATOMIC);
else
ptr = alloc_bootmem_pages(PAGE_SIZE);
- if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
- panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
- Dprintk("spp_getpage %p\n", ptr);
+ if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
+ panic("set_pte_phys: cannot allocate page data %s\n",
+ after_bootmem ? "after bootmem" : "");
+ }
+
+ pr_debug("spp_getpage %p\n", ptr);
+
return ptr;
-}
+}
-static __init void set_pte_phys(unsigned long vaddr,
- unsigned long phys, pgprot_t prot)
+static __init void
+set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, new_pte;
- Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
+ pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
pgd = pgd_offset_k(vaddr);
if (pgd_none(*pgd)) {
- printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
+ printk(KERN_ERR
+ "PGD FIXMAP MISSING, it should be setup in head.S!\n");
return;
}
pud = pud_offset(pgd, vaddr);
if (pud_none(*pud)) {
- pmd = (pmd_t *) spp_getpage();
+ pmd = (pmd_t *) spp_getpage();
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
if (pmd != pmd_offset(pud, 0)) {
- printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
+ printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+ pmd, pmd_offset(pud, 0));
return;
}
}
@@ -142,7 +151,7 @@ static __init void set_pte_phys(unsigned long vaddr,
pte = (pte_t *) spp_getpage();
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
if (pte != pte_offset_kernel(pmd, 0)) {
- printk("PAGETABLE BUG #02!\n");
+ printk(KERN_ERR "PAGETABLE BUG #02!\n");
return;
}
}
@@ -162,33 +171,35 @@ static __init void set_pte_phys(unsigned long vaddr,
}
/* NOTE: this is meant to be run only at boot */
-void __init
-__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
if (idx >= __end_of_fixed_addresses) {
- printk("Invalid __set_fixmap\n");
+ printk(KERN_ERR "Invalid __set_fixmap\n");
return;
}
set_pte_phys(address, phys, prot);
}
-unsigned long __meminitdata table_start, table_end;
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
static __meminit void *alloc_low_page(unsigned long *phys)
-{
+{
unsigned long pfn = table_end++;
void *adr;
if (after_bootmem) {
adr = (void *)get_zeroed_page(GFP_ATOMIC);
*phys = __pa(adr);
+
return adr;
}
- if (pfn >= end_pfn)
- panic("alloc_low_page: ran out of memory");
+ if (pfn >= end_pfn)
+ panic("alloc_low_page: ran out of memory");
adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
memset(adr, 0, PAGE_SIZE);
@@ -197,44 +208,49 @@ static __meminit void *alloc_low_page(unsigned long *phys)
}
static __meminit void unmap_low_page(void *adr)
-{
-
+{
if (after_bootmem)
return;
early_iounmap(adr, PAGE_SIZE);
-}
+}
/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
- unsigned long vaddr;
pmd_t *pmd, *last_pmd;
+ unsigned long vaddr;
int i, pmds;
pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
vaddr = __START_KERNEL_map;
pmd = level2_kernel_pgt;
last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+
for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
for (i = 0; i < pmds; i++) {
if (pmd_present(pmd[i]))
- goto next;
+ goto continue_outer_loop;
}
vaddr += addr & ~PMD_MASK;
addr &= PMD_MASK;
+
for (i = 0; i < pmds; i++, addr += PMD_SIZE)
- set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
- __flush_tlb();
+ set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ __flush_tlb_all();
+
return (void *)vaddr;
- next:
+continue_outer_loop:
;
}
- printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+ printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
+
return NULL;
}
-/* To avoid virtual aliases later */
+/*
+ * To avoid virtual aliases later:
+ */
__meminit void early_iounmap(void *addr, unsigned long size)
{
unsigned long vaddr;
@@ -244,9 +260,11 @@ __meminit void early_iounmap(void *addr, unsigned long size)
vaddr = (unsigned long)addr;
pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
pmd = level2_kernel_pgt + pmd_index(vaddr);
+
for (i = 0; i < pmds; i++)
pmd_clear(pmd + i);
- __flush_tlb();
+
+ __flush_tlb_all();
}
static void __meminit
@@ -259,16 +277,17 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
pmd_t *pmd = pmd_page + pmd_index(address);
if (address >= end) {
- if (!after_bootmem)
+ if (!after_bootmem) {
for (; i < PTRS_PER_PMD; i++, pmd++)
set_pmd(pmd, __pmd(0));
+ }
break;
}
if (pmd_val(*pmd))
continue;
- entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+ entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
entry &= __supported_pte_mask;
set_pmd(pmd, __pmd(entry));
}
@@ -277,19 +296,19 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
- pmd_t *pmd = pmd_offset(pud,0);
+ pmd_t *pmd = pmd_offset(pud, 0);
spin_lock(&init_mm.page_table_lock);
phys_pmd_init(pmd, address, end);
spin_unlock(&init_mm.page_table_lock);
__flush_tlb_all();
}
-static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
-{
+static void __meminit
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+{
int i = pud_index(addr);
-
- for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
unsigned long pmd_phys;
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
@@ -297,10 +316,11 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
if (addr >= end)
break;
- if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
- set_pud(pud, __pud(0));
+ if (!after_bootmem &&
+ !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
+ set_pud(pud, __pud(0));
continue;
- }
+ }
if (pud_val(*pud)) {
phys_pmd_update(pud, addr, end);
@@ -308,14 +328,16 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
}
pmd = alloc_low_page(&pmd_phys);
+
spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
phys_pmd_init(pmd, addr, end);
spin_unlock(&init_mm.page_table_lock);
+
unmap_low_page(pmd);
}
- __flush_tlb();
-}
+ __flush_tlb_all();
+}
static void __init find_early_table_space(unsigned long end)
{
@@ -326,14 +348,23 @@ static void __init find_early_table_space(unsigned long end)
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
- /* RED-PEN putting page tables only on node 0 could
- cause a hotspot and fill up ZONE_DMA. The page tables
- need roughly 0.5KB per GB. */
- start = 0x8000;
- table_start = find_e820_area(start, end, tables);
+ /*
+ * RED-PEN putting page tables only on node 0 could
+ * cause a hotspot and fill up ZONE_DMA. The page tables
+ * need roughly 0.5KB per GB.
+ */
+ start = 0x8000;
+ table_start = find_e820_area(start, end, tables);
if (table_start == -1UL)
panic("Cannot find space for the kernel page tables");
+ /*
+ * When you have a lot of RAM like 256GB, early_table will not fit
+ * into 0x8000 range, find_e820_area() will find area after kernel
+ * bss but the table_start is not page aligned, so need to round it
+ * up to avoid overlap with bss:
+ */
+ table_start = round_up(table_start, PAGE_SIZE);
table_start >>= PAGE_SHIFT;
table_end = table_start;
@@ -342,20 +373,23 @@ static void __init find_early_table_space(unsigned long end)
(table_start << PAGE_SHIFT) + tables);
}
-/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
- This runs before bootmem is initialized and gets pages directly from the
- physical memory. To access them they are temporarily mapped. */
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
-{
- unsigned long next;
+{
+ unsigned long next;
- Dprintk("init_memory_mapping\n");
+ pr_debug("init_memory_mapping\n");
- /*
+ /*
* Find space for the kernel direct mapping tables.
- * Later we should allocate these tables in the local node of the memory
- * mapped. Unfortunately this is done currently before the nodes are
- * discovered.
+ *
+ * Later we should allocate these tables in the local node of the
+ * memory mapped. Unfortunately this is done currently before the
+ * nodes are discovered.
*/
if (!after_bootmem)
find_early_table_space(end);
@@ -364,8 +398,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
end = (unsigned long)__va(end);
for (; start < end; start = next) {
- unsigned long pud_phys;
pgd_t *pgd = pgd_offset_k(start);
+ unsigned long pud_phys;
pud_t *pud;
if (after_bootmem)
@@ -374,23 +408,26 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
pud = alloc_low_page(&pud_phys);
next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
+ if (next > end)
+ next = end;
phys_pud_init(pud, __pa(start), __pa(next));
if (!after_bootmem)
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
unmap_low_page(pud);
- }
+ }
if (!after_bootmem)
mmu_cr4_features = read_cr4();
__flush_tlb_all();
+
+ reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
+
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
@@ -402,39 +439,48 @@ void __init paging_init(void)
}
#endif
-/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
- from the CPU leading to inconsistent cache lines. address and size
- must be aligned to 2MB boundaries.
- Does nothing when the mapping doesn't exist. */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
+/*
+ * Unmap a kernel mapping if it exists. This is useful to avoid
+ * prefetches from the CPU leading to inconsistent cache lines.
+ * address and size must be aligned to 2MB boundaries.
+ * Does nothing when the mapping doesn't exist.
+ */
+void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
unsigned long end = address + size;
BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
+ BUG_ON(size & ~LARGE_PAGE_MASK);
+
+ for (; address < end; address += LARGE_PAGE_SIZE) {
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
pmd_t *pmd;
+
if (pgd_none(*pgd))
continue;
+
pud = pud_offset(pgd, address);
if (pud_none(*pud))
- continue;
+ continue;
+
pmd = pmd_offset(pud, address);
if (!pmd || pmd_none(*pmd))
- continue;
- if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
- /* Could handle this, but it should not happen currently. */
- printk(KERN_ERR
- "clear_kernel_mapping: mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
+ continue;
+
+ if (!(pmd_val(*pmd) & _PAGE_PSE)) {
+ /*
+ * Could handle this, but it should not happen
+ * currently:
+ */
+ printk(KERN_ERR "clear_kernel_mapping: "
+ "mapping has been split. will leak memory\n");
+ pmd_ERROR(*pmd);
}
- set_pmd(pmd, __pmd(0));
+ set_pmd(pmd, __pmd(0));
}
__flush_tlb_all();
-}
+}
/*
* Memory hotplug specific functions
@@ -461,16 +507,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- init_memory_mapping(start, (start + size -1));
+ init_memory_mapping(start, start + size-1);
ret = __add_pages(zone, start_pfn, nr_pages);
- if (ret)
- goto error;
+ WARN_ON(1);
return ret;
-error:
- printk("%s: Problem encountered in __add_pages!\n", __func__);
- return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);
@@ -484,36 +526,8 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif /* CONFIG_MEMORY_HOTPLUG */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
- * just online the pages.
- */
-int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
-{
- int err = -EIO;
- unsigned long pfn;
- unsigned long total = 0, mem = 0;
- for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
- if (pfn_valid(pfn)) {
- online_page(pfn_to_page(pfn));
- err = 0;
- mem++;
- }
- total++;
- }
- if (!err) {
- z->spanned_pages += total;
- z->present_pages += mem;
- z->zone_pgdat->node_spanned_pages += total;
- z->zone_pgdat->node_present_pages += mem;
- }
- return err;
-}
-#endif
-
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
- kcore_vsyscall;
+static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
+ kcore_modules, kcore_vsyscall;
void __init mem_init(void)
{
@@ -521,8 +535,15 @@ void __init mem_init(void)
pci_iommu_alloc();
- /* clear the zero-page */
- memset(empty_zero_page, 0, PAGE_SIZE);
+ /* clear_bss() already clear the empty_zero_page */
+
+ /* temporary debugging - double check it's true: */
+ {
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ WARN_ON_ONCE(empty_zero_page[i]);
+ }
reservedpages = 0;
@@ -534,7 +555,6 @@ void __init mem_init(void)
#endif
reservedpages = end_pfn - totalram_pages -
absent_pages_in_range(0, end_pfn);
-
after_bootmem = 1;
codesize = (unsigned long) &_etext - (unsigned long) &_text;
@@ -542,15 +562,16 @@ void __init mem_init(void)
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+ kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+ kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
VMALLOC_END-VMALLOC_START);
kclist_add(&kcore_kernel, &_stext, _end - _stext);
kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
VSYSCALL_END - VSYSCALL_START);
- printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
+ printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
+ "%ldk reserved, %ldk data, %ldk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
end_pfn << (PAGE_SHIFT-10),
codesize >> 10,
@@ -566,19 +587,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
if (begin >= end)
return;
+ /*
+ * If debugging page accesses then do not free this memory but
+ * mark them not present - any buggy init-section access will
+ * create a kernel page fault:
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+ begin, PAGE_ALIGN(end));
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+
for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)),
POISON_FREE_INITMEM, PAGE_SIZE);
- if (addr >= __START_KERNEL_map)
- change_page_attr_addr(addr, 1, __pgprot(0));
free_page(addr);
totalram_pages++;
}
- if (addr > __START_KERNEL_map)
- global_flush_tlb();
+#endif
}
void free_initmem(void)
@@ -589,6 +618,8 @@ void free_initmem(void)
}
#ifdef CONFIG_DEBUG_RODATA
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
@@ -603,25 +634,27 @@ void mark_rodata_ro(void)
#ifdef CONFIG_KPROBES
start = (unsigned long)__start_rodata;
#endif
-
+
end = (unsigned long)__end_rodata;
start = (start + PAGE_SIZE - 1) & PAGE_MASK;
end &= PAGE_MASK;
if (end <= start)
return;
- change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
- /*
- * change_page_attr_addr() requires a global_flush_tlb() call after it.
- * We do this after the printk so that if something went wrong in the
- * change, the printk gets out at least to give a better debug hint
- * of who is the culprit.
- */
- global_flush_tlb();
+ rodata_test();
+
+#ifdef CONFIG_CPA_DEBUG
+ printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
+ set_memory_rw(start, (end-start) >> PAGE_SHIFT);
+
+ printk(KERN_INFO "Testing CPA: again\n");
+ set_memory_ro(start, (end-start) >> PAGE_SHIFT);
+#endif
}
#endif
@@ -632,17 +665,21 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
-{
+void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+{
#ifdef CONFIG_NUMA
int nid = phys_to_nid(phys);
#endif
unsigned long pfn = phys >> PAGE_SHIFT;
+
if (pfn >= end_pfn) {
- /* This can happen with kdump kernels when accessing firmware
- tables. */
+ /*
+ * This can happen with kdump kernels when accessing
+ * firmware tables:
+ */
if (pfn < end_pfn_map)
return;
+
printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
phys, len);
return;
@@ -650,9 +687,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
- reserve_bootmem_node(NODE_DATA(nid), phys, len);
-#else
- reserve_bootmem(phys, len);
+ reserve_bootmem_node(NODE_DATA(nid), phys, len);
+#else
+ reserve_bootmem(phys, len);
#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
@@ -660,46 +697,49 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
}
}
-int kern_addr_valid(unsigned long addr)
-{
+int kern_addr_valid(unsigned long addr)
+{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
if (above != 0 && above != -1UL)
- return 0;
-
+ return 0;
+
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd))
return 0;
pud = pud_offset(pgd, addr);
if (pud_none(*pud))
- return 0;
+ return 0;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
+
if (pmd_large(*pmd))
return pfn_valid(pmd_pfn(*pmd));
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte))
return 0;
+
return pfn_valid(pte_pfn(*pte));
}
-/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
- covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- not need special handling anymore. */
-
+/*
+ * A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
+ */
static struct vm_area_struct gate_vma = {
- .vm_start = VSYSCALL_START,
- .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
- .vm_page_prot = PAGE_READONLY_EXEC,
- .vm_flags = VM_READ | VM_EXEC
+ .vm_start = VSYSCALL_START,
+ .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
+ .vm_page_prot = PAGE_READONLY_EXEC,
+ .vm_flags = VM_READ | VM_EXEC
};
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
@@ -714,14 +754,17 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
int in_gate_area(struct task_struct *task, unsigned long addr)
{
struct vm_area_struct *vma = get_gate_vma(task);
+
if (!vma)
return 0;
+
return (addr >= vma->vm_start) && (addr < vma->vm_end);
}
-/* Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives.
+/*
+ * Use this when you have no reliable task/vma, typically from interrupt
+ * context. It is less reliable than using the task's vma and may give
+ * false positives:
*/
int in_gate_area_no_task(unsigned long addr)
{
@@ -741,8 +784,8 @@ const char *arch_vma_name(struct vm_area_struct *vma)
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
*/
-int __meminit vmemmap_populate(struct page *start_page,
- unsigned long size, int node)
+int __meminit
+vmemmap_populate(struct page *start_page, unsigned long size, int node)
{
unsigned long addr = (unsigned long)start_page;
unsigned long end = (unsigned long)(start_page + size);
@@ -757,6 +800,7 @@ int __meminit vmemmap_populate(struct page *start_page,
pgd = vmemmap_pgd_populate(addr, node);
if (!pgd)
return -ENOMEM;
+
pud = vmemmap_pud_populate(pgd, addr, node);
if (!pud)
return -ENOMEM;
@@ -764,20 +808,22 @@ int __meminit vmemmap_populate(struct page *start_page,
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
pte_t entry;
- void *p = vmemmap_alloc_block(PMD_SIZE, node);
+ void *p;
+
+ p = vmemmap_alloc_block(PMD_SIZE, node);
if (!p)
return -ENOMEM;
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
- mk_pte_huge(entry);
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE);
set_pmd(pmd, __pmd(pte_val(entry)));
printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
addr, addr + PMD_SIZE - 1, p, node);
- } else
+ } else {
vmemmap_verify((pte_t *)pmd, node, addr, next);
+ }
}
-
return 0;
}
#endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
new file mode 100644
index 00000000000..ed795721ca8
--- /dev/null
+++ b/arch/x86/mm/ioremap.c
@@ -0,0 +1,501 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/e820.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+enum ioremap_mode {
+ IOR_MODE_UNCACHED,
+ IOR_MODE_CACHED,
+};
+
+#ifdef CONFIG_X86_64
+
+unsigned long __phys_addr(unsigned long x)
+{
+ if (x >= __START_KERNEL_map)
+ return x - __START_KERNEL_map + phys_base;
+ return x - PAGE_OFFSET;
+}
+EXPORT_SYMBOL(__phys_addr);
+
+#endif
+
+int page_is_ram(unsigned long pagenr)
+{
+ unsigned long addr, end;
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ /*
+ * Not usable memory:
+ */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
+ end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+
+ /*
+ * Sanity check: Some BIOSen report areas as RAM that
+ * are not. Notably the 640->1Mb area, which is the
+ * PCI BIOS area.
+ */
+ if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+ end < (BIOS_END >> PAGE_SHIFT))
+ continue;
+
+ if ((pagenr >= addr) && (pagenr < end))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+ enum ioremap_mode mode)
+{
+ unsigned long vaddr = (unsigned long)__va(paddr);
+ unsigned long nrpages = size >> PAGE_SHIFT;
+ int err, level;
+
+ /* No change for pages after the last mapping */
+ if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
+ return 0;
+
+ /*
+ * If there is no identity map for this address,
+ * change_page_attr_addr is unnecessary
+ */
+ if (!lookup_address(vaddr, &level))
+ return 0;
+
+ switch (mode) {
+ case IOR_MODE_UNCACHED:
+ default:
+ err = set_memory_uc(vaddr, nrpages);
+ break;
+ case IOR_MODE_CACHED:
+ err = set_memory_wb(vaddr, nrpages);
+ break;
+ }
+
+ return err;
+}
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+ enum ioremap_mode mode)
+{
+ void __iomem *addr;
+ struct vm_struct *area;
+ unsigned long offset, last_addr;
+ pgprot_t prot;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+ if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+ return (__force void __iomem *)phys_to_virt(phys_addr);
+
+ /*
+ * Don't allow anybody to remap normal RAM that we're using..
+ */
+ for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
+ (offset << PAGE_SHIFT) < last_addr; offset++) {
+ if (page_is_ram(offset))
+ return NULL;
+ }
+
+ switch (mode) {
+ case IOR_MODE_UNCACHED:
+ default:
+ prot = PAGE_KERNEL_NOCACHE;
+ break;
+ case IOR_MODE_CACHED:
+ prot = PAGE_KERNEL;
+ break;
+ }
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = (void __iomem *) area->addr;
+ if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+ phys_addr, prot)) {
+ remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ return NULL;
+ }
+
+ if (ioremap_change_attr(phys_addr, size, mode) < 0) {
+ vunmap(addr);
+ return NULL;
+ }
+
+ return (void __iomem *) (offset + (char __iomem *)addr);
+}
+
+/**
+ * ioremap_nocache - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+{
+ return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
+}
+EXPORT_SYMBOL(ioremap_nocache);
+
+void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size)
+{
+ return __ioremap(phys_addr, size, IOR_MODE_CACHED);
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
+void iounmap(volatile void __iomem *addr)
+{
+ struct vm_struct *p, *o;
+
+ if ((void __force *)addr <= high_memory)
+ return;
+
+ /*
+ * __ioremap special-cases the PCI/ISA range by not instantiating a
+ * vm_area and by simply returning an address into the kernel mapping
+ * of ISA space. So handle that here.
+ */
+ if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
+ addr < phys_to_virt(ISA_END_ADDRESS))
+ return;
+
+ addr = (volatile void __iomem *)
+ (PAGE_MASK & (unsigned long __force)addr);
+
+ /* Use the vm area unlocked, assuming the caller
+ ensures there isn't another iounmap for the same address
+ in parallel. Reuse of the virtual address is prevented by
+ leaving it in the global lists until we're done with it.
+ cpa takes care of the direct mappings. */
+ read_lock(&vmlist_lock);
+ for (p = vmlist; p; p = p->next) {
+ if (p->addr == addr)
+ break;
+ }
+ read_unlock(&vmlist_lock);
+
+ if (!p) {
+ printk(KERN_ERR "iounmap: bad address %p\n", addr);
+ dump_stack();
+ return;
+ }
+
+ /* Reset the direct mapping. Can block */
+ ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
+
+ /* Finally remove it */
+ o = remove_vm_area((void *)addr);
+ BUG_ON(p != o || o == NULL);
+ kfree(p);
+}
+EXPORT_SYMBOL(iounmap);
+
+#ifdef CONFIG_X86_32
+
+int __initdata early_ioremap_debug;
+
+static int __init early_ioremap_debug_setup(char *str)
+{
+ early_ioremap_debug = 1;
+
+ return 0;
+}
+early_param("early_ioremap_debug", early_ioremap_debug_setup);
+
+static __initdata int after_paging_init;
+static __initdata unsigned long bm_pte[1024]
+ __attribute__((aligned(PAGE_SIZE)));
+
+static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+{
+ return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+}
+
+static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+{
+ return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+}
+
+void __init early_ioremap_init(void)
+{
+ unsigned long *pgd;
+
+ if (early_ioremap_debug)
+ printk(KERN_INFO "early_ioremap_init()\n");
+
+ pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+ *pgd = __pa(bm_pte) | _PAGE_TABLE;
+ memset(bm_pte, 0, sizeof(bm_pte));
+ /*
+ * The boot-ioremap range spans multiple pgds, for which
+ * we are not prepared:
+ */
+ if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ printk(KERN_WARNING "pgd %p != %p\n",
+ pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+ printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
+ FIX_BTMAP_BEGIN);
+ }
+}
+
+void __init early_ioremap_clear(void)
+{
+ unsigned long *pgd;
+
+ if (early_ioremap_debug)
+ printk(KERN_INFO "early_ioremap_clear()\n");
+
+ pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
+ *pgd = 0;
+ paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
+ __flush_tlb_all();
+}
+
+void __init early_ioremap_reset(void)
+{
+ enum fixed_addresses idx;
+ unsigned long *pte, phys, addr;
+
+ after_paging_init = 1;
+ for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
+ addr = fix_to_virt(idx);
+ pte = early_ioremap_pte(addr);
+ if (!*pte & _PAGE_PRESENT) {
+ phys = *pte & PAGE_MASK;
+ set_fixmap(idx, phys);
+ }
+ }
+}
+
+static void __init __early_set_fixmap(enum fixed_addresses idx,
+ unsigned long phys, pgprot_t flags)
+{
+ unsigned long *pte, addr = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+ pte = early_ioremap_pte(addr);
+ if (pgprot_val(flags))
+ *pte = (phys & PAGE_MASK) | pgprot_val(flags);
+ else
+ *pte = 0;
+ __flush_tlb_one(addr);
+}
+
+static inline void __init early_set_fixmap(enum fixed_addresses idx,
+ unsigned long phys)
+{
+ if (after_paging_init)
+ set_fixmap(idx, phys);
+ else
+ __early_set_fixmap(idx, phys, PAGE_KERNEL);
+}
+
+static inline void __init early_clear_fixmap(enum fixed_addresses idx)
+{
+ if (after_paging_init)
+ clear_fixmap(idx);
+ else
+ __early_set_fixmap(idx, 0, __pgprot(0));
+}
+
+
+int __initdata early_ioremap_nested;
+
+static int __init check_early_ioremap_leak(void)
+{
+ if (!early_ioremap_nested)
+ return 0;
+
+ printk(KERN_WARNING
+ "Debug warning: early ioremap leak of %d areas detected.\n",
+ early_ioremap_nested);
+ printk(KERN_WARNING
+ "please boot with early_ioremap_debug and report the dmesg.\n");
+ WARN_ON(1);
+
+ return 1;
+}
+late_initcall(check_early_ioremap_leak);
+
+void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
+{
+ unsigned long offset, last_addr;
+ unsigned int nrpages, nesting;
+ enum fixed_addresses idx0, idx;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ nesting = early_ioremap_nested;
+ if (early_ioremap_debug) {
+ printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
+ phys_addr, size, nesting);
+ dump_stack();
+ }
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ if (nesting >= FIX_BTMAPS_NESTING) {
+ WARN_ON(1);
+ return NULL;
+ }
+ early_ioremap_nested++;
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - phys_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > NR_FIX_BTMAPS) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ /*
+ * Ok, go for it..
+ */
+ idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
+ idx = idx0;
+ while (nrpages > 0) {
+ early_set_fixmap(idx, phys_addr);
+ phys_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+ if (early_ioremap_debug)
+ printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
+
+ return (void *) (offset + fix_to_virt(idx0));
+}
+
+void __init early_iounmap(void *addr, unsigned long size)
+{
+ unsigned long virt_addr;
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+ unsigned int nesting;
+
+ nesting = --early_ioremap_nested;
+ WARN_ON(nesting < 0);
+
+ if (early_ioremap_debug) {
+ printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
+ size, nesting);
+ dump_stack();
+ }
+
+ virt_addr = (unsigned long)addr;
+ if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
+ WARN_ON(1);
+ return;
+ }
+ offset = virt_addr & ~PAGE_MASK;
+ nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
+ while (nrpages > 0) {
+ early_clear_fixmap(idx);
+ --idx;
+ --nrpages;
+ }
+}
+
+void __this_fixmap_does_not_exist(void)
+{
+ WARN_ON(1);
+}
+
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
deleted file mode 100644
index 0b278315d73..00000000000
--- a/arch/x86/mm/ioremap_32.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * arch/i386/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void __iomem * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- pgprot_t prot;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return (void __iomem *) phys_to_virt(phys_addr);
-
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (phys_addr <= virt_to_phys(high_memory - 1)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = __va(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
- }
-
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
- | _PAGE_ACCESSED | flags);
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long) addr,
- (unsigned long) addr + size, phys_addr, prot)) {
- vunmap((void __force *) addr);
- return NULL;
- }
- return (void __iomem *) (offset + (char __iomem *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr;
- void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
- if (!p)
- return p;
-
- /* Guaranteed to be > phys_addr, as per __ioremap() */
- last_addr = phys_addr + size - 1;
-
- if (last_addr < virt_to_phys(high_memory) - 1) {
- struct page *ppage = virt_to_page(__va(phys_addr));
- unsigned long npages;
-
- phys_addr &= PAGE_MASK;
-
- /* This might overflow and become zero.. */
- last_addr = PAGE_ALIGN(last_addr);
-
- /* .. but that's ok, because modulo-2**n arithmetic will make
- * the page-aligned "last - first" come out right.
- */
- npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
- iounmap(p);
- p = NULL;
- }
- global_flush_tlb();
- }
-
- return p;
-}
-EXPORT_SYMBOL(ioremap_nocache);
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p, *o;
-
- if ((void __force *)addr <= high_memory)
- return;
-
- /*
- * __ioremap special-cases the PCI/ISA range by not instantiating a
- * vm_area and by simply returning an address into the kernel mapping
- * of ISA space. So handle that here.
- */
- if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
- addr < phys_to_virt(ISA_END_ADDRESS))
- return;
-
- addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
-
- /* Use the vm area unlocked, assuming the caller
- ensures there isn't another iounmap for the same address
- in parallel. Reuse of the virtual address is prevented by
- leaving it in the global lists until we're done with it.
- cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
-
- if (!p) {
- printk("iounmap: bad address %p\n", addr);
- dump_stack();
- return;
- }
-
- /* Reset the direct mapping. Can block */
- if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
- change_page_attr(virt_to_page(__va(p->phys_addr)),
- get_vm_area_size(p) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
- }
-
- /* Finally remove it */
- o = remove_vm_area((void *)addr);
- BUG_ON(p != o || o == NULL);
- kfree(p);
-}
-EXPORT_SYMBOL(iounmap);
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long offset, last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return phys_to_virt(phys_addr);
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS)
- return NULL;
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- set_fixmap(idx, phys_addr);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
- return;
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- clear_fixmap(idx);
- --idx;
- --nrpages;
- }
-}
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
deleted file mode 100644
index 6cac90aa503..00000000000
--- a/arch/x86/mm/ioremap_64.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/io.h>
-
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
-#include <asm/proto.h>
-
-unsigned long __phys_addr(unsigned long x)
-{
- if (x >= __START_KERNEL_map)
- return x - __START_KERNEL_map + phys_base;
- return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-
-/*
- * Fix up the linear direct mapping of the kernel to avoid cache attribute
- * conflicts.
- */
-static int
-ioremap_change_attr(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- int err = 0;
- if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
- unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long vaddr = (unsigned long) __va(phys_addr);
-
- /*
- * Must use a address here and not struct page because the phys addr
- * can be a in hole between nodes and not have an memmap entry.
- */
- err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
- if (!err)
- global_flush_tlb();
- }
- return err;
-}
-
-/*
- * Generic mapping function
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- pgprot_t pgprot;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return (__force void __iomem *)phys_to_virt(phys_addr);
-
-#ifdef CONFIG_FLATMEM
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (last_addr < virt_to_phys(high_memory)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = __va(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
- }
-#endif
-
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
- | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, pgprot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
- return NULL;
- }
- if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
- area->flags &= 0xffffff;
- vunmap(addr);
- return NULL;
- }
- return (__force void __iomem *) (offset + (char *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- return __ioremap(phys_addr, size, _PAGE_PCD);
-}
-EXPORT_SYMBOL(ioremap_nocache);
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p, *o;
-
- if (addr <= high_memory)
- return;
- if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
- addr < phys_to_virt(ISA_END_ADDRESS))
- return;
-
- addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
- /* Use the vm area unlocked, assuming the caller
- ensures there isn't another iounmap for the same address
- in parallel. Reuse of the virtual address is prevented by
- leaving it in the global lists until we're done with it.
- cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
-
- if (!p) {
- printk("iounmap: bad address %p\n", addr);
- dump_stack();
- return;
- }
-
- /* Reset the direct mapping. Can block */
- if (p->flags >> 20)
- ioremap_change_attr(p->phys_addr, p->size, 0);
-
- /* Finally remove it */
- o = remove_vm_area((void *)addr);
- BUG_ON(p != o || o == NULL);
- kfree(p);
-}
-EXPORT_SYMBOL(iounmap);
-
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index a96006f7ae0..7a2ebce87df 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -1,9 +1,9 @@
-/*
+/*
* AMD K8 NUMA support.
* Discover the memory map and associated nodes.
- *
+ *
* This version reads it directly from the K8 northbridge.
- *
+ *
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
*/
#include <linux/kernel.h>
@@ -22,132 +22,135 @@
static __init int find_northbridge(void)
{
- int num;
+ int num;
- for (num = 0; num < 32; num++) {
+ for (num = 0; num < 32; num++) {
u32 header;
-
- header = read_pci_config(0, num, 0, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
- continue;
-
- header = read_pci_config(0, num, 1, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
- continue;
- return num;
- }
-
- return -1;
+
+ header = read_pci_config(0, num, 0, 0x00);
+ if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
+ header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
+ header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
+ continue;
+
+ header = read_pci_config(0, num, 1, 0x00);
+ if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
+ header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
+ header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
+ continue;
+ return num;
+ }
+
+ return -1;
}
int __init k8_scan_nodes(unsigned long start, unsigned long end)
-{
+{
unsigned long prevbase;
struct bootnode nodes[8];
- int nodeid, i, j, nb;
+ int nodeid, i, nb;
unsigned char nodeids[8];
int found = 0;
u32 reg;
unsigned numnodes;
- unsigned num_cores;
+ unsigned cores;
+ unsigned bits;
+ int j;
if (!early_pci_allowed())
return -1;
- nb = find_northbridge();
- if (nb < 0)
+ nb = find_northbridge();
+ if (nb < 0)
return nb;
- printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
-
- num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- printk(KERN_INFO "CPU has %d num_cores\n", num_cores);
+ printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
- reg = read_pci_config(0, nb, 0, 0x60);
+ reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
return -1;
printk(KERN_INFO "Number of nodes %d\n", numnodes);
- memset(&nodes,0,sizeof(nodes));
+ memset(&nodes, 0, sizeof(nodes));
prevbase = 0;
- for (i = 0; i < 8; i++) {
- unsigned long base,limit;
+ for (i = 0; i < 8; i++) {
+ unsigned long base, limit;
u32 nodeid;
-
+
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
- nodeid = limit & 7;
+ nodeid = limit & 7;
nodeids[i] = nodeid;
- if ((base & 3) == 0) {
+ if ((base & 3) == 0) {
if (i < numnodes)
- printk("Skipping disabled node %d\n", i);
+ printk("Skipping disabled node %d\n", i);
continue;
- }
+ }
if (nodeid >= numnodes) {
printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
- base, limit);
+ base, limit);
continue;
- }
+ }
- if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
- base);
+ if (!limit) {
+ printk(KERN_INFO "Skipping node entry %d (base %lx)\n",
+ i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
- printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
- nodeid, (base>>8)&3, (limit>>8) & 3);
- return -1;
- }
+ printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
+ nodeid, (base>>8)&3, (limit>>8) & 3);
+ return -1;
+ }
if (node_isset(nodeid, node_possible_map)) {
- printk(KERN_INFO "Node %d already present. Skipping\n",
+ printk(KERN_INFO "Node %d already present. Skipping\n",
nodeid);
continue;
}
- limit >>= 16;
- limit <<= 24;
+ limit >>= 16;
+ limit <<= 24;
limit |= (1<<24)-1;
limit++;
if (limit > end_pfn << PAGE_SHIFT)
limit = end_pfn << PAGE_SHIFT;
if (limit <= base)
- continue;
-
+ continue;
+
base >>= 16;
- base <<= 24;
-
- if (base < start)
- base = start;
- if (limit > end)
- limit = end;
- if (limit == base) {
- printk(KERN_ERR "Empty node %d\n", nodeid);
- continue;
+ base <<= 24;
+
+ if (base < start)
+ base = start;
+ if (limit > end)
+ limit = end;
+ if (limit == base) {
+ printk(KERN_ERR "Empty node %d\n", nodeid);
+ continue;
}
- if (limit < base) {
+ if (limit < base) {
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
- nodeid, base, limit);
+ nodeid, base, limit);
continue;
- }
-
+ }
+
/* Could sort here, but pun for now. Should not happen anyroads. */
- if (prevbase > base) {
+ if (prevbase > base) {
printk(KERN_ERR "Node map not sorted %lx,%lx\n",
- prevbase,base);
+ prevbase, base);
return -1;
}
-
- printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
- nodeid, base, limit);
-
+
+ printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
+ nodeid, base, limit);
+
found++;
-
- nodes[nodeid].start = base;
+
+ nodes[nodeid].start = base;
nodes[nodeid].end = limit;
e820_register_active_regions(nodeid,
nodes[nodeid].start >> PAGE_SHIFT,
@@ -156,27 +159,31 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = base;
node_set(nodeid, node_possible_map);
- }
+ }
if (!found)
- return -1;
+ return -1;
memnode_shift = compute_hash_shift(nodes, 8);
- if (memnode_shift < 0) {
- printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
+ if (memnode_shift < 0) {
+ printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
+ return -1;
+ }
+ printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
+
+ /* use the coreid bits from early_identify_cpu */
+ bits = boot_cpu_data.x86_coreid_bits;
+ cores = (1<<bits);
for (i = 0; i < 8; i++) {
- if (nodes[i].start != nodes[i].end) {
+ if (nodes[i].start != nodes[i].end) {
nodeid = nodeids[i];
- for (j = 0; j < num_cores; j++)
- apicid_to_node[(nodeid * num_cores) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ for (j = 0; j < cores; j++)
+ apicid_to_node[(nodeid << bits) + j] = i;
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ }
}
numa_init_array();
return 0;
-}
+}
diff --git a/arch/x86/mm/mmap_32.c b/arch/x86/mm/mmap.c
index 552e0847375..56fe7124fbe 100644
--- a/arch/x86/mm/mmap_32.c
+++ b/arch/x86/mm/mmap.c
@@ -1,10 +1,13 @@
/*
- * linux/arch/i386/mm/mmap.c
+ * Flexible mmap layout support
*
- * flexible mmap layout support
+ * Based on code by Ingo Molnar and Andi Kleen, copyrighted
+ * as follows:
*
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
+ * Copyright 2005 Andi Kleen, SUSE Labs.
+ * Copyright 2007 Jiri Kosina, SUSE Labs.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,14 +22,12 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
*/
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/random.h>
+#include <linux/limits.h>
#include <linux/sched.h>
/*
@@ -37,20 +38,71 @@
#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
-static inline unsigned long mmap_base(struct mm_struct *mm)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static int mmap_is_ia32(void)
{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
- unsigned long random_factor = 0;
+#ifdef CONFIG_X86_32
+ return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ return 1;
+#endif
+ return 0;
+}
- if (current->flags & PF_RANDOMIZE)
- random_factor = get_random_int() % (1024*1024);
+static int mmap_is_legacy(void)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+
+ if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+ return 1;
+
+ return sysctl_legacy_va_layout;
+}
+
+static unsigned long mmap_rnd(void)
+{
+ unsigned long rnd = 0;
+
+ /*
+ * 8 bits of randomness in 32bit mmaps, 20 address space bits
+ * 28 bits of randomness in 64bit mmaps, 40 address space bits
+ */
+ if (current->flags & PF_RANDOMIZE) {
+ if (mmap_is_ia32())
+ rnd = (long)get_random_int() % (1<<8);
+ else
+ rnd = (long)(get_random_int() % (1<<28));
+ }
+ return rnd << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base(void)
+{
+ unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
- return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+ return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+}
+
+/*
+ * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
+ * does, but not when emulating X86_32
+ */
+static unsigned long mmap_legacy_base(void)
+{
+ if (mmap_is_ia32())
+ return TASK_UNMAPPED_BASE;
+ else
+ return TASK_UNMAPPED_BASE + mmap_rnd();
}
/*
@@ -59,18 +111,12 @@ static inline unsigned long mmap_base(struct mm_struct *mm)
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
{
- /*
- * Fall back to the standard layout if the personality
- * bit is set, or if the expected stack growth is unlimited:
- */
- if (sysctl_legacy_va_layout ||
- (current->personality & ADDR_COMPAT_LAYOUT) ||
- current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ if (mmap_is_legacy()) {
+ mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
- mm->mmap_base = mmap_base(mm);
+ mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
diff --git a/arch/x86/mm/mmap_64.c b/arch/x86/mm/mmap_64.c
deleted file mode 100644
index 80bba0dc000..00000000000
--- a/arch/x86/mm/mmap_64.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
- */
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/random.h>
-#include <asm/ia32.h>
-
-/* Notebook: move the mmap code from sys_x86_64.c over here. */
-
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->flags & _TIF_IA32)
- return ia32_pick_mmap_layout(mm);
-#endif
- mm->mmap_base = TASK_UNMAPPED_BASE;
- if (current->flags & PF_RANDOMIZE) {
- /* Add 28bit randomness which is about 40bits of address space
- because mmap base has to be page aligned.
- or ~1/128 of the total user VM
- (total user address space is 47bits) */
- unsigned rnd = get_random_int() & 0xfffffff;
- mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
- }
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
-}
-
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3d6926ba899..dc3b1f7e145 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -1,7 +1,7 @@
-/*
+/*
* Generic VM initialization for x86-64 NUMA setups.
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
+ */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -11,35 +11,45 @@
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/nodemask.h>
+#include <linux/sched.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/numa.h>
#include <asm/acpi.h>
+#include <asm/k8.h>
#ifndef Dprintk
#define Dprintk(x...)
#endif
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
bootmem_data_t plat_node_bdata[MAX_NUMNODES];
struct memnode memnode;
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
-unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+void *x86_cpu_to_node_map_early_ptr;
+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+
+s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
int numa_off __initdata;
unsigned long __initdata nodemap_addr;
unsigned long __initdata nodemap_size;
-
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
@@ -47,14 +57,13 @@ unsigned long __initdata nodemap_size;
* 0 if memnodmap[] too small (of shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __init
-populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
+static int __init populate_memnodemap(const struct bootnode *nodes,
+ int numnodes, int shift)
{
- int i;
- int res = -1;
unsigned long addr, end;
+ int i, res = -1;
- memset(memnodemap, 0xff, memnodemapsize);
+ memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
for (i = 0; i < numnodes; i++) {
addr = nodes[i].start;
end = nodes[i].end;
@@ -63,13 +72,13 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
if ((end >> shift) >= memnodemapsize)
return 0;
do {
- if (memnodemap[addr >> shift] != 0xff)
+ if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
memnodemap[addr >> shift] = i;
addr += (1UL << shift);
} while (addr < end);
res = 1;
- }
+ }
return res;
}
@@ -78,12 +87,12 @@ static int __init allocate_cachealigned_memnodemap(void)
unsigned long pad, pad_addr;
memnodemap = memnode.embedded_map;
- if (memnodemapsize <= 48)
+ if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
return 0;
pad = L1_CACHE_BYTES - 1;
pad_addr = 0x8000;
- nodemap_size = pad + memnodemapsize;
+ nodemap_size = pad + sizeof(s16) * memnodemapsize;
nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
nodemap_size);
if (nodemap_addr == -1UL) {
@@ -94,6 +103,7 @@ static int __init allocate_cachealigned_memnodemap(void)
}
pad_addr = (nodemap_addr + pad) & ~pad;
memnodemap = phys_to_virt(pad_addr);
+ reserve_early(nodemap_addr, nodemap_addr + nodemap_size);
printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
nodemap_addr, nodemap_addr + nodemap_size);
@@ -104,8 +114,8 @@ static int __init allocate_cachealigned_memnodemap(void)
* The LSB of all start and end addresses in the node map is the value of the
* maximum possible shift.
*/
-static int __init
-extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
+static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
+ int numnodes)
{
int i, nodes_used = 0;
unsigned long start, end;
@@ -140,51 +150,50 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
shift);
if (populate_memnodemap(nodes, numnodes, shift) != 1) {
- printk(KERN_INFO
- "Your memory is not aligned you need to rebuild your kernel "
- "with a bigger NODEMAPSIZE shift=%d\n",
- shift);
+ printk(KERN_INFO "Your memory is not aligned you need to "
+ "rebuild your kernel with a bigger NODEMAPSIZE "
+ "shift=%d\n", shift);
return -1;
}
return shift;
}
-#ifdef CONFIG_SPARSEMEM
int early_pfn_to_nid(unsigned long pfn)
{
return phys_to_nid(pfn << PAGE_SHIFT);
}
-#endif
-static void * __init
-early_node_mem(int nodeid, unsigned long start, unsigned long end,
- unsigned long size)
+static void * __init early_node_mem(int nodeid, unsigned long start,
+ unsigned long end, unsigned long size)
{
unsigned long mem = find_e820_area(start, end, size);
void *ptr;
+
if (mem != -1L)
return __va(mem);
ptr = __alloc_bootmem_nopanic(size,
SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
if (ptr == NULL) {
printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
- size, nodeid);
+ size, nodeid);
return NULL;
}
return ptr;
}
/* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{
- unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
- unsigned long nodedata_phys;
+void __init setup_node_bootmem(int nodeid, unsigned long start,
+ unsigned long end)
+{
+ unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size;
+ unsigned long bootmap_start, nodedata_phys;
void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
- start = round_up(start, ZONE_ALIGN);
+ start = round_up(start, ZONE_ALIGN);
- printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
+ printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+ start, end);
start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT;
@@ -200,75 +209,55 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
/* Find a place for the bootmem map */
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
bootmap = early_node_mem(nodeid, bootmap_start, end,
bootmap_pages<<PAGE_SHIFT);
if (bootmap == NULL) {
if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
+ free_bootmem((unsigned long)node_data[nodeid],
+ pgdat_size);
node_data[nodeid] = NULL;
return;
}
bootmap_start = __pa(bootmap);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
-
+ Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
+
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
- bootmap_start >> PAGE_SHIFT,
- start_pfn, end_pfn);
+ bootmap_start >> PAGE_SHIFT,
+ start_pfn, end_pfn);
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+ reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+ bootmap_pages<<PAGE_SHIFT);
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
node_set_online(nodeid);
-}
-
-/* Initialize final allocator for a zone */
-void __init setup_node_zones(int nodeid)
-{
- unsigned long start_pfn, end_pfn, memmapsize, limit;
-
- start_pfn = node_start_pfn(nodeid);
- end_pfn = node_end_pfn(nodeid);
-
- Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
- nodeid, start_pfn, end_pfn);
-
- /* Try to allocate mem_map at end to not fill up precious <4GB
- memory. */
- memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
- limit = end_pfn << PAGE_SHIFT;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
- NODE_DATA(nodeid)->node_mem_map =
- __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
- memmapsize, SMP_CACHE_BYTES,
- round_down(limit - memmapsize, PAGE_SIZE),
- limit);
-#endif
-}
+}
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
void __init numa_init_array(void)
{
int rr, i;
- /* There are unfortunately some poorly designed mainboards around
- that only connect memory to a single CPU. This breaks the 1:1 cpu->node
- mapping. To avoid this fill in the mapping for all possible
- CPUs, as the number of CPUs is not known yet.
- We round robin the existing nodes. */
+
rr = first_node(node_online_map);
for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node(i) != NUMA_NO_NODE)
+ if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
- numa_set_node(i, rr);
+ numa_set_node(i, rr);
rr = next_node(rr, node_online_map);
if (rr == MAX_NUMNODES)
rr = first_node(node_online_map);
}
-
}
#ifdef CONFIG_NUMA_EMU
@@ -276,15 +265,17 @@ void __init numa_init_array(void)
char *cmdline __initdata;
/*
- * Setups up nid to range from addr to addr + size. If the end boundary is
- * greater than max_addr, then max_addr is used instead. The return value is 0
- * if there is additional memory left for allocation past addr and -1 otherwise.
- * addr is adjusted to be at the end of the node.
+ * Setups up nid to range from addr to addr + size. If the end
+ * boundary is greater than max_addr, then max_addr is used instead.
+ * The return value is 0 if there is additional memory left for
+ * allocation past addr and -1 otherwise. addr is adjusted to be at
+ * the end of the node.
*/
static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
u64 size, u64 max_addr)
{
int ret = 0;
+
nodes[nid].start = *addr;
*addr += size;
if (*addr >= max_addr) {
@@ -335,6 +326,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
for (i = node_start; i < num_nodes + node_start; i++) {
u64 end = *addr + size;
+
if (i < big)
end += FAKE_NODE_MIN_SIZE;
/*
@@ -380,14 +372,9 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
{
struct bootnode nodes[MAX_NUMNODES];
- u64 addr = start_pfn << PAGE_SHIFT;
+ u64 size, addr = start_pfn << PAGE_SHIFT;
u64 max_addr = end_pfn << PAGE_SHIFT;
- int num_nodes = 0;
- int coeff_flag;
- int coeff = -1;
- int num = 0;
- u64 size;
- int i;
+ int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
memset(&nodes, 0, sizeof(nodes));
/*
@@ -395,8 +382,9 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
* system RAM into N fake nodes.
*/
if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
- num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
- simple_strtol(cmdline, NULL, 0));
+ long n = simple_strtol(cmdline, NULL, 0);
+
+ num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
if (num_nodes < 0)
return num_nodes;
goto out;
@@ -483,46 +471,47 @@ out:
for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
acpi_fake_nodes(nodes, num_nodes);
- numa_init_array();
- return 0;
+ numa_init_array();
+ return 0;
}
#endif /* CONFIG_NUMA_EMU */
void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
+{
int i;
nodes_clear(node_possible_map);
#ifdef CONFIG_NUMA_EMU
if (cmdline && !numa_emulation(start_pfn, end_pfn))
- return;
+ return;
nodes_clear(node_possible_map);
#endif
#ifdef CONFIG_ACPI_NUMA
if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT))
- return;
+ return;
nodes_clear(node_possible_map);
#endif
#ifdef CONFIG_K8_NUMA
- if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
+ if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
+ end_pfn<<PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
#endif
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+ printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
start_pfn << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
+ end_pfn << PAGE_SHIFT);
+ /* setup dummy node covering all memory */
+ memnode_shift = 63;
memnodemap = memnode.embedded_map;
memnodemap[0] = 0;
nodes_clear(node_online_map);
@@ -530,36 +519,48 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
node_set(0, node_possible_map);
for (i = 0; i < NR_CPUS; i++)
numa_set_node(i, 0);
- node_to_cpumask[0] = cpumask_of_cpu(0);
+ /* cpumask_of_cpu() may not be available during early startup */
+ memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
+ cpu_set(0, node_to_cpumask_map[0]);
e820_register_active_regions(0, start_pfn, end_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}
__cpuinit void numa_add_cpu(int cpu)
{
- set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-}
+ set_bit(cpu,
+ (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
void __cpuinit numa_set_node(int cpu, int node)
{
+ int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+
cpu_pda(cpu)->nodenumber = node;
- cpu_to_node(cpu) = node;
+
+ if(cpu_to_node_map)
+ cpu_to_node_map[cpu] = node;
+ else if(per_cpu_offset(cpu))
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+ else
+ Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}
-unsigned long __init numa_free_all_bootmem(void)
-{
- int i;
+unsigned long __init numa_free_all_bootmem(void)
+{
unsigned long pages = 0;
- for_each_online_node(i) {
+ int i;
+
+ for_each_online_node(i)
pages += free_all_bootmem_node(NODE_DATA(i));
- }
+
return pages;
-}
+}
void __init paging_init(void)
-{
- int i;
+{
unsigned long max_zone_pfns[MAX_NR_ZONES];
+
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
@@ -568,32 +569,27 @@ void __init paging_init(void)
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
- for_each_online_node(i) {
- setup_node_zones(i);
- }
-
free_area_init_nodes(max_zone_pfns);
-}
+}
static __init int numa_setup(char *opt)
-{
+{
if (!opt)
return -EINVAL;
- if (!strncmp(opt,"off",3))
+ if (!strncmp(opt, "off", 3))
numa_off = 1;
#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5))
cmdline = opt + 5;
#endif
#ifdef CONFIG_ACPI_NUMA
- if (!strncmp(opt,"noacpi",6))
- acpi_numa = -1;
- if (!strncmp(opt,"hotadd=", 7))
+ if (!strncmp(opt, "noacpi", 6))
+ acpi_numa = -1;
+ if (!strncmp(opt, "hotadd=", 7))
hotadd_percent = simple_strtoul(opt+7, NULL, 10);
#endif
return 0;
-}
-
+}
early_param("numa", numa_setup);
/*
@@ -611,38 +607,16 @@ early_param("numa", numa_setup);
void __init init_cpu_to_node(void)
{
int i;
- for (i = 0; i < NR_CPUS; i++) {
- u8 apicid = x86_cpu_to_apicid_init[i];
+
+ for (i = 0; i < NR_CPUS; i++) {
+ u16 apicid = x86_cpu_to_apicid_init[i];
+
if (apicid == BAD_APICID)
continue;
if (apicid_to_node[apicid] == NUMA_NO_NODE)
continue;
- numa_set_node(i,apicid_to_node[apicid]);
+ numa_set_node(i, apicid_to_node[apicid]);
}
}
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode);
-EXPORT_SYMBOL(node_data);
-
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * Functions to convert PFNs from/to per node page addresses.
- * These are out of line because they are quite big.
- * They could be all tuned by pre caching more state.
- * Should do that.
- */
-int pfn_valid(unsigned long pfn)
-{
- unsigned nid;
- if (pfn >= num_physpages)
- return 0;
- nid = pfn_to_nid(pfn);
- if (nid == 0xff)
- return 0;
- return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
-}
-EXPORT_SYMBOL(pfn_valid);
-#endif
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
new file mode 100644
index 00000000000..06353d43f72
--- /dev/null
+++ b/arch/x86/mm/pageattr-test.c
@@ -0,0 +1,224 @@
+/*
+ * self test for change_page_attr.
+ *
+ * Clears the global bit on random pages in the direct mapping, then reverts
+ * and compares page tables forwards and afterwards.
+ */
+#include <linux/bootmem.h>
+#include <linux/random.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/kdebug.h>
+
+enum {
+ NTEST = 4000,
+#ifdef CONFIG_X86_64
+ LPS = (1 << PMD_SHIFT),
+#elif defined(CONFIG_X86_PAE)
+ LPS = (1 << PMD_SHIFT),
+#else
+ LPS = (1 << 22),
+#endif
+ GPS = (1<<30)
+};
+
+struct split_state {
+ long lpg, gpg, spg, exec;
+ long min_exec, max_exec;
+};
+
+static __init int print_split(struct split_state *s)
+{
+ long i, expected, missed = 0;
+ int printed = 0;
+ int err = 0;
+
+ s->lpg = s->gpg = s->spg = s->exec = 0;
+ s->min_exec = ~0UL;
+ s->max_exec = 0;
+ for (i = 0; i < max_pfn_mapped; ) {
+ unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT);
+ int level;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &level);
+ if (!pte) {
+ if (!printed) {
+ dump_pagetable(addr);
+ printk(KERN_INFO "CPA %lx no pte level %d\n",
+ addr, level);
+ printed = 1;
+ }
+ missed++;
+ i++;
+ continue;
+ }
+
+ if (level == PG_LEVEL_1G && sizeof(long) == 8) {
+ s->gpg++;
+ i += GPS/PAGE_SIZE;
+ } else if (level == PG_LEVEL_2M) {
+ if (!(pte_val(*pte) & _PAGE_PSE)) {
+ printk(KERN_ERR
+ "%lx level %d but not PSE %Lx\n",
+ addr, level, (u64)pte_val(*pte));
+ err = 1;
+ }
+ s->lpg++;
+ i += LPS/PAGE_SIZE;
+ } else {
+ s->spg++;
+ i++;
+ }
+ if (!(pte_val(*pte) & _PAGE_NX)) {
+ s->exec++;
+ if (addr < s->min_exec)
+ s->min_exec = addr;
+ if (addr > s->max_exec)
+ s->max_exec = addr;
+ }
+ }
+ printk(KERN_INFO
+ "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+ s->spg, s->lpg, s->gpg, s->exec,
+ s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+
+ expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
+ if (expected != i) {
+ printk(KERN_ERR "CPA max_pfn_mapped %lu but expected %lu\n",
+ max_pfn_mapped, expected);
+ return 1;
+ }
+ return err;
+}
+
+static unsigned long __initdata addr[NTEST];
+static unsigned int __initdata len[NTEST];
+
+/* Change the global bit on random pages in the direct mapping */
+static __init int exercise_pageattr(void)
+{
+ struct split_state sa, sb, sc;
+ unsigned long *bm;
+ pte_t *pte, pte0;
+ int failed = 0;
+ int level;
+ int i, k;
+ int err;
+
+ printk(KERN_INFO "CPA exercising pageattr\n");
+
+ bm = vmalloc((max_pfn_mapped + 7) / 8);
+ if (!bm) {
+ printk(KERN_ERR "CPA Cannot vmalloc bitmap\n");
+ return -ENOMEM;
+ }
+ memset(bm, 0, (max_pfn_mapped + 7) / 8);
+
+ failed += print_split(&sa);
+ srandom32(100);
+
+ for (i = 0; i < NTEST; i++) {
+ unsigned long pfn = random32() % max_pfn_mapped;
+
+ addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
+ len[i] = random32() % 100;
+ len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
+
+ if (len[i] == 0)
+ len[i] = 1;
+
+ pte = NULL;
+ pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */
+
+ for (k = 0; k < len[i]; k++) {
+ pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
+ if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+ addr[i] = 0;
+ break;
+ }
+ if (k == 0) {
+ pte0 = *pte;
+ } else {
+ if (pgprot_val(pte_pgprot(*pte)) !=
+ pgprot_val(pte_pgprot(pte0))) {
+ len[i] = k;
+ break;
+ }
+ }
+ if (test_bit(pfn + k, bm)) {
+ len[i] = k;
+ break;
+ }
+ __set_bit(pfn + k, bm);
+ }
+ if (!addr[i] || !pte || !k) {
+ addr[i] = 0;
+ continue;
+ }
+
+ err = change_page_attr_clear(addr[i], len[i],
+ __pgprot(_PAGE_GLOBAL));
+ if (err < 0) {
+ printk(KERN_ERR "CPA %d failed %d\n", i, err);
+ failed++;
+ }
+
+ pte = lookup_address(addr[i], &level);
+ if (!pte || pte_global(*pte) || pte_huge(*pte)) {
+ printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i],
+ pte ? (u64)pte_val(*pte) : 0ULL);
+ failed++;
+ }
+ if (level != PG_LEVEL_4K) {
+ printk(KERN_ERR "CPA %lx: unexpected level %d\n",
+ addr[i], level);
+ failed++;
+ }
+
+ }
+ vfree(bm);
+
+ failed += print_split(&sb);
+
+ printk(KERN_INFO "CPA reverting everything\n");
+ for (i = 0; i < NTEST; i++) {
+ if (!addr[i])
+ continue;
+ pte = lookup_address(addr[i], &level);
+ if (!pte) {
+ printk(KERN_ERR "CPA lookup of %lx failed\n", addr[i]);
+ failed++;
+ continue;
+ }
+ err = change_page_attr_set(addr[i], len[i],
+ __pgprot(_PAGE_GLOBAL));
+ if (err < 0) {
+ printk(KERN_ERR "CPA reverting failed: %d\n", err);
+ failed++;
+ }
+ pte = lookup_address(addr[i], &level);
+ if (!pte || !pte_global(*pte)) {
+ printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n",
+ addr[i], pte ? (u64)pte_val(*pte) : 0ULL);
+ failed++;
+ }
+
+ }
+
+ failed += print_split(&sc);
+
+ if (failed) {
+ printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+ WARN_ON(1);
+ } else {
+ printk(KERN_INFO "CPA selftests PASSED\n");
+ }
+
+ return 0;
+}
+module_init(exercise_pageattr);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
new file mode 100644
index 00000000000..1cc6607eacb
--- /dev/null
+++ b/arch/x86/mm/pageattr.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/e820.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
+/*
+ * Flushing functions
+ */
+
+/**
+ * clflush_cache_range - flush a cache range with clflush
+ * @addr: virtual start address
+ * @size: number of bytes to flush
+ *
+ * clflush is an unordered instruction which needs fencing with mfence
+ * to avoid ordering issues.
+ */
+void clflush_cache_range(void *vaddr, unsigned int size)
+{
+ void *vend = vaddr + size - 1;
+
+ mb();
+
+ for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
+ clflush(vaddr);
+ /*
+ * Flush any possible final partial cacheline:
+ */
+ clflush(vend);
+
+ mb();
+}
+
+static void __cpa_flush_all(void *arg)
+{
+ /*
+ * Flush all to work around Errata in early athlons regarding
+ * large page flushing.
+ */
+ __flush_tlb_all();
+
+ if (boot_cpu_data.x86_model >= 4)
+ wbinvd();
+}
+
+static void cpa_flush_all(void)
+{
+ BUG_ON(irqs_disabled());
+
+ on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+}
+
+static void __cpa_flush_range(void *arg)
+{
+ /*
+ * We could optimize that further and do individual per page
+ * tlb invalidates for a low number of pages. Caveat: we must
+ * flush the high aliases on 64bit as well.
+ */
+ __flush_tlb_all();
+}
+
+static void cpa_flush_range(unsigned long start, int numpages)
+{
+ unsigned int i, level;
+ unsigned long addr;
+
+ BUG_ON(irqs_disabled());
+ WARN_ON(PAGE_ALIGN(start) != start);
+
+ on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+
+ /*
+ * We only need to flush on one CPU,
+ * clflush is a MESI-coherent instruction that
+ * will cause all other CPUs to flush the same
+ * cachelines:
+ */
+ for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+ pte_t *pte = lookup_address(addr, &level);
+
+ /*
+ * Only flush present addresses:
+ */
+ if (pte && pte_present(*pte))
+ clflush_cache_range((void *) addr, PAGE_SIZE);
+ }
+}
+
+/*
+ * Certain areas of memory on x86 require very specific protection flags,
+ * for example the BIOS area or kernel text. Callers don't always get this
+ * right (again, ioremap() on BIOS memory is not uncommon) so this function
+ * checks and fixes these known static required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
+{
+ pgprot_t forbidden = __pgprot(0);
+
+ /*
+ * The BIOS area between 640k and 1Mb needs to be executable for
+ * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
+ */
+ if (within(__pa(address), BIOS_BEGIN, BIOS_END))
+ pgprot_val(forbidden) |= _PAGE_NX;
+
+ /*
+ * The kernel text needs to be executable for obvious reasons
+ * Does not cover __inittext since that is gone later on
+ */
+ if (within(address, (unsigned long)_text, (unsigned long)_etext))
+ pgprot_val(forbidden) |= _PAGE_NX;
+
+#ifdef CONFIG_DEBUG_RODATA
+ /* The .rodata section needs to be read-only */
+ if (within(address, (unsigned long)__start_rodata,
+ (unsigned long)__end_rodata))
+ pgprot_val(forbidden) |= _PAGE_RW;
+#endif
+
+ prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+ return prot;
+}
+
+pte_t *lookup_address(unsigned long address, int *level)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+ pud_t *pud;
+ pmd_t *pmd;
+
+ *level = PG_LEVEL_NONE;
+
+ if (pgd_none(*pgd))
+ return NULL;
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud))
+ return NULL;
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd))
+ return NULL;
+
+ *level = PG_LEVEL_2M;
+ if (pmd_large(*pmd))
+ return (pte_t *)pmd;
+
+ *level = PG_LEVEL_4K;
+ return pte_offset_kernel(pmd, address);
+}
+
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+{
+ /* change init_mm */
+ set_pte_atomic(kpte, pte);
+#ifdef CONFIG_X86_32
+ if (!SHARED_KERNEL_PMD) {
+ struct page *page;
+
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pud = pud_offset(pgd, address);
+ pmd = pmd_offset(pud, address);
+ set_pte_atomic((pte_t *)pmd, pte);
+ }
+ }
+#endif
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+ pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ gfp_t gfp_flags = GFP_KERNEL;
+ unsigned long flags;
+ unsigned long addr;
+ pte_t *pbase, *tmp;
+ struct page *base;
+ unsigned int i, level;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
+ gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
+#endif
+ base = alloc_pages(gfp_flags, 0);
+ if (!base)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+ * up for us already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte) {
+ WARN_ON_ONCE(1);
+ goto out_unlock;
+ }
+
+ address = __pa(address);
+ addr = address & LARGE_PAGE_MASK;
+ pbase = (pte_t *)page_address(base);
+#ifdef CONFIG_X86_32
+ paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+#endif
+
+ pgprot_val(ref_prot) &= ~_PAGE_NX;
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
+ /*
+ * Install the new, split up pagetable. Important detail here:
+ *
+ * On Intel the NX bit of all levels must be cleared to make a
+ * page executable. See section 4.13.2 of Intel 64 and IA-32
+ * Architectures Software Developer's Manual).
+ */
+ ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+ __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+ base = NULL;
+
+out_unlock:
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ if (base)
+ __free_pages(base, 0);
+
+ return 0;
+}
+
+static int
+__change_page_attr(unsigned long address, unsigned long pfn,
+ pgprot_t mask_set, pgprot_t mask_clr)
+{
+ struct page *kpte_page;
+ int level, err = 0;
+ pte_t *kpte;
+
+#ifdef CONFIG_X86_32
+ BUG_ON(pfn > max_low_pfn);
+#endif
+
+repeat:
+ kpte = lookup_address(address, &level);
+ if (!kpte)
+ return -EINVAL;
+
+ kpte_page = virt_to_page(kpte);
+ BUG_ON(PageLRU(kpte_page));
+ BUG_ON(PageCompound(kpte_page));
+
+ if (level == PG_LEVEL_4K) {
+ pgprot_t new_prot = pte_pgprot(*kpte);
+ pte_t new_pte, old_pte = *kpte;
+
+ pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(mask_set);
+
+ new_prot = static_protections(new_prot, address);
+
+ new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+ BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+
+ set_pte_atomic(kpte, new_pte);
+ } else {
+ err = split_large_page(kpte, address);
+ if (!err)
+ goto repeat;
+ }
+ return err;
+}
+
+/**
+ * change_page_attr_addr - Change page table attributes in linear mapping
+ * @address: Virtual address in linear mapping.
+ * @prot: New page table attribute (PAGE_*)
+ *
+ * Change page attributes of a page in the direct mapping. This is a variant
+ * of change_page_attr() that also works on memory holes that do not have
+ * mem_map entry (pfn_valid() is false).
+ *
+ * See change_page_attr() documentation for more details.
+ *
+ * Modules and drivers should use the set_memory_* APIs instead.
+ */
+
+#define HIGH_MAP_START __START_KERNEL_map
+#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+static int
+change_page_attr_addr(unsigned long address, pgprot_t mask_set,
+ pgprot_t mask_clr)
+{
+ unsigned long phys_addr = __pa(address);
+ unsigned long pfn = phys_addr >> PAGE_SHIFT;
+ int err;
+
+#ifdef CONFIG_X86_64
+ /*
+ * If we are inside the high mapped kernel range, then we
+ * fixup the low mapping first. __va() returns the virtual
+ * address in the linear mapping:
+ */
+ if (within(address, HIGH_MAP_START, HIGH_MAP_END))
+ address = (unsigned long) __va(phys_addr);
+#endif
+
+ err = __change_page_attr(address, pfn, mask_set, mask_clr);
+ if (err)
+ return err;
+
+#ifdef CONFIG_X86_64
+ /*
+ * If the physical address is inside the kernel map, we need
+ * to touch the high mapped kernel as well:
+ */
+ if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) {
+ /*
+ * Calc the high mapping address. See __phys_addr()
+ * for the non obvious details.
+ */
+ address = phys_addr + HIGH_MAP_START - phys_base;
+ /* Make sure the kernel mappings stay executable */
+ pgprot_val(mask_clr) |= _PAGE_NX;
+
+ /*
+ * Our high aliases are imprecise, because we check
+ * everything between 0 and KERNEL_TEXT_SIZE, so do
+ * not propagate lookup failures back to users:
+ */
+ __change_page_attr(address, pfn, mask_set, mask_clr);
+ }
+#endif
+ return err;
+}
+
+static int __change_page_attr_set_clr(unsigned long addr, int numpages,
+ pgprot_t mask_set, pgprot_t mask_clr)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
+ ret = change_page_attr_addr(addr, mask_set, mask_clr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int change_page_attr_set_clr(unsigned long addr, int numpages,
+ pgprot_t mask_set, pgprot_t mask_clr)
+{
+ int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
+ mask_clr);
+
+ /*
+ * On success we use clflush, when the CPU supports it to
+ * avoid the wbindv. If the CPU does not support it and in the
+ * error case we fall back to cpa_flush_all (which uses
+ * wbindv):
+ */
+ if (!ret && cpu_has_clflush)
+ cpa_flush_range(addr, numpages);
+ else
+ cpa_flush_all();
+
+ return ret;
+}
+
+static inline int change_page_attr_set(unsigned long addr, int numpages,
+ pgprot_t mask)
+{
+ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
+}
+
+static inline int change_page_attr_clear(unsigned long addr, int numpages,
+ pgprot_t mask)
+{
+ return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
+
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(addr, numpages,
+ __pgprot(_PAGE_PCD | _PAGE_PWT));
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(addr, numpages,
+ __pgprot(_PAGE_PCD | _PAGE_PWT));
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+}
+EXPORT_SYMBOL(set_memory_x);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+}
+EXPORT_SYMBOL(set_memory_nx);
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+}
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+}
+
+int set_pages_uc(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_uc(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_uc);
+
+int set_pages_wb(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_wb(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_wb);
+
+int set_pages_x(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_x(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_x);
+
+int set_pages_nx(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_nx(addr, numpages);
+}
+EXPORT_SYMBOL(set_pages_nx);
+
+int set_pages_ro(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_ro(addr, numpages);
+}
+
+int set_pages_rw(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return set_memory_rw(addr, numpages);
+}
+
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
+static inline int __change_page_attr_set(unsigned long addr, int numpages,
+ pgprot_t mask)
+{
+ return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
+}
+
+static inline int __change_page_attr_clear(unsigned long addr, int numpages,
+ pgprot_t mask)
+{
+ return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
+}
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+
+static int __set_pages_p(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return __change_page_attr_set(addr, numpages,
+ __pgprot(_PAGE_PRESENT | _PAGE_RW));
+}
+
+static int __set_pages_np(struct page *page, int numpages)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+
+ return __change_page_attr_clear(addr, numpages,
+ __pgprot(_PAGE_PRESENT));
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (PageHighMem(page))
+ return;
+ if (!enable) {
+ debug_check_no_locks_freed(page_address(page),
+ numpages * PAGE_SIZE);
+ }
+
+ /*
+ * If page allocator is not up yet then do not call c_p_a():
+ */
+ if (!debug_pagealloc_enabled)
+ return;
+
+ /*
+ * The return value is ignored - the calls cannot fail,
+ * large pages are disabled at boot time:
+ */
+ if (enable)
+ __set_pages_p(page, numpages);
+ else
+ __set_pages_np(page, numpages);
+
+ /*
+ * We should perform an IPI and flush all tlbs,
+ * but that can deadlock->flush only current cpu:
+ */
+ __flush_tlb_all();
+}
+#endif
+
+/*
+ * The testcases use internal knowledge of the implementation that shouldn't
+ * be exposed to the rest of the kernel. Include these directly here.
+ */
+#ifdef CONFIG_CPA_DEBUG
+#include "pageattr-test.c"
+#endif
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
deleted file mode 100644
index 260073c0760..00000000000
--- a/arch/x86/mm/pageattr_32.c
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/sections.h>
-
-static DEFINE_SPINLOCK(cpa_lock);
-static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
-
-pte_t *lookup_address(unsigned long address)
-{
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- if (pgd_none(*pgd))
- return NULL;
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- return NULL;
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return NULL;
- if (pmd_large(*pmd))
- return (pte_t *)pmd;
- return pte_offset_kernel(pmd, address);
-}
-
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
- unsigned long addr;
- struct page *base;
- pte_t *pbase;
-
- spin_unlock_irq(&cpa_lock);
- base = alloc_pages(GFP_KERNEL, 0);
- spin_lock_irq(&cpa_lock);
- if (!base)
- return NULL;
-
- /*
- * page_private is used to track the number of entries in
- * the page table page that have non standard attributes.
- */
- SetPagePrivate(base);
- page_private(base) = 0;
-
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- paravirt_alloc_pt(&init_mm, page_to_pfn(base));
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot));
- }
- return base;
-}
-
-static void cache_flush_page(struct page *p)
-{
- void *adr = page_address(p);
- int i;
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- clflush(adr+i);
-}
-
-static void flush_kernel_map(void *arg)
-{
- struct list_head *lh = (struct list_head *)arg;
- struct page *p;
-
- /* High level code is not ready for clflush yet */
- if (0 && cpu_has_clflush) {
- list_for_each_entry (p, lh, lru)
- cache_flush_page(p);
- } else if (boot_cpu_data.x86_model >= 4)
- wbinvd();
-
- /* Flush all to work around Errata in early athlons regarding
- * large page flushing.
- */
- __flush_tlb_all();
-}
-
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
-{
- struct page *page;
- unsigned long flags;
-
- set_pte_atomic(kpte, pte); /* change init_mm */
- if (SHARED_KERNEL_PMD)
- return;
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (page = pgd_list; page; page = (struct page *)page->index) {
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pud = pud_offset(pgd, address);
- pmd = pmd_offset(pud, address);
- set_pte_atomic((pte_t *)pmd, pte);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static inline void revert_page(struct page *kpte_page, unsigned long address)
-{
- pgprot_t ref_prot;
- pte_t *linear;
-
- ref_prot =
- ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
- ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
- linear = (pte_t *)
- pmd_offset(pud_offset(pgd_offset_k(address), address), address);
- set_pmd_pte(linear, address,
- pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
- ref_prot));
-}
-
-static inline void save_page(struct page *kpte_page)
-{
- if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
- list_add(&kpte_page->lru, &df_list);
-}
-
-static int
-__change_page_attr(struct page *page, pgprot_t prot)
-{
- pte_t *kpte;
- unsigned long address;
- struct page *kpte_page;
-
- BUG_ON(PageHighMem(page));
- address = (unsigned long)page_address(page);
-
- kpte = lookup_address(address);
- if (!kpte)
- return -EINVAL;
- kpte_page = virt_to_page(kpte);
- BUG_ON(PageLRU(kpte_page));
- BUG_ON(PageCompound(kpte_page));
-
- if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
- if (!pte_huge(*kpte)) {
- set_pte_atomic(kpte, mk_pte(page, prot));
- } else {
- pgprot_t ref_prot;
- struct page *split;
-
- ref_prot =
- ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
- ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
- split = split_large_page(address, prot, ref_prot);
- if (!split)
- return -ENOMEM;
- set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
- kpte_page = split;
- }
- page_private(kpte_page)++;
- } else if (!pte_huge(*kpte)) {
- set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
- BUG_ON(page_private(kpte_page) == 0);
- page_private(kpte_page)--;
- } else
- BUG();
-
- /*
- * If the pte was reserved, it means it was created at boot
- * time (not via split_large_page) and in turn we must not
- * replace it with a largepage.
- */
-
- save_page(kpte_page);
- if (!PageReserved(kpte_page)) {
- if (cpu_has_pse && (page_private(kpte_page) == 0)) {
- paravirt_release_pt(page_to_pfn(kpte_page));
- revert_page(kpte_page, address);
- }
- }
- return 0;
-}
-
-static inline void flush_map(struct list_head *l)
-{
- on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
- */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- int err = 0;
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&cpa_lock, flags);
- for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr(page, prot);
- if (err)
- break;
- }
- spin_unlock_irqrestore(&cpa_lock, flags);
- return err;
-}
-
-void global_flush_tlb(void)
-{
- struct list_head l;
- struct page *pg, *next;
-
- BUG_ON(irqs_disabled());
-
- spin_lock_irq(&cpa_lock);
- list_replace_init(&df_list, &l);
- spin_unlock_irq(&cpa_lock);
- flush_map(&l);
- list_for_each_entry_safe(pg, next, &l, lru) {
- list_del(&pg->lru);
- clear_bit(PG_arch_1, &pg->flags);
- if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
- continue;
- ClearPagePrivate(pg);
- __free_page(pg);
- }
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
- if (PageHighMem(page))
- return;
- if (!enable)
- debug_check_no_locks_freed(page_address(page),
- numpages * PAGE_SIZE);
-
- /* the return value is ignored - the calls cannot fail,
- * large pages are disabled at boot time.
- */
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
- /* we should perform an IPI and flush all tlbs,
- * but that can deadlock->flush only current cpu.
- */
- __flush_tlb_all();
-}
-#endif
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
deleted file mode 100644
index c40afbaaf93..00000000000
--- a/arch/x86/mm/pageattr_64.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-
-pte_t *lookup_address(unsigned long address)
-{
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- if (pgd_none(*pgd))
- return NULL;
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return NULL;
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return NULL;
- if (pmd_large(*pmd))
- return (pte_t *)pmd;
- pte = pte_offset_kernel(pmd, address);
- if (pte && !pte_present(*pte))
- pte = NULL;
- return pte;
-}
-
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
- unsigned long addr;
- struct page *base = alloc_pages(GFP_KERNEL, 0);
- pte_t *pbase;
- if (!base)
- return NULL;
- /*
- * page_private is used to track the number of entries in
- * the page table page have non standard attributes.
- */
- SetPagePrivate(base);
- page_private(base) = 0;
-
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot);
- }
- return base;
-}
-
-void clflush_cache_range(void *adr, int size)
-{
- int i;
- for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
- clflush(adr+i);
-}
-
-static void flush_kernel_map(void *arg)
-{
- struct list_head *l = (struct list_head *)arg;
- struct page *pg;
-
- /* When clflush is available always use it because it is
- much cheaper than WBINVD. */
- /* clflush is still broken. Disable for now. */
- if (1 || !cpu_has_clflush)
- asm volatile("wbinvd" ::: "memory");
- else list_for_each_entry(pg, l, lru) {
- void *adr = page_address(pg);
- clflush_cache_range(adr, PAGE_SIZE);
- }
- __flush_tlb_all();
-}
-
-static inline void flush_map(struct list_head *l)
-{
- on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
-static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
-
-static inline void save_page(struct page *fpage)
-{
- if (!test_and_set_bit(PG_arch_1, &fpage->flags))
- list_add(&fpage->lru, &deferred_pages);
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t large_pte;
- unsigned long pfn;
-
- pgd = pgd_offset_k(address);
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd,address);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, address);
- BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
- pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
- large_pte = pfn_pte(pfn, ref_prot);
- large_pte = pte_mkhuge(large_pte);
- set_pte((pte_t *)pmd, large_pte);
-}
-
-static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
- pgprot_t ref_prot)
-{
- pte_t *kpte;
- struct page *kpte_page;
- pgprot_t ref_prot2;
-
- kpte = lookup_address(address);
- if (!kpte) return 0;
- kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
- BUG_ON(PageLRU(kpte_page));
- BUG_ON(PageCompound(kpte_page));
- if (pgprot_val(prot) != pgprot_val(ref_prot)) {
- if (!pte_huge(*kpte)) {
- set_pte(kpte, pfn_pte(pfn, prot));
- } else {
- /*
- * split_large_page will take the reference for this
- * change_page_attr on the split page.
- */
- struct page *split;
- ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
- split = split_large_page(address, prot, ref_prot2);
- if (!split)
- return -ENOMEM;
- pgprot_val(ref_prot2) &= ~_PAGE_NX;
- set_pte(kpte, mk_pte(split, ref_prot2));
- kpte_page = split;
- }
- page_private(kpte_page)++;
- } else if (!pte_huge(*kpte)) {
- set_pte(kpte, pfn_pte(pfn, ref_prot));
- BUG_ON(page_private(kpte_page) == 0);
- page_private(kpte_page)--;
- } else
- BUG();
-
- /* on x86-64 the direct mapping set at boot is not using 4k pages */
- BUG_ON(PageReserved(kpte_page));
-
- save_page(kpte_page);
- if (page_private(kpte_page) == 0)
- revert_page(address, ref_prot);
- return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
- */
-int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
-{
- int err = 0, kernel_map = 0;
- int i;
-
- if (address >= __START_KERNEL_map
- && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
- address = (unsigned long)__va(__pa(address));
- kernel_map = 1;
- }
-
- down_write(&init_mm.mmap_sem);
- for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
- unsigned long pfn = __pa(address) >> PAGE_SHIFT;
-
- if (!kernel_map || pte_present(pfn_pte(0, prot))) {
- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
- if (err)
- break;
- }
- /* Handle kernel mapping too which aliases part of the
- * lowmem */
- if (__pa(address) < KERNEL_TEXT_SIZE) {
- unsigned long addr2;
- pgprot_t prot2;
- addr2 = __START_KERNEL_map + __pa(address);
- /* Make sure the kernel mappings stay executable */
- prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
- err = __change_page_attr(addr2, pfn, prot2,
- PAGE_KERNEL_EXEC);
- }
- }
- up_write(&init_mm.mmap_sem);
- return err;
-}
-
-/* Don't call this for MMIO areas that may not have a mem_map entry */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- unsigned long addr = (unsigned long)page_address(page);
- return change_page_attr_addr(addr, numpages, prot);
-}
-
-void global_flush_tlb(void)
-{
- struct page *pg, *next;
- struct list_head l;
-
- /*
- * Write-protect the semaphore, to exclude two contexts
- * doing a list_replace_init() call in parallel and to
- * exclude new additions to the deferred_pages list:
- */
- down_write(&init_mm.mmap_sem);
- list_replace_init(&deferred_pages, &l);
- up_write(&init_mm.mmap_sem);
-
- flush_map(&l);
-
- list_for_each_entry_safe(pg, next, &l, lru) {
- list_del(&pg->lru);
- clear_bit(PG_arch_1, &pg->flags);
- if (page_private(pg) != 0)
- continue;
- ClearPagePrivate(pg);
- __free_page(pg);
- }
-}
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index be61a1d845a..2ae5999a795 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -195,11 +195,6 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
return pte;
}
-void pmd_ctor(struct kmem_cache *cache, void *pmd)
-{
- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-}
-
/*
* List of all pgd's needed for non-PAE so it can invalidate entries
* in both cached and uncached pgd's; not needed for PAE since the
@@ -210,27 +205,18 @@ void pmd_ctor(struct kmem_cache *cache, void *pmd)
* vmalloc faults work because attached pagetables are never freed.
* -- wli
*/
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
static inline void pgd_list_add(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
- page->index = (unsigned long)pgd_list;
- if (pgd_list)
- set_page_private(pgd_list, (unsigned long)&page->index);
- pgd_list = page;
- set_page_private(page, (unsigned long)&pgd_list);
+
+ list_add(&page->lru, &pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
- struct page *next, **pprev, *page = virt_to_page(pgd);
- next = (struct page *)page->index;
- pprev = (struct page **)page_private(page);
- *pprev = next;
- if (next)
- set_page_private(next, (unsigned long)pprev);
+ struct page *page = virt_to_page(pgd);
+
+ list_del(&page->lru);
}
@@ -285,7 +271,6 @@ static void pgd_dtor(void *pgd)
if (SHARED_KERNEL_PMD)
return;
- paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
@@ -294,77 +279,96 @@ static void pgd_dtor(void *pgd)
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-/* If we allocate a pmd for part of the kernel address space, then
- make sure its initialized with the appropriate kernel mappings.
- Otherwise use a cached zeroed pmd. */
-static pmd_t *pmd_cache_alloc(int idx)
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(pgd_t *pgdp)
{
- pmd_t *pmd;
+ int i;
- if (idx >= USER_PTRS_PER_PGD) {
- pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+ for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+ pgd_t pgd = pgdp[i];
- if (pmd)
- memcpy(pmd,
- (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+ if (pgd_val(pgd) != 0) {
+ pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+ pgdp[i] = native_make_pgd(0);
+
+ paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+ pmd_free(pmd);
+ }
+ }
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update. Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+ pud_t *pud;
+ unsigned long addr;
+ int i;
+
+ pud = pud_offset(pgd, 0);
+ for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+ i++, pud++, addr += PUD_SIZE) {
+ pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+ if (!pmd) {
+ pgd_mop_up_pmds(pgd);
+ return 0;
+ }
+
+ if (i >= USER_PTRS_PER_PGD)
+ memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
sizeof(pmd_t) * PTRS_PER_PMD);
- } else
- pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- return pmd;
+ pud_populate(mm, pud, pmd);
+ }
+
+ return 1;
+}
+#else /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+ return 1;
}
-static void pmd_cache_free(pmd_t *pmd, int idx)
+static void pgd_mop_up_pmds(pgd_t *pgd)
{
- if (idx >= USER_PTRS_PER_PGD)
- free_page((unsigned long)pmd);
- else
- kmem_cache_free(pmd_cache, pmd);
}
+#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- int i;
pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
- if (PTRS_PER_PMD == 1 || !pgd)
- return pgd;
+ mm->pgd = pgd; /* so that alloc_pd can use it */
- for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = pmd_cache_alloc(i);
-
- if (!pmd)
- goto out_oom;
-
- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+ quicklist_free(0, pgd_dtor, pgd);
+ pgd = NULL;
}
- return pgd;
-out_oom:
- for (i--; i >= 0; i--) {
- pgd_t pgdent = pgd[i];
- void* pmd = (void *)__va(pgd_val(pgdent)-1);
- paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- pmd_cache_free(pmd, i);
- }
- quicklist_free(0, pgd_dtor, pgd);
- return NULL;
+ return pgd;
}
void pgd_free(pgd_t *pgd)
{
- int i;
-
- /* in the PAE case user pgd entries are overwritten before usage */
- if (PTRS_PER_PMD > 1)
- for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
- pgd_t pgdent = pgd[i];
- void* pmd = (void *)__va(pgd_val(pgdent)-1);
- paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- pmd_cache_free(pmd, i);
- }
- /* in the non-PAE case, free_pgtables() clears user pgd entries */
+ pgd_mop_up_pmds(pgd);
quicklist_free(0, pgd_dtor, pgd);
}
@@ -372,4 +376,3 @@ void check_pgt_cache(void)
{
quicklist_trim(0, pgd_dtor, 25, 16);
}
-
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index ea85172fc0c..65416f843e5 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -130,6 +130,9 @@ void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
int pxm, node;
+ int apic_id;
+
+ apic_id = pa->apic_id;
if (srat_disabled())
return;
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
@@ -145,68 +148,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
bad_srat();
return;
}
- apicid_to_node[pa->apic_id] = node;
+ apicid_to_node[apic_id] = node;
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
- pxm, pa->apic_id, node);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Protect against too large hotadd areas that would fill up memory.
- */
-static int hotadd_enough_memory(struct bootnode *nd)
-{
- static unsigned long allocated;
- static unsigned long last_area_end;
- unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
- long mem = pages * sizeof(struct page);
- unsigned long addr;
- unsigned long allowed;
- unsigned long oldpages = pages;
-
- if (mem < 0)
- return 0;
- allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
- allowed = (allowed / 100) * hotadd_percent;
- if (allocated + mem > allowed) {
- unsigned long range;
- /* Give them at least part of their hotadd memory upto hotadd_percent
- It would be better to spread the limit out
- over multiple hotplug areas, but that is too complicated
- right now */
- if (allocated >= allowed)
- return 0;
- range = allowed - allocated;
- pages = (range / PAGE_SIZE);
- mem = pages * sizeof(struct page);
- nd->end = nd->start + range;
- }
- /* Not completely fool proof, but a good sanity check */
- addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
- if (addr == -1UL)
- return 0;
- if (pages != oldpages)
- printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
- pages << PAGE_SHIFT);
- last_area_end = addr + mem;
- allocated += mem;
- return 1;
-}
-
-static int update_end_of_memory(unsigned long end)
-{
- found_add_area = 1;
- if ((end >> PAGE_SHIFT) > end_pfn)
- end_pfn = end >> PAGE_SHIFT;
- return 1;
+ pxm, apic_id, node);
}
-static inline int save_add_info(void)
-{
- return hotadd_percent > 0;
-}
-#else
int update_end_of_memory(unsigned long end) {return -1;}
static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -214,10 +161,9 @@ static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif
-#endif
/*
* Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add infomation.
+ * Both SPARSE and RESERVE need nodes_add information.
* This code supports one contiguous hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
@@ -377,7 +323,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
return 1;
}
-static void unparse_node(int node)
+static void __init unparse_node(int node)
{
int i;
node_clear(node, nodes_parsed);
@@ -400,7 +346,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
/* First clean up the node list */
for (i = 0; i < MAX_NUMNODES; i++) {
cutoff_node(i, start, end);
- if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
+ /*
+ * don't confuse VM with a node that doesn't have the
+ * minimum memory.
+ */
+ if (nodes[i].end &&
+ (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
unparse_node(i);
node_set_offline(i);
}
@@ -431,9 +382,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node(i) == NUMA_NO_NODE)
+ int node = early_cpu_to_node(i);
+
+ if (node == NUMA_NO_NODE)
continue;
- if (!node_isset(cpu_to_node(i), node_possible_map))
+ if (!node_isset(node, node_possible_map))
numa_set_node(i, NUMA_NO_NODE);
}
numa_init_array();
@@ -441,6 +394,12 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
}
#ifdef CONFIG_NUMA_EMU
+static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
+ [0 ... MAX_NUMNODES-1] = PXM_INVAL
+};
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
static int __init find_node_by_addr(unsigned long addr)
{
int ret = NUMA_NO_NODE;
@@ -457,7 +416,7 @@ static int __init find_node_by_addr(unsigned long addr)
break;
}
}
- return i;
+ return ret;
}
/*
@@ -471,12 +430,6 @@ static int __init find_node_by_addr(unsigned long addr)
void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
int i, j;
- int fake_node_to_pxm_map[MAX_NUMNODES] = {
- [0 ... MAX_NUMNODES-1] = PXM_INVAL
- };
- unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
- };
printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
"topology.\n");
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 0ed046a187f..e2095cba409 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -32,7 +32,7 @@ static int backtrace_stack(void *data, char *name)
return 0;
}
-static void backtrace_address(void *data, unsigned long addr)
+static void backtrace_address(void *data, unsigned long addr, int reliable)
{
unsigned int *depth = data;
@@ -48,7 +48,7 @@ static struct stacktrace_ops backtrace_ops = {
};
struct frame_head {
- struct frame_head *ebp;
+ struct frame_head *bp;
unsigned long ret;
} __attribute__((packed));
@@ -67,21 +67,21 @@ dump_user_backtrace(struct frame_head * head)
/* frame pointers should strictly progress back up the stack
* (towards higher addresses) */
- if (head >= bufhead[0].ebp)
+ if (head >= bufhead[0].bp)
return NULL;
- return bufhead[0].ebp;
+ return bufhead[0].bp;
}
void
x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct frame_head *head = (struct frame_head *)frame_pointer(regs);
- unsigned long stack = stack_pointer(regs);
+ unsigned long stack = kernel_trap_sp(regs);
if (!user_mode_vm(regs)) {
if (depth)
- dump_trace(NULL, regs, (unsigned long *)stack,
+ dump_trace(NULL, regs, (unsigned long *)stack, 0,
&backtrace_ops, &depth);
return;
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index c8ab79ef427..1f11cf0a307 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -18,11 +18,11 @@
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
-
+
#include "op_counter.h"
#include "op_x86_model.h"
-static struct op_x86_model_spec const * model;
+static struct op_x86_model_spec const *model;
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];
@@ -41,7 +41,6 @@ static int nmi_suspend(struct sys_device *dev, pm_message_t state)
return 0;
}
-
static int nmi_resume(struct sys_device *dev)
{
if (nmi_enabled == 1)
@@ -49,29 +48,27 @@ static int nmi_resume(struct sys_device *dev)
return 0;
}
-
static struct sysdev_class oprofile_sysclass = {
.name = "oprofile",
.resume = nmi_resume,
.suspend = nmi_suspend,
};
-
static struct sys_device device_oprofile = {
.id = 0,
.cls = &oprofile_sysclass,
};
-
static int __init init_sysfs(void)
{
int error;
- if (!(error = sysdev_class_register(&oprofile_sysclass)))
+
+ error = sysdev_class_register(&oprofile_sysclass);
+ if (!error)
error = sysdev_register(&device_oprofile);
return error;
}
-
static void exit_sysfs(void)
{
sysdev_unregister(&device_oprofile);
@@ -90,7 +87,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
int ret = NOTIFY_DONE;
int cpu = smp_processor_id();
- switch(val) {
+ switch (val) {
case DIE_NMI:
if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
ret = NOTIFY_STOP;
@@ -101,24 +98,24 @@ static int profile_exceptions_notify(struct notifier_block *self,
return ret;
}
-static void nmi_cpu_save_registers(struct op_msrs * msrs)
+static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
- struct op_msr * counters = msrs->counters;
- struct op_msr * controls = msrs->controls;
+ unsigned int const nr_ctrls = model->num_controls;
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *controls = msrs->controls;
unsigned int i;
for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr){
+ if (counters[i].addr) {
rdmsr(counters[i].addr,
counters[i].saved.low,
counters[i].saved.high);
}
}
-
+
for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr){
+ if (controls[i].addr) {
rdmsr(controls[i].addr,
controls[i].saved.low,
controls[i].saved.high);
@@ -126,15 +123,13 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
}
}
-
-static void nmi_save_registers(void * dummy)
+static void nmi_save_registers(void *dummy)
{
int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
+ struct op_msrs *msrs = &cpu_msrs[cpu];
nmi_cpu_save_registers(msrs);
}
-
static void free_msrs(void)
{
int i;
@@ -146,7 +141,6 @@ static void free_msrs(void)
}
}
-
static int allocate_msrs(void)
{
int success = 1;
@@ -173,11 +167,10 @@ static int allocate_msrs(void)
return success;
}
-
-static void nmi_cpu_setup(void * dummy)
+static void nmi_cpu_setup(void *dummy)
{
int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
+ struct op_msrs *msrs = &cpu_msrs[cpu];
spin_lock(&oprofilefs_lock);
model->setup_ctrs(msrs);
spin_unlock(&oprofilefs_lock);
@@ -193,13 +186,14 @@ static struct notifier_block profile_exceptions_nb = {
static int nmi_setup(void)
{
- int err=0;
+ int err = 0;
int cpu;
if (!allocate_msrs())
return -ENOMEM;
- if ((err = register_die_notifier(&profile_exceptions_nb))){
+ err = register_die_notifier(&profile_exceptions_nb);
+ if (err) {
free_msrs();
return err;
}
@@ -210,7 +204,7 @@ static int nmi_setup(void)
/* Assume saved/restored counters are the same on all CPUs */
model->fill_in_addresses(&cpu_msrs[0]);
- for_each_possible_cpu (cpu) {
+ for_each_possible_cpu(cpu) {
if (cpu != 0) {
memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
sizeof(struct op_msr) * model->num_counters);
@@ -226,39 +220,37 @@ static int nmi_setup(void)
return 0;
}
-
-static void nmi_restore_registers(struct op_msrs * msrs)
+static void nmi_restore_registers(struct op_msrs *msrs)
{
unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
- struct op_msr * counters = msrs->counters;
- struct op_msr * controls = msrs->controls;
+ unsigned int const nr_ctrls = model->num_controls;
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *controls = msrs->controls;
unsigned int i;
for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr){
+ if (controls[i].addr) {
wrmsr(controls[i].addr,
controls[i].saved.low,
controls[i].saved.high);
}
}
-
+
for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr){
+ if (counters[i].addr) {
wrmsr(counters[i].addr,
counters[i].saved.low,
counters[i].saved.high);
}
}
}
-
-static void nmi_cpu_shutdown(void * dummy)
+static void nmi_cpu_shutdown(void *dummy)
{
unsigned int v;
int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
-
+ struct op_msrs *msrs = &cpu_msrs[cpu];
+
/* restoring APIC_LVTPC can trigger an apic error because the delivery
* mode and vector nr combination can be illegal. That's by design: on
* power on apic lvt contain a zero vector nr which are legal only for
@@ -271,7 +263,6 @@ static void nmi_cpu_shutdown(void * dummy)
nmi_restore_registers(msrs);
}
-
static void nmi_shutdown(void)
{
nmi_enabled = 0;
@@ -281,45 +272,40 @@ static void nmi_shutdown(void)
free_msrs();
}
-
-static void nmi_cpu_start(void * dummy)
+static void nmi_cpu_start(void *dummy)
{
- struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+ struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
model->start(msrs);
}
-
static int nmi_start(void)
{
on_each_cpu(nmi_cpu_start, NULL, 0, 1);
return 0;
}
-
-
-static void nmi_cpu_stop(void * dummy)
+
+static void nmi_cpu_stop(void *dummy)
{
- struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+ struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
model->stop(msrs);
}
-
-
+
static void nmi_stop(void)
{
on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
}
-
struct op_counter_config counter_config[OP_MAX_COUNTER];
-static int nmi_create_files(struct super_block * sb, struct dentry * root)
+static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
for (i = 0; i < model->num_counters; ++i) {
- struct dentry * dir;
+ struct dentry *dir;
char buf[4];
-
- /* quick little hack to _not_ expose a counter if it is not
+
+ /* quick little hack to _not_ expose a counter if it is not
* available for use. This should protect userspace app.
* NOTE: assumes 1:1 mapping here (that counters are organized
* sequentially in their struct assignment).
@@ -329,21 +315,21 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
- oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
- oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
- oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
- oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
- oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
- oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+ oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+ oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+ oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+ oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+ oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+ oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
}
return 0;
}
-
+
static int p4force;
module_param(p4force, int, 0);
-
-static int __init p4_init(char ** cpu_type)
+
+static int __init p4_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
@@ -356,15 +342,15 @@ static int __init p4_init(char ** cpu_type)
return 1;
#else
switch (smp_num_siblings) {
- case 1:
- *cpu_type = "i386/p4";
- model = &op_p4_spec;
- return 1;
-
- case 2:
- *cpu_type = "i386/p4-ht";
- model = &op_p4_ht2_spec;
- return 1;
+ case 1:
+ *cpu_type = "i386/p4";
+ model = &op_p4_spec;
+ return 1;
+
+ case 2:
+ *cpu_type = "i386/p4-ht";
+ model = &op_p4_ht2_spec;
+ return 1;
}
#endif
@@ -373,8 +359,7 @@ static int __init p4_init(char ** cpu_type)
return 0;
}
-
-static int __init ppro_init(char ** cpu_type)
+static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
@@ -409,52 +394,52 @@ int __init op_nmi_init(struct oprofile_operations *ops)
if (!cpu_has_apic)
return -ENODEV;
-
+
switch (vendor) {
- case X86_VENDOR_AMD:
- /* Needs to be at least an Athlon (or hammer in 32bit mode) */
+ case X86_VENDOR_AMD:
+ /* Needs to be at least an Athlon (or hammer in 32bit mode) */
- switch (family) {
- default:
+ switch (family) {
+ default:
+ return -ENODEV;
+ case 6:
+ model = &op_athlon_spec;
+ cpu_type = "i386/athlon";
+ break;
+ case 0xf:
+ model = &op_athlon_spec;
+ /* Actually it could be i386/hammer too, but give
+ user space an consistent name. */
+ cpu_type = "x86-64/hammer";
+ break;
+ case 0x10:
+ model = &op_athlon_spec;
+ cpu_type = "x86-64/family10";
+ break;
+ }
+ break;
+
+ case X86_VENDOR_INTEL:
+ switch (family) {
+ /* Pentium IV */
+ case 0xf:
+ if (!p4_init(&cpu_type))
return -ENODEV;
- case 6:
- model = &op_athlon_spec;
- cpu_type = "i386/athlon";
- break;
- case 0xf:
- model = &op_athlon_spec;
- /* Actually it could be i386/hammer too, but give
- user space an consistent name. */
- cpu_type = "x86-64/hammer";
- break;
- case 0x10:
- model = &op_athlon_spec;
- cpu_type = "x86-64/family10";
- break;
- }
break;
-
- case X86_VENDOR_INTEL:
- switch (family) {
- /* Pentium IV */
- case 0xf:
- if (!p4_init(&cpu_type))
- return -ENODEV;
- break;
-
- /* A P6-class processor */
- case 6:
- if (!ppro_init(&cpu_type))
- return -ENODEV;
- break;
-
- default:
- return -ENODEV;
- }
+
+ /* A P6-class processor */
+ case 6:
+ if (!ppro_init(&cpu_type))
+ return -ENODEV;
break;
default:
return -ENODEV;
+ }
+ break;
+
+ default:
+ return -ENODEV;
}
init_sysfs();
@@ -469,7 +454,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
return 0;
}
-
void op_nmi_exit(void)
{
if (using_nmi)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 86274639066..52deabc72a6 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -109,6 +109,19 @@ static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
}
}
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+ struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+
+ if (rom_r->parent)
+ return;
+ if (rom_r->start)
+ /* we deal with BIOS assigned ROM later */
+ return;
+ if (!(pci_probe & PCI_ASSIGN_ROMS))
+ rom_r->start = rom_r->end = rom_r->flags = 0;
+}
+
/*
* Called after each bus is probed, but before its children
* are examined.
@@ -116,8 +129,12 @@ static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
void __devinit pcibios_fixup_bus(struct pci_bus *b)
{
+ struct pci_dev *dev;
+
pcibios_fixup_ghosts(b);
pci_read_bridge_bases(b);
+ list_for_each_entry(dev, &b->devices, bus_list)
+ pcibios_fixup_device_resources(dev);
}
/*
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6cff66dd0c9..cb63007e20b 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -19,7 +19,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
reg = 0xd0;
- for(pxb=0; pxb<2; pxb++) {
+ for(pxb = 0; pxb < 2; pxb++) {
pci_read_config_byte(d, reg++, &busno);
pci_read_config_byte(d, reg++, &suba);
pci_read_config_byte(d, reg++, &subb);
@@ -56,7 +56,7 @@ static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
int i;
printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d));
- for(i=0; i<4; i++)
+ for(i = 0; i < 4; i++)
d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
@@ -127,7 +127,7 @@ static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
NB latency to zero */
pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
- where = 0x95; /* the memory write queue timer register is
+ where = 0x95; /* the memory write queue timer register is
different for the KT266x's: 0x95 not 0x55 */
} else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
(d->revision == VIA_8363_KL133_REVISION_ID ||
@@ -230,7 +230,7 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
if ((offset) && (where == offset))
value = value & 0xfffffffc;
-
+
return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
}
@@ -271,8 +271,8 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
* after hot-remove, the pbus->devices is empty and this code
* will set the offsets to zero and the bus ops to parent's bus
* ops, which is unmodified.
- */
- for (i= GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
+ */
+ for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
quirk_aspm_offset[i] = 0;
pbus->ops = pbus->parent->ops;
@@ -286,17 +286,17 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
list_for_each_entry(dev, &pbus->devices, bus_list) {
/* There are 0 to 8 devices attached to this bus */
cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP);
- quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)]= cap_base + 0x10;
+ quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = cap_base + 0x10;
}
pbus->ops = &quirk_pcie_aspm_ops;
}
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk );
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk );
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk);
/*
* Fixup to mark boot BIOS video selected by BIOS before it changes
@@ -336,8 +336,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
* PCI header type NORMAL.
*/
if (bridge
- &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
- ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
+ && ((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+ || (bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
&config);
if (!(config & PCI_BRIDGE_CTL_VGA))
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 88d8f5c0ecb..ed07ce6c171 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -200,6 +200,7 @@ static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+ WARN_ON_ONCE(pirq >= 16);
return irqmap[read_config_nybble(router, 0x48, pirq-1)];
}
@@ -207,7 +208,8 @@ static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
{
static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
unsigned int val = irqmap[irq];
-
+
+ WARN_ON_ONCE(pirq >= 16);
if (val) {
write_config_nybble(router, 0x48, pirq-1, val);
return 1;
@@ -257,12 +259,16 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+ WARN_ON_ONCE(pirq >= 5);
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+ WARN_ON_ONCE(pirq >= 5);
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
@@ -275,12 +281,16 @@ static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq
static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+ WARN_ON_ONCE(pirq >= 4);
return read_config_nybble(router,0x43, pirqmap[pirq-1]);
}
static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+ WARN_ON_ONCE(pirq >= 4);
write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
return 1;
}
@@ -419,6 +429,7 @@ static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
+ WARN_ON_ONCE(pirq >= 9);
if (pirq > 8) {
printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
return 0;
@@ -428,6 +439,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
+ WARN_ON_ONCE(pirq >= 9);
if (pirq > 8) {
printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
return 0;
@@ -449,14 +461,14 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
*/
static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- outb_p(pirq, 0xc00);
+ outb(pirq, 0xc00);
return inb(0xc01) & 0xf;
}
static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- outb_p(pirq, 0xc00);
- outb_p(irq, 0xc01);
+ outb(pirq, 0xc00);
+ outb(irq, 0xc01);
return 1;
}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 998fd3ec0d6..efcf620d143 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -19,7 +19,7 @@ unsigned long saved_context_esp, saved_context_ebp;
unsigned long saved_context_esi, saved_context_edi;
unsigned long saved_context_eflags;
-void __save_processor_state(struct saved_context *ctxt)
+static void __save_processor_state(struct saved_context *ctxt)
{
mtrr_save_fixed_ranges(NULL);
kernel_fpu_begin();
@@ -74,19 +74,19 @@ static void fix_processor_context(void)
/*
* Now maybe reload the debug registers
*/
- if (current->thread.debugreg[7]){
- set_debugreg(current->thread.debugreg[0], 0);
- set_debugreg(current->thread.debugreg[1], 1);
- set_debugreg(current->thread.debugreg[2], 2);
- set_debugreg(current->thread.debugreg[3], 3);
+ if (current->thread.debugreg7) {
+ set_debugreg(current->thread.debugreg0, 0);
+ set_debugreg(current->thread.debugreg1, 1);
+ set_debugreg(current->thread.debugreg2, 2);
+ set_debugreg(current->thread.debugreg3, 3);
/* no 4 and 5 */
- set_debugreg(current->thread.debugreg[6], 6);
- set_debugreg(current->thread.debugreg[7], 7);
+ set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(current->thread.debugreg7, 7);
}
}
-void __restore_processor_state(struct saved_context *ctxt)
+static void __restore_processor_state(struct saved_context *ctxt)
{
/*
* control registers
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index f8b69d84238..60274d5746e 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -1 +1,6 @@
vdso.lds
+vdso-syms.lds
+vdso32-syms.lds
+vdso32-syscall-syms.lds
+vdso32-sysenter-syms.lds
+vdso32-int80-syms.lds
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index e7bff0fbac2..d28dda57470 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -1,39 +1,37 @@
#
-# x86-64 vDSO.
+# Building vDSO images for x86.
#
+VDSO64-$(CONFIG_X86_64) := y
+VDSO32-$(CONFIG_X86_32) := y
+VDSO32-$(CONFIG_COMPAT) := y
+
+vdso-install-$(VDSO64-y) += vdso.so
+vdso-install-$(VDSO32-y) += $(vdso32-y:=.so)
+
+
# files to link into the vdso
-# vdso-start.o has to be first
-vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
# files to link into kernel
-obj-y := vma.o vdso.o vdso-syms.o
+obj-$(VDSO64-y) += vma.o vdso.o
+obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
-targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) vdso-syms.o
-
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@
+targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
export CPPFLAGS_vdso.lds += -P -C
-vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv) \
- -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-SYSCFLAGS_vdso.so = $(vdso-flags)
-SYSCFLAGS_vdso.so.dbg = $(vdso-flags)
+VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
-
$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
- $(call if_changed,syscall)
+ $(call if_changed,vdso)
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
@@ -41,24 +39,96 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
-$(obj)/vclock_gettime.o: KBUILD_CFLAGS = $(CFL)
-$(obj)/vgetcpu.o: KBUILD_CFLAGS = $(CFL)
+$(vobjs): KBUILD_CFLAGS = $(CFL)
+
+targets += vdso-syms.lds
+obj-$(VDSO64-y) += vdso-syms.lds
+
+#
+# Match symbols in the DSO that look like VDSO*; produce a file of constants.
+#
+sed-vdsosym := -e 's/^00*/0/' \
+ -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
+
+$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+#
+# Build multiple 32-bit vDSO images to choose from at boot time.
+#
+obj-$(VDSO32-y) += vdso32-syms.lds
+vdso32.so-$(CONFIG_X86_32) += int80
+vdso32.so-$(CONFIG_COMPAT) += syscall
+vdso32.so-$(VDSO32-y) += sysenter
+
+CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+
+# This makes sure the $(obj) subdirectory exists even though vdso32/
+# is not a kbuild sub-make subdirectory.
+override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vdso-syms.o
-$(obj)/built-in.o: $(obj)/vdso-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+targets += vdso32/vdso32.lds
+targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so)
+targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
-SYSCFLAGS_vdso-syms.o = -r -d
-$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
- $(call if_changed,syscall)
+extra-y += $(vdso32.so-y:%=vdso32-%.so)
+$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so)
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
+
+$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+ $(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/note.o \
+ $(obj)/vdso32/%.o
+ $(call if_changed,vdso)
+
+# Make vdso32-*-syms.lds from each image, and then make sure they match.
+# The only difference should be that some do not define VDSO32_SYSENTER_RETURN.
+
+targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds)
+
+quiet_cmd_vdso32sym = VDSOSYM $@
+define cmd_vdso32sym
+ if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \
+ $(foreach H,$(filter-out FORCE,$^),\
+ if grep -q VDSO32_SYSENTER_RETURN $H; \
+ then diff -u $(@D)/.tmp_$(@F) $H; \
+ else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \
+ diff -u - $H; fi &&) : ;\
+ then mv -f $(@D)/.tmp_$(@F) $@; \
+ else rm -f $(@D)/.tmp_$(@F); exit 1; \
+ fi
+endef
+
+$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE
+ $(call if_changed,vdso32sym)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO $@
+ cmd_vdso = $(CC) -nostdlib -o $@ \
+ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
+
+VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+
+#
+# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+#
quiet_cmd_vdso_install = INSTALL $@
cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-vdso.so:
+$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
@mkdir -p $(MODLIB)/vdso
$(call cmd,vdso_install)
-vdso_install: vdso.so
+PHONY += vdso_install $(vdso-install-y)
+vdso_install: $(vdso-install-y)
+
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 5b54cdfb2b0..23476c2ebfc 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -19,7 +19,6 @@
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
-#include <asm/vgtod.h>
#include "vextern.h"
#define gtod vdso_vsyscall_gtod_data
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
new file mode 100644
index 00000000000..634a2cf6204
--- /dev/null
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -0,0 +1,64 @@
+/*
+ * Linker script for vDSO. This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+SECTIONS
+{
+ . = VDSO_PRELINK + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+ .data : {
+ *(.data*)
+ *(.sdata*)
+ *(.got.plt) *(.got)
+ *(.gnu.linkonce.d.*)
+ *(.bss*)
+ *(.dynbss*)
+ *(.gnu.linkonce.b.*)
+ }
+
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
+ /*
+ * Align the actual code well away from the non-instruction data.
+ * This is the best thing for the I-cache.
+ */
+ . = ALIGN(0x100);
+
+ .text : { *(.text*) } :text =0x90909090
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
diff --git a/arch/x86/vdso/vdso-start.S b/arch/x86/vdso/vdso-start.S
deleted file mode 100644
index 2dc2cdb84d6..00000000000
--- a/arch/x86/vdso/vdso-start.S
+++ /dev/null
@@ -1,2 +0,0 @@
- .globl vdso_kernel_start
-vdso_kernel_start:
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 667d3245d97..4e5dd3b4de7 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -1,79 +1,37 @@
/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page). This script controls its layout.
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO. We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
*/
-#include <asm/asm-offsets.h>
-#include "voffset.h"
#define VDSO_PRELINK 0xffffffffff700000
-
-SECTIONS
-{
- . = VDSO_PRELINK + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
-
- .text : { *(.text*) } :text
- .rodata : { *(.rodata*) } :text
- .data : {
- *(.data*)
- *(.sdata*)
- *(.bss*)
- *(.dynbss*)
- } :text
-
- .altinstructions : { *(.altinstructions) } :text
- .altinstr_replacement : { *(.altinstr_replacement) } :text
-
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.gnu.linkonce.d.*)
- *(.gnu.linkonce.b.*)
- } :text
-}
+#include "vdso-layout.lds.S"
/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ * This controls what userland symbols we export from the vDSO.
*/
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+VERSION {
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ getcpu;
+ __vdso_getcpu;
+ local: *;
+ };
}
+VDSO64_PRELINK = VDSO_PRELINK;
+
/*
- * This controls what symbols we export from the DSO.
+ * Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
*/
-VERSION
-{
- LINUX_2.6 {
- global:
- clock_gettime;
- __vdso_clock_gettime;
- gettimeofday;
- __vdso_gettimeofday;
- getcpu;
- __vdso_getcpu;
- local: *;
- };
-}
+#define VEXTERN(x) VDSO64_ ## x = vdso_ ## x;
+#include "vextern.h"
+#undef VEXTERN
diff --git a/arch/x86/kernel/sysenter_32.c b/arch/x86/vdso/vdso32-setup.c
index 5a2d951e260..348f1341e1c 100644
--- a/arch/x86/kernel/sysenter_32.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -23,6 +23,8 @@
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>
+#include <asm/vdso.h>
+#include <asm/proto.h>
enum {
VDSO_DISABLED = 0,
@@ -36,14 +38,24 @@ enum {
#define VDSO_DEFAULT VDSO_ENABLED
#endif
+#ifdef CONFIG_X86_64
+#define vdso_enabled sysctl_vsyscall32
+#define arch_setup_additional_pages syscall32_setup_pages
+#endif
+
+/*
+ * This is the difference between the prelinked addresses in the vDSO images
+ * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
+ * in the user address space.
+ */
+#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
+
/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
-EXPORT_SYMBOL_GPL(vdso_enabled);
-
static int __init vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
@@ -51,9 +63,18 @@ static int __init vdso_setup(char *s)
return 1;
}
-__setup("vdso=", vdso_setup);
+/*
+ * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
+ * behavior on both 64-bit and 32-bit kernels.
+ * On 32-bit kernels, vdso=[012] means the same thing.
+ */
+__setup("vdso32=", vdso_setup);
+
+#ifdef CONFIG_X86_32
+__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
-extern asmlinkage void sysenter_entry(void);
+EXPORT_SYMBOL_GPL(vdso_enabled);
+#endif
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
unsigned offset, unsigned size)
@@ -78,7 +99,7 @@ static __init void reloc_symtab(Elf32_Ehdr *ehdr,
case STT_FUNC:
case STT_SECTION:
case STT_FILE:
- sym->st_value += VDSO_HIGH_BASE;
+ sym->st_value += VDSO_ADDR_ADJUST;
}
}
}
@@ -104,7 +125,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
case DT_VERNEED:
case DT_ADDRRNGLO ... DT_ADDRRNGHI:
/* definitely pointers needing relocation */
- dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+ dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_ENCODING ... OLD_DT_LOOS-1:
@@ -113,7 +134,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
they're even */
if (dyn->d_tag >= DT_ENCODING &&
(dyn->d_tag & 1) == 0)
- dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+ dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_VERDEFNUM:
@@ -142,15 +163,15 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
int i;
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
- !elf_check_arch(ehdr) ||
+ !elf_check_arch_ia32(ehdr) ||
ehdr->e_type != ET_DYN);
- ehdr->e_entry += VDSO_HIGH_BASE;
+ ehdr->e_entry += VDSO_ADDR_ADJUST;
/* rebase phdrs */
phdr = (void *)ehdr + ehdr->e_phoff;
for (i = 0; i < ehdr->e_phnum; i++) {
- phdr[i].p_vaddr += VDSO_HIGH_BASE;
+ phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
/* relocate dynamic stuff */
if (phdr[i].p_type == PT_DYNAMIC)
@@ -163,7 +184,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
if (!(shdr[i].sh_flags & SHF_ALLOC))
continue;
- shdr[i].sh_addr += VDSO_HIGH_BASE;
+ shdr[i].sh_addr += VDSO_ADDR_ADJUST;
if (shdr[i].sh_type == SHT_SYMTAB ||
shdr[i].sh_type == SHT_DYNSYM)
@@ -172,6 +193,45 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
}
}
+/*
+ * These symbols are defined by vdso32.S to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vdso32_default_start, vdso32_default_end;
+extern const char vdso32_sysenter_start, vdso32_sysenter_end;
+static struct page *vdso32_pages[1];
+
+#ifdef CONFIG_X86_64
+
+static int use_sysenter __read_mostly = -1;
+
+#define vdso32_sysenter() (use_sysenter > 0)
+
+/* May not be __init: called during resume */
+void syscall32_cpu_init(void)
+{
+ if (use_sysenter < 0)
+ use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+ /* Load these always in case some future AMD CPU supports
+ SYSENTER from compat mode too. */
+ checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
+ checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+ wrmsrl(MSR_CSTAR, ia32_cstar_target);
+}
+
+#define compat_uses_vma 1
+
+static inline void map_compat_vdso(int map)
+{
+}
+
+#else /* CONFIG_X86_32 */
+
+#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
+
void enable_sep_cpu(void)
{
int cpu = get_cpu();
@@ -183,10 +243,10 @@ void enable_sep_cpu(void)
}
tss->x86_tss.ss1 = __KERNEL_CS;
- tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+ tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
+ wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
put_cpu();
}
@@ -209,13 +269,7 @@ static int __init gate_vma_init(void)
return 0;
}
-/*
- * These symbols are defined by vsyscall.o to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vsyscall_int80_start, vsyscall_int80_end;
-extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static struct page *syscall_pages[1];
+#define compat_uses_vma 0
static void map_compat_vdso(int map)
{
@@ -226,31 +280,35 @@ static void map_compat_vdso(int map)
vdso_mapped = map;
- __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+ __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
map ? PAGE_READONLY_EXEC : PAGE_NONE);
/* flush stray tlbs */
flush_tlb_all();
}
+#endif /* CONFIG_X86_64 */
+
int __init sysenter_setup(void)
{
void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
const void *vsyscall;
size_t vsyscall_len;
- syscall_pages[0] = virt_to_page(syscall_page);
+ vdso32_pages[0] = virt_to_page(syscall_page);
+#ifdef CONFIG_X86_32
gate_vma_init();
printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+#endif
- if (!boot_cpu_has(X86_FEATURE_SEP)) {
- vsyscall = &vsyscall_int80_start;
- vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
+ if (!vdso32_sysenter()) {
+ vsyscall = &vdso32_default_start;
+ vsyscall_len = &vdso32_default_end - &vdso32_default_start;
} else {
- vsyscall = &vsyscall_sysenter_start;
- vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
+ vsyscall = &vdso32_sysenter_start;
+ vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
}
memcpy(syscall_page, vsyscall, vsyscall_len);
@@ -259,9 +317,6 @@ int __init sysenter_setup(void)
return 0;
}
-/* Defined in vsyscall-sysenter.S */
-extern void SYSENTER_RETURN;
-
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
@@ -286,7 +341,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
ret = addr;
goto up_fail;
}
+ }
+ if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*
@@ -300,7 +357,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
VM_ALWAYSDUMP,
- syscall_pages);
+ vdso32_pages);
if (ret)
goto up_fail;
@@ -308,7 +365,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
- (void *)VDSO_SYM(&SYSENTER_RETURN);
+ VDSO32_SYMBOL(addr, SYSENTER_RETURN);
up_fail:
up_write(&mm->mmap_sem);
@@ -316,6 +373,45 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
return ret;
}
+#ifdef CONFIG_X86_64
+
+__initcall(sysenter_setup);
+
+#ifdef CONFIG_SYSCTL
+/* Register vsyscall32 into the ABI table */
+#include <linux/sysctl.h>
+
+static ctl_table abi_table2[] = {
+ {
+ .procname = "vsyscall32",
+ .data = &sysctl_vsyscall32,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {}
+};
+
+static ctl_table abi_root_table2[] = {
+ {
+ .ctl_name = CTL_ABI,
+ .procname = "abi",
+ .mode = 0555,
+ .child = abi_table2
+ },
+ {}
+};
+
+static __init int ia32_binfmt_init(void)
+{
+ register_sysctl_table(abi_root_table2);
+ return 0;
+}
+__initcall(ia32_binfmt_init);
+#endif
+
+#else /* CONFIG_X86_32 */
+
const char *arch_vma_name(struct vm_area_struct *vma)
{
if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
@@ -344,3 +440,5 @@ int in_gate_area_no_task(unsigned long addr)
{
return 0;
}
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
new file mode 100644
index 00000000000..1e36f72cab8
--- /dev/null
+++ b/arch/x86/vdso/vdso32.S
@@ -0,0 +1,19 @@
+#include <linux/init.h>
+
+__INITDATA
+
+ .globl vdso32_default_start, vdso32_default_end
+vdso32_default_start:
+#ifdef CONFIG_X86_32
+ .incbin "arch/x86/vdso/vdso32-int80.so"
+#else
+ .incbin "arch/x86/vdso/vdso32-syscall.so"
+#endif
+vdso32_default_end:
+
+ .globl vdso32_sysenter_start, vdso32_sysenter_end
+vdso32_sysenter_start:
+ .incbin "arch/x86/vdso/vdso32-sysenter.so"
+vdso32_sysenter_end:
+
+__FINIT
diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/vdso/vdso32/.gitignore
new file mode 100644
index 00000000000..e45fba9d0ce
--- /dev/null
+++ b/arch/x86/vdso/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/x86/kernel/vsyscall-int80_32.S b/arch/x86/vdso/vdso32/int80.S
index 103cab6aa7c..b15b7c01aed 100644
--- a/arch/x86/kernel/vsyscall-int80_32.S
+++ b/arch/x86/vdso/vdso32/int80.S
@@ -1,15 +1,15 @@
/*
- * Code for the vsyscall page. This version uses the old int $0x80 method.
+ * Code for the vDSO. This version uses the old int $0x80 method.
*
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- * for details.
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
*/
+#include "sigreturn.S"
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
+ ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
int $0x80
@@ -47,7 +47,10 @@ __kernel_vsyscall:
.LENDFDEDLSI:
.previous
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "vsyscall-sigreturn_32.S"
+ /*
+ * Pad out the segment to match the size of the sysenter.S version.
+ */
+VDSO32_vsyscall_eh_frame_size = 0x40
+ .section .data,"aw",@progbits
+ .space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
+ .previous
diff --git a/arch/x86/kernel/vsyscall-note_32.S b/arch/x86/vdso/vdso32/note.S
index fcf376a37f7..c83f2573469 100644
--- a/arch/x86/kernel/vsyscall-note_32.S
+++ b/arch/x86/vdso/vdso32/note.S
@@ -33,12 +33,11 @@ ELFNOTE_END
* at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
*/
-#include "../../x86/xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
+#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
- .globl VDSO_NOTE_MASK
ELFNOTE_START(GNU, 2, "a")
.long 1 /* ncaps */
-VDSO_NOTE_MASK:
+VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
.long 0 /* mask */
.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
ELFNOTE_END
diff --git a/arch/x86/kernel/vsyscall-sigreturn_32.S b/arch/x86/vdso/vdso32/sigreturn.S
index a92262f4165..31776d0efc8 100644
--- a/arch/x86/kernel/vsyscall-sigreturn_32.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -1,41 +1,42 @@
/*
- * Common code for the sigreturn entry points on the vsyscall page.
+ * Common code for the sigreturn entry points in vDSO images.
* So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point. The kernel assumes that the addresses of these
- * routines are constant for all vsyscall implementations.
+ * This file is #include'd by int80.S et al to define them first thing.
+ * The kernel assumes that the addresses of these routines are constant
+ * for all vDSO implementations.
*/
-#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <asm/unistd_32.h>
#include <asm/asm-offsets.h>
-
-/* XXX
- Should these be named "_sigtramp" or something?
-*/
+#ifndef SYSCALL_ENTER_KERNEL
+#define SYSCALL_ENTER_KERNEL int $0x80
+#endif
.text
- .org __kernel_vsyscall+32,0x90
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
+ ALIGN
__kernel_sigreturn:
.LSTART_sigreturn:
popl %eax /* XXX does this mean it needs unwind info? */
movl $__NR_sigreturn, %eax
- int $0x80
+ SYSCALL_ENTER_KERNEL
.LEND_sigreturn:
+ nop
.size __kernel_sigreturn,.-.LSTART_sigreturn
- .balign 32
.globl __kernel_rt_sigreturn
.type __kernel_rt_sigreturn,@function
+ ALIGN
__kernel_rt_sigreturn:
.LSTART_rt_sigreturn:
movl $__NR_rt_sigreturn, %eax
- int $0x80
+ SYSCALL_ENTER_KERNEL
.LEND_rt_sigreturn:
+ nop
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
- .balign 32
.previous
.section .eh_frame,"a",@progbits
@@ -70,9 +71,9 @@ __kernel_rt_sigreturn:
be the value of the stack pointer in the caller. This means
that we must define the CFA of this body of code to be the
saved value of the stack pointer in the sigcontext. Which
- also means that there is no fixed relation to the other
+ also means that there is no fixed relation to the other
saved registers, which means that we must use DW_CFA_expression
- to compute their addresses. It also means that when we
+ to compute their addresses. It also means that when we
adjust the stack with the popl, we have to do it all over again. */
#define do_cfa_expr(offset) \
@@ -91,27 +92,27 @@ __kernel_rt_sigreturn:
.sleb128 offset; /* offset */ \
1:
- do_cfa_expr(SIGCONTEXT_esp+4)
- do_expr(0, SIGCONTEXT_eax+4)
- do_expr(1, SIGCONTEXT_ecx+4)
- do_expr(2, SIGCONTEXT_edx+4)
- do_expr(3, SIGCONTEXT_ebx+4)
- do_expr(5, SIGCONTEXT_ebp+4)
- do_expr(6, SIGCONTEXT_esi+4)
- do_expr(7, SIGCONTEXT_edi+4)
- do_expr(8, SIGCONTEXT_eip+4)
+ do_cfa_expr(IA32_SIGCONTEXT_sp+4)
+ do_expr(0, IA32_SIGCONTEXT_ax+4)
+ do_expr(1, IA32_SIGCONTEXT_cx+4)
+ do_expr(2, IA32_SIGCONTEXT_dx+4)
+ do_expr(3, IA32_SIGCONTEXT_bx+4)
+ do_expr(5, IA32_SIGCONTEXT_bp+4)
+ do_expr(6, IA32_SIGCONTEXT_si+4)
+ do_expr(7, IA32_SIGCONTEXT_di+4)
+ do_expr(8, IA32_SIGCONTEXT_ip+4)
.byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
- do_cfa_expr(SIGCONTEXT_esp)
- do_expr(0, SIGCONTEXT_eax)
- do_expr(1, SIGCONTEXT_ecx)
- do_expr(2, SIGCONTEXT_edx)
- do_expr(3, SIGCONTEXT_ebx)
- do_expr(5, SIGCONTEXT_ebp)
- do_expr(6, SIGCONTEXT_esi)
- do_expr(7, SIGCONTEXT_edi)
- do_expr(8, SIGCONTEXT_eip)
+ do_cfa_expr(IA32_SIGCONTEXT_sp)
+ do_expr(0, IA32_SIGCONTEXT_ax)
+ do_expr(1, IA32_SIGCONTEXT_cx)
+ do_expr(2, IA32_SIGCONTEXT_dx)
+ do_expr(3, IA32_SIGCONTEXT_bx)
+ do_expr(5, IA32_SIGCONTEXT_bp)
+ do_expr(6, IA32_SIGCONTEXT_si)
+ do_expr(7, IA32_SIGCONTEXT_di)
+ do_expr(8, IA32_SIGCONTEXT_ip)
.align 4
.LENDFDEDLSI1:
@@ -128,15 +129,15 @@ __kernel_rt_sigreturn:
slightly less complicated than the above, since we don't
modify the stack pointer in the process. */
- do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
- do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
- do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
- do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
- do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
- do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
- do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
- do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
- do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
+ do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
+ do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
+ do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
+ do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
+ do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
+ do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
+ do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
+ do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
+ do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
.align 4
.LENDFDEDLSI2:
diff --git a/arch/x86/ia32/vsyscall-syscall.S b/arch/x86/vdso/vdso32/syscall.S
index cf9ef678de3..5415b5613d5 100644
--- a/arch/x86/ia32/vsyscall-syscall.S
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -1,16 +1,18 @@
/*
- * Code for the vsyscall page. This version uses the syscall instruction.
+ * Code for the vDSO. This version uses the syscall instruction.
+ *
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
*/
+#define SYSCALL_ENTER_KERNEL syscall
+#include "sigreturn.S"
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
#include <asm/segment.h>
- .code32
.text
- .section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
+ ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
push %ebp
@@ -64,6 +66,12 @@ __kernel_vsyscall:
.uleb128 4
.align 4
.LENDFDE1:
+ .previous
-#define SYSCALL_ENTER_KERNEL syscall
-#include "vsyscall-sigreturn.S"
+ /*
+ * Pad out the segment to match the size of the sysenter.S version.
+ */
+VDSO32_vsyscall_eh_frame_size = 0x40
+ .section .data,"aw",@progbits
+ .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
+ .previous
diff --git a/arch/x86/kernel/vsyscall-sysenter_32.S b/arch/x86/vdso/vdso32/sysenter.S
index ed879bf4299..e2800affa75 100644
--- a/arch/x86/kernel/vsyscall-sysenter_32.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -1,11 +1,10 @@
/*
- * Code for the vsyscall page. This version uses the sysenter instruction.
+ * Code for the vDSO. This version uses the sysenter instruction.
*
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- * for details.
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
*/
+#include "sigreturn.S"
/*
* The caller puts arg2 in %ecx, which gets pushed. The kernel will use
@@ -23,11 +22,12 @@
* arg6 from the stack.
*
* You can not use this vsyscall for the clone() syscall because the
- * three dwords on the parent stack do not get copied to the child.
+ * three words on the parent stack do not get copied to the child.
*/
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
+ ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
push %ecx
@@ -45,8 +45,7 @@ __kernel_vsyscall:
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
jmp .Lenter_kernel
/* 16: System call normal return point is here! */
- .globl SYSENTER_RETURN /* Symbol used by sysenter.c */
-SYSENTER_RETURN:
+VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
pop %ebp
.Lpop_ebp:
pop %edx
@@ -85,38 +84,33 @@ SYSENTER_RETURN:
.uleb128 0
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_ecx-.LSTART_vsyscall
+ .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_edx-.Lpush_ecx
+ .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lenter_kernel-.Lpush_edx
+ .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x10 /* RA at offset 16 now */
.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
/* Finally the epilogue. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ebp-.Lenter_kernel
+ .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
.byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_edx-.Lpop_ebp
+ .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ecx-.Lpop_edx
+ .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x04 /* RA at offset 4 now */
.align 4
.LENDFDEDLSI:
.previous
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "vsyscall-sigreturn_32.S"
+ /*
+ * Emit a symbol with the size of this .eh_frame data,
+ * to verify it matches the other versions.
+ */
+VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
new file mode 100644
index 00000000000..976124bb5f9
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -0,0 +1,37 @@
+/*
+ * Linker script for 32-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO. We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ */
+
+#define VDSO_PRELINK 0
+#include "../vdso-layout.lds.S"
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__kernel_vsyscall);
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION
+{
+ LINUX_2.5 {
+ global:
+ __kernel_vsyscall;
+ __kernel_sigreturn;
+ __kernel_rt_sigreturn;
+ local: *;
+ };
+}
+
+/*
+ * Symbols we define here called VDSO* get their values into vdso32-syms.h.
+ */
+VDSO32_PRELINK = VDSO_PRELINK;
+VDSO32_vsyscall = __kernel_vsyscall;
+VDSO32_sigreturn = __kernel_sigreturn;
+VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 3b1ae1abfba..c8097f17f8a 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -15,11 +15,11 @@
long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
- unsigned int dummy, p;
+ unsigned int p;
if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
+ native_read_tscp(&p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index ff9333e5fb0..3fdd51497a8 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -11,23 +11,20 @@
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
-#include "voffset.h"
+#include <asm/vdso.h>
-int vdso_enabled = 1;
-
-#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
-#include "vextern.h"
+#include "vextern.h" /* Just for VMAGIC. */
#undef VEXTERN
-extern char vdso_kernel_start[], vdso_start[], vdso_end[];
+int vdso_enabled = 1;
+
+extern char vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;
struct page **vdso_pages;
-static inline void *var_ref(void *vbase, char *var, char *name)
+static inline void *var_ref(void *p, char *name)
{
- unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
- void *p = vbase + offset;
if (*(void **)p != (void *)VMAGIC) {
printk("VDSO: variable %s broken\n", name);
vdso_enabled = 0;
@@ -62,9 +59,8 @@ static int __init init_vdso_vars(void)
vdso_enabled = 0;
}
-#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
#define VEXTERN(x) \
- V(vdso_ ## x) = &__ ## x;
+ *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN
return 0;
diff --git a/arch/x86/vdso/voffset.h b/arch/x86/vdso/voffset.h
deleted file mode 100644
index 4af67c79085..00000000000
--- a/arch/x86/vdso/voffset.h
+++ /dev/null
@@ -1 +0,0 @@
-#define VDSO_TEXT_OFFSET 0x600
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index fbfa55ce0d5..4d5f2649bee 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
config XEN
bool "Xen guest support"
select PARAVIRT
+ depends on X86_32
depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
help
This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 79ad1525215..de647bc6e74 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -141,8 +141,8 @@ static void __init xen_banner(void)
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
}
-static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+ unsigned int *cx, unsigned int *dx)
{
unsigned maskedx = ~0;
@@ -150,18 +150,18 @@ static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
- if (*eax == 1)
+ if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
- *edx &= maskedx;
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));
+ *dx &= maskedx;
}
static void xen_set_debugreg(int reg, unsigned long val)
@@ -275,19 +275,12 @@ static unsigned long xen_store_tr(void)
static void xen_set_ldt(const void *addr, unsigned entries)
{
- unsigned long linear_addr = (unsigned long)addr;
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_SET_LDT;
- if (linear_addr) {
- /* ldt my be vmalloced, use arbitrary_virt_to_machine */
- xmaddr_t maddr;
- maddr = arbitrary_virt_to_machine((unsigned long)addr);
- linear_addr = (unsigned long)maddr.maddr;
- }
- op->arg1.linear_addr = linear_addr;
+ op->arg1.linear_addr = (unsigned long)addr;
op->arg2.nr_ents = entries;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
@@ -295,7 +288,7 @@ static void xen_set_ldt(const void *addr, unsigned entries)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
+static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long *frames;
unsigned long va = dtr->address;
@@ -357,11 +350,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
}
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
- u32 low, u32 high)
+ const void *ptr)
{
unsigned long lp = (unsigned long)&dt[entrynum];
xmaddr_t mach_lp = virt_to_machine(lp);
- u64 entry = (u64)high << 32 | low;
+ u64 entry = *(u64 *)ptr;
preempt_disable();
@@ -395,12 +388,11 @@ static int cvt_gate_to_trap(int vector, u32 low, u32 high,
}
/* Locations of each CPU's IDT */
-static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
+static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
/* Set an IDT entry. If the entry is part of the current IDT, then
also update Xen. */
-static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
- u32 low, u32 high)
+static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
{
unsigned long p = (unsigned long)&dt[entrynum];
unsigned long start, end;
@@ -412,14 +404,15 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
xen_mc_flush();
- write_dt_entry(dt, entrynum, low, high);
+ native_write_idt_entry(dt, entrynum, g);
if (p >= start && (p + 8) <= end) {
struct trap_info info[2];
+ u32 *desc = (u32 *)g;
info[1].address = 0;
- if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
+ if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
if (HYPERVISOR_set_trap_table(info))
BUG();
}
@@ -427,7 +420,7 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
-static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
+static void xen_convert_trap_info(const struct desc_ptr *desc,
struct trap_info *traps)
{
unsigned in, out, count;
@@ -446,7 +439,7 @@ static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
void xen_copy_trap_info(struct trap_info *traps)
{
- const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
+ const struct desc_ptr *desc = &__get_cpu_var(idt_desc);
xen_convert_trap_info(desc, traps);
}
@@ -454,7 +447,7 @@ void xen_copy_trap_info(struct trap_info *traps)
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
hold a spinlock to protect the static traps[] array (static because
it avoids allocation, and saves stack space). */
-static void xen_load_idt(const struct Xgt_desc_struct *desc)
+static void xen_load_idt(const struct desc_ptr *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
@@ -475,22 +468,21 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc)
/* Write a GDT descriptor entry. Ignore LDT descriptors, since
they're handled differently. */
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
- u32 low, u32 high)
+ const void *desc, int type)
{
preempt_disable();
- switch ((high >> 8) & 0xff) {
- case DESCTYPE_LDT:
- case DESCTYPE_TSS:
+ switch (type) {
+ case DESC_LDT:
+ case DESC_TSS:
/* ignore */
break;
default: {
xmaddr_t maddr = virt_to_machine(&dt[entry]);
- u64 desc = (u64)high << 32 | low;
xen_mc_flush();
- if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
+ if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
BUG();
}
@@ -499,11 +491,11 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
preempt_enable();
}
-static void xen_load_esp0(struct tss_struct *tss,
+static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
@@ -521,12 +513,12 @@ static void xen_io_delay(void)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(unsigned long reg)
{
return 0;
}
-static void xen_apic_write(unsigned long reg, unsigned long val)
+static void xen_apic_write(unsigned long reg, u32 val)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
@@ -666,6 +658,13 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}
+/* Early release_pt assumes that all pts are pinned, since there's
+ only init_mm and anything attached to that is pinned. */
+static void xen_release_pt_init(u32 pfn)
+{
+ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+}
+
static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
{
struct mmuext_op op;
@@ -677,7 +676,7 @@ static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
@@ -686,7 +685,7 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ pin_pagetable_pfn(level, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -694,6 +693,16 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
}
}
+static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+{
+ xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE);
+}
+
+static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+{
+ xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE);
+}
+
/* This should never happen until we're OK to use struct page */
static void xen_release_pt(u32 pfn)
{
@@ -796,6 +805,9 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
pv_mmu_ops.alloc_pt = xen_alloc_pt;
+ pv_mmu_ops.alloc_pd = xen_alloc_pd;
+ pv_mmu_ops.release_pt = xen_release_pt;
+ pv_mmu_ops.release_pd = xen_release_pt;
pv_mmu_ops.set_pte = xen_set_pte;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -953,7 +965,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,
.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
- .irq_enable_sysexit = NULL, /* never called */
+ .irq_enable_syscall_ret = NULL, /* never called */
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
@@ -968,7 +980,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.write_ldt_entry = xen_write_ldt_entry,
.write_gdt_entry = xen_write_gdt_entry,
.write_idt_entry = xen_write_idt_entry,
- .load_esp0 = xen_load_esp0,
+ .load_sp0 = xen_load_sp0,
.set_iopl_mask = xen_set_iopl_mask,
.io_delay = xen_io_delay,
@@ -1019,10 +1031,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.pte_update_defer = paravirt_nop,
.alloc_pt = xen_alloc_pt_init,
- .release_pt = xen_release_pt,
- .alloc_pd = paravirt_nop,
+ .release_pt = xen_release_pt_init,
+ .alloc_pd = xen_alloc_pt_init,
.alloc_pd_clone = paravirt_nop,
- .release_pd = paravirt_nop,
+ .release_pd = xen_release_pt_init,
#ifdef CONFIG_HIGHPTE
.kmap_atomic_pte = xen_kmap_atomic_pte,
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index 6d1da5809e6..dcf613e1758 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -465,7 +465,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
+void xen_evtchn_do_upcall(struct pt_regs *regs)
{
int cpu = get_cpu();
struct shared_info *s = HYPERVISOR_shared_info;
@@ -487,7 +487,7 @@ fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
int irq = evtchn_to_irq[port];
if (irq != -1) {
- regs->orig_eax = ~irq;
+ regs->orig_ax = ~irq;
do_IRQ(regs);
}
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0ac6c5dc49b..45aa771e73a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,8 @@
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
- pte_t *pte = lookup_address(address);
+ int level;
+ pte_t *pte = lookup_address(address, &level);
unsigned offset = address & PAGE_MASK;
BUG_ON(pte == NULL);
@@ -70,8 +71,9 @@ void make_lowmem_page_readonly(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
+ int level;
- pte = lookup_address(address);
+ pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
ptev = pte_wrprotect(*pte);
@@ -84,8 +86,9 @@ void make_lowmem_page_readwrite(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
+ int level;
- pte = lookup_address(address);
+ pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
ptev = pte_mkwrite(*pte);
@@ -241,12 +244,12 @@ unsigned long long xen_pgd_val(pgd_t pgd)
pte_t xen_make_pte(unsigned long long pte)
{
- if (pte & 1)
+ if (pte & _PAGE_PRESENT) {
pte = phys_to_machine(XPADDR(pte)).maddr;
+ pte &= ~(_PAGE_PCD | _PAGE_PWT);
+ }
- pte &= ~_PAGE_PCD;
-
- return (pte_t){ pte, pte >> 32 };
+ return (pte_t){ .pte = pte };
}
pmd_t xen_make_pmd(unsigned long long pmd)
@@ -290,10 +293,10 @@ unsigned long xen_pgd_val(pgd_t pgd)
pte_t xen_make_pte(unsigned long pte)
{
- if (pte & _PAGE_PRESENT)
+ if (pte & _PAGE_PRESENT) {
pte = phys_to_machine(XPADDR(pte)).maddr;
-
- pte &= ~_PAGE_PCD;
+ pte &= ~(_PAGE_PCD | _PAGE_PWT);
+ }
return (pte_t){ pte };
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f84e7722664..3bad4773a2f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -10,6 +10,7 @@
#include <linux/pm.h>
#include <asm/elf.h>
+#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/xen/hypervisor.h>
@@ -59,12 +60,10 @@ static void xen_idle(void)
/*
* Set the bit indicating "nosegneg" library variants should be used.
*/
-static void fiddle_vdso(void)
+static void __init fiddle_vdso(void)
{
- extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S. */
- extern char vsyscall_int80_start;
- u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK +
- &vsyscall_int80_start);
+ extern const char vdso32_default_start;
+ u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c1b131bcdcb..aafc5443740 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -146,7 +146,7 @@ void __init xen_smp_prepare_boot_cpu(void)
old memory can be recycled */
make_lowmem_page_readwrite(&per_cpu__gdt_page);
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
cpus_clear(per_cpu(cpu_sibling_map, cpu));
/*
* cpu_core_map lives in a per cpu area that is cleared
@@ -163,7 +163,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
cpus_clear(per_cpu(cpu_sibling_map, cpu));
/*
* cpu_core_ map will be zeroed when the per
@@ -239,10 +239,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.esp0;
+ ctxt->kernel_sp = idle->thread.sp0;
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index d083ff5ef08..b3721fd6877 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -592,7 +592,7 @@ __init void xen_time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- tsc_disable = 0;
+ setup_force_cpu_cap(X86_FEATURE_TSC);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index f8d6937db2e..288d587ce73 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -4,16 +4,18 @@
#ifdef CONFIG_XEN
#include <linux/elfnote.h>
+#include <linux/init.h>
#include <asm/boot.h>
#include <xen/interface/elfnote.h>
-.pushsection .init.text
+ __INIT
ENTRY(startup_xen)
movl %esi,xen_start_info
cld
movl $(init_thread_union+THREAD_SIZE),%esp
jmp xen_start_kernel
-.popsection
+
+ __FINIT
.pushsection .bss.page_aligned
.align PAGE_SIZE_asm
diff --git a/drivers/Makefile b/drivers/Makefile
index 8cb37e3557d..d92d4d82d00 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_FUSION) += message/
obj-$(CONFIG_FIREWIRE) += firewire/
-obj-$(CONFIG_IEEE1394) += ieee1394/
+obj-y += ieee1394/
obj-$(CONFIG_UIO) += uio/
obj-y += cdrom/
obj-y += auxdisplay/
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2235f4e02d2..eb1f82f7915 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -357,6 +357,26 @@ int acpi_processor_resume(struct acpi_device * device)
return 0;
}
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+static int tsc_halts_in_c(int state)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /*
+ * AMD Fam10h TSC will tick in all
+ * C/P/S0/S1 states when this bit is set.
+ */
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return 0;
+ /*FALL THROUGH*/
+ case X86_VENDOR_INTEL:
+ /* Several cases known where TSC halts in C2 too */
+ default:
+ return state > ACPI_STATE_C1;
+ }
+}
+#endif
+
#ifndef CONFIG_CPU_IDLE
static void acpi_processor_idle(void)
{
@@ -516,7 +536,8 @@ static void acpi_processor_idle(void)
#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC halts in C2, so notify users */
- mark_tsc_unstable("possible TSC halt in C2");
+ if (tsc_halts_in_c(ACPI_STATE_C2))
+ mark_tsc_unstable("possible TSC halt in C2");
#endif
/* Compute time (ticks) that we were actually asleep */
sleep_ticks = ticks_elapsed(t1, t2);
@@ -534,6 +555,7 @@ static void acpi_processor_idle(void)
break;
case ACPI_STATE_C3:
+ acpi_unlazy_tlb(smp_processor_id());
/*
* Must be done before busmaster disable as we might
* need to access HPET !
@@ -579,7 +601,8 @@ static void acpi_processor_idle(void)
#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC halts in C3, so notify users */
- mark_tsc_unstable("TSC halts in C3");
+ if (tsc_halts_in_c(ACPI_STATE_C3))
+ mark_tsc_unstable("TSC halts in C3");
#endif
/* Compute time (ticks) that we were actually asleep */
sleep_ticks = ticks_elapsed(t1, t2);
@@ -1423,6 +1446,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
return 0;
}
+ acpi_unlazy_tlb(smp_processor_id());
/*
* Must be done before busmaster disable as we might need to
* access HPET !
@@ -1443,7 +1467,8 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC could halt in idle, so notify users */
- mark_tsc_unstable("TSC halts in idle");;
+ if (tsc_halts_in_c(cx->type))
+ mark_tsc_unstable("TSC halts in idle");;
#endif
sleep_ticks = ticks_elapsed(t1, t2);
@@ -1554,7 +1579,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC could halt in idle, so notify users */
- mark_tsc_unstable("TSC halts in idle");
+ if (tsc_halts_in_c(ACPI_STATE_C3))
+ mark_tsc_unstable("TSC halts in idle");
#endif
sleep_ticks = ticks_elapsed(t1, t2);
/* Tell the scheduler how much we idled: */
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index aa5ddb716ff..1ffb381130c 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -145,7 +145,6 @@ static void *m1541_alloc_page(struct agp_bridge_data *bridge)
void *addr = agp_generic_alloc_page(agp_bridge);
u32 temp;
- global_flush_tlb();
if (!addr)
return NULL;
@@ -162,7 +161,6 @@ static void ali_destroy_page(void * addr, int flags)
if (flags & AGP_PAGE_DESTROY_UNMAP) {
global_cache_flush(); /* is this really needed? --hch */
agp_generic_destroy_page(addr, flags);
- global_flush_tlb();
} else
agp_generic_destroy_page(addr, flags);
}
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 832ded20fe7..2720882e66f 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -147,7 +147,6 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge)
printk(KERN_ERR PFX "unable to get memory for scratch page.\n");
return -ENOMEM;
}
- flush_agp_mappings();
bridge->scratch_page_real = virt_to_gart(addr);
bridge->scratch_page =
@@ -191,7 +190,6 @@ err_out:
if (bridge->driver->needs_scratch_page) {
bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
AGP_PAGE_DESTROY_UNMAP);
- flush_agp_mappings();
bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
AGP_PAGE_DESTROY_FREE);
}
@@ -219,7 +217,6 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge)
bridge->driver->needs_scratch_page) {
bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
AGP_PAGE_DESTROY_UNMAP);
- flush_agp_mappings();
bridge->driver->agp_destroy_page(gart_to_virt(bridge->scratch_page_real),
AGP_PAGE_DESTROY_FREE);
}
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 64b2f6d7059..1a4674ce0c7 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -197,7 +197,6 @@ void agp_free_memory(struct agp_memory *curr)
for (i = 0; i < curr->page_count; i++) {
curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]), AGP_PAGE_DESTROY_UNMAP);
}
- flush_agp_mappings();
for (i = 0; i < curr->page_count; i++) {
curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]), AGP_PAGE_DESTROY_FREE);
}
@@ -267,8 +266,6 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge,
}
new->bridge = bridge;
- flush_agp_mappings();
-
return new;
}
EXPORT_SYMBOL(agp_allocate_memory);
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c
index e72a83e2bad..76f581c85a7 100644
--- a/drivers/char/agp/i460-agp.c
+++ b/drivers/char/agp/i460-agp.c
@@ -527,7 +527,6 @@ static void *i460_alloc_page (struct agp_bridge_data *bridge)
if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
page = agp_generic_alloc_page(agp_bridge);
- global_flush_tlb();
} else
/* Returning NULL would cause problems */
/* AK: really dubious code. */
@@ -539,7 +538,6 @@ static void i460_destroy_page (void *page, int flags)
{
if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
agp_generic_destroy_page(page, flags);
- global_flush_tlb();
}
}
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 03eac1eb8e0..189efb6ef97 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -210,13 +210,11 @@ static void *i8xx_alloc_pages(void)
if (page == NULL)
return NULL;
- if (change_page_attr(page, 4, PAGE_KERNEL_NOCACHE) < 0) {
- change_page_attr(page, 4, PAGE_KERNEL);
- global_flush_tlb();
+ if (set_pages_uc(page, 4) < 0) {
+ set_pages_wb(page, 4);
__free_pages(page, 2);
return NULL;
}
- global_flush_tlb();
get_page(page);
atomic_inc(&agp_bridge->current_memory_agp);
return page_address(page);
@@ -230,8 +228,7 @@ static void i8xx_destroy_pages(void *addr)
return;
page = virt_to_page(addr);
- change_page_attr(page, 4, PAGE_KERNEL);
- global_flush_tlb();
+ set_pages_wb(page, 4);
put_page(page);
__free_pages(page, 2);
atomic_dec(&agp_bridge->current_memory_agp);
@@ -341,7 +338,6 @@ static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type)
switch (pg_count) {
case 1: addr = agp_bridge->driver->agp_alloc_page(agp_bridge);
- global_flush_tlb();
break;
case 4:
/* kludge to get 4 physical pages for ARGB cursor */
@@ -404,7 +400,6 @@ static void intel_i810_free_by_type(struct agp_memory *curr)
else {
agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[0]),
AGP_PAGE_DESTROY_UNMAP);
- global_flush_tlb();
agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[0]),
AGP_PAGE_DESTROY_FREE);
}
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4c16778e3f8..465ad35ed38 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -600,63 +600,6 @@ static int hpet_is_known(struct hpet_data *hdp)
return 0;
}
-EXPORT_SYMBOL(hpet_alloc);
-EXPORT_SYMBOL(hpet_register);
-EXPORT_SYMBOL(hpet_unregister);
-EXPORT_SYMBOL(hpet_control);
-
-int hpet_register(struct hpet_task *tp, int periodic)
-{
- unsigned int i;
- u64 mask;
- struct hpet_timer __iomem *timer;
- struct hpet_dev *devp;
- struct hpets *hpetp;
-
- switch (periodic) {
- case 1:
- mask = Tn_PER_INT_CAP_MASK;
- break;
- case 0:
- mask = 0;
- break;
- default:
- return -EINVAL;
- }
-
- tp->ht_opaque = NULL;
-
- spin_lock_irq(&hpet_task_lock);
- spin_lock(&hpet_lock);
-
- for (devp = NULL, hpetp = hpets; hpetp && !devp; hpetp = hpetp->hp_next)
- for (timer = hpetp->hp_hpet->hpet_timers, i = 0;
- i < hpetp->hp_ntimer; i++, timer++) {
- if ((readq(&timer->hpet_config) & Tn_PER_INT_CAP_MASK)
- != mask)
- continue;
-
- devp = &hpetp->hp_dev[i];
-
- if (devp->hd_flags & HPET_OPEN || devp->hd_task) {
- devp = NULL;
- continue;
- }
-
- tp->ht_opaque = devp;
- devp->hd_task = tp;
- break;
- }
-
- spin_unlock(&hpet_lock);
- spin_unlock_irq(&hpet_task_lock);
-
- if (tp->ht_opaque)
- return 0;
- else
- return -EBUSY;
-}
-
static inline int hpet_tpcheck(struct hpet_task *tp)
{
struct hpet_dev *devp;
@@ -706,24 +649,6 @@ int hpet_unregister(struct hpet_task *tp)
return 0;
}
-int hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
-{
- struct hpet_dev *devp;
- int err;
-
- if ((err = hpet_tpcheck(tp)))
- return err;
-
- spin_lock_irq(&hpet_lock);
- devp = tp->ht_opaque;
- if (devp->hd_task != tp) {
- spin_unlock_irq(&hpet_lock);
- return -ENXIO;
- }
- spin_unlock_irq(&hpet_lock);
- return hpet_ioctl_common(devp, cmd, arg, 1);
-}
-
static ctl_table hpet_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
@@ -806,14 +731,14 @@ static unsigned long hpet_calibrate(struct hpets *hpetp)
int hpet_alloc(struct hpet_data *hdp)
{
- u64 cap, mcfg;
+ u64 cap, mcfg, hpet_config;
struct hpet_dev *devp;
- u32 i, ntimer;
+ u32 i, ntimer, irq;
struct hpets *hpetp;
size_t siz;
struct hpet __iomem *hpet;
static struct hpets *last = NULL;
- unsigned long period;
+ unsigned long period, irq_bitmap;
unsigned long long temp;
/*
@@ -840,11 +765,47 @@ int hpet_alloc(struct hpet_data *hdp)
hpetp->hp_hpet_phys = hdp->hd_phys_address;
hpetp->hp_ntimer = hdp->hd_nirqs;
+ hpet = hpetp->hp_hpet;
- for (i = 0; i < hdp->hd_nirqs; i++)
- hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i];
+ /* Assign IRQs statically for legacy devices */
+ hpetp->hp_dev[0].hd_hdwirq = hdp->hd_irq[0];
+ hpetp->hp_dev[1].hd_hdwirq = hdp->hd_irq[1];
- hpet = hpetp->hp_hpet;
+ /* Assign IRQs dynamically for the others */
+ for (i = 2, devp = &hpetp->hp_dev[2]; i < hdp->hd_nirqs; i++, devp++) {
+ struct hpet_timer __iomem *timer;
+
+ timer = &hpet->hpet_timers[devp - hpetp->hp_dev];
+
+ /* Check if there's already an IRQ assigned to the timer */
+ if (hdp->hd_irq[i]) {
+ hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i];
+ continue;
+ }
+
+ hpet_config = readq(&timer->hpet_config);
+ irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK)
+ >> Tn_INT_ROUTE_CAP_SHIFT;
+ if (!irq_bitmap)
+ irq = 0; /* No valid IRQ Assignable */
+ else {
+ irq = find_first_bit(&irq_bitmap, 32);
+ do {
+ hpet_config |= irq << Tn_INT_ROUTE_CNF_SHIFT;
+ writeq(hpet_config, &timer->hpet_config);
+
+ /*
+ * Verify whether we have written a valid
+ * IRQ number by reading it back again
+ */
+ hpet_config = readq(&timer->hpet_config);
+ if (irq == (hpet_config & Tn_INT_ROUTE_CNF_MASK)
+ >> Tn_INT_ROUTE_CNF_SHIFT)
+ break; /* Success */
+ } while ((irq = (find_next_bit(&irq_bitmap, 32, irq))));
+ }
+ hpetp->hp_dev[i].hd_hdwirq = irq;
+ }
cap = readq(&hpet->hpet_cap);
@@ -875,7 +836,8 @@ int hpet_alloc(struct hpet_data *hdp)
hpetp->hp_which, hdp->hd_phys_address,
hpetp->hp_ntimer > 1 ? "s" : "");
for (i = 0; i < hpetp->hp_ntimer; i++)
- printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]);
+ printk("%s %d", i > 0 ? "," : "",
+ hpetp->hp_dev[i].hd_hdwirq);
printk("\n");
printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n",
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 0c66b802736..78b151c4d20 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -1,5 +1,5 @@
/*
- * Real Time Clock interface for Linux
+ * Real Time Clock interface for Linux
*
* Copyright (C) 1996 Paul Gortmaker
*
@@ -17,7 +17,7 @@
* has been received. If a RTC interrupt has already happened,
* it will output an unsigned long and then block. The output value
* contains the interrupt status in the low byte and the number of
- * interrupts since the last read in the remaining high bytes. The
+ * interrupts since the last read in the remaining high bytes. The
* /dev/rtc interface can also be used with the select(2) call.
*
* This program is free software; you can redistribute it and/or
@@ -104,12 +104,14 @@ static int rtc_has_irq = 1;
#ifndef CONFIG_HPET_EMULATE_RTC
#define is_hpet_enabled() 0
-#define hpet_set_alarm_time(hrs, min, sec) 0
-#define hpet_set_periodic_freq(arg) 0
-#define hpet_mask_rtc_irq_bit(arg) 0
-#define hpet_set_rtc_irq_bit(arg) 0
-#define hpet_rtc_timer_init() do { } while (0)
-#define hpet_rtc_dropped_irq() 0
+#define hpet_set_alarm_time(hrs, min, sec) 0
+#define hpet_set_periodic_freq(arg) 0
+#define hpet_mask_rtc_irq_bit(arg) 0
+#define hpet_set_rtc_irq_bit(arg) 0
+#define hpet_rtc_timer_init() do { } while (0)
+#define hpet_rtc_dropped_irq() 0
+#define hpet_register_irq_handler(h) 0
+#define hpet_unregister_irq_handler(h) 0
#ifdef RTC_IRQ
static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
{
@@ -147,7 +149,7 @@ static int rtc_ioctl(struct inode *inode, struct file *file,
static unsigned int rtc_poll(struct file *file, poll_table *wait);
#endif
-static void get_rtc_alm_time (struct rtc_time *alm_tm);
+static void get_rtc_alm_time(struct rtc_time *alm_tm);
#ifdef RTC_IRQ
static void set_rtc_irq_bit_locked(unsigned char bit);
static void mask_rtc_irq_bit_locked(unsigned char bit);
@@ -185,9 +187,9 @@ static int rtc_proc_open(struct inode *inode, struct file *file);
* rtc_status but before mod_timer is called, which would then reenable the
* timer (but you would need to have an awful timing before you'd trip on it)
*/
-static unsigned long rtc_status = 0; /* bitmapped status byte. */
-static unsigned long rtc_freq = 0; /* Current periodic IRQ rate */
-static unsigned long rtc_irq_data = 0; /* our output to the world */
+static unsigned long rtc_status; /* bitmapped status byte. */
+static unsigned long rtc_freq; /* Current periodic IRQ rate */
+static unsigned long rtc_irq_data; /* our output to the world */
static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */
#ifdef RTC_IRQ
@@ -195,7 +197,7 @@ static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */
* rtc_task_lock nests inside rtc_lock.
*/
static DEFINE_SPINLOCK(rtc_task_lock);
-static rtc_task_t *rtc_callback = NULL;
+static rtc_task_t *rtc_callback;
#endif
/*
@@ -205,7 +207,7 @@ static rtc_task_t *rtc_callback = NULL;
static unsigned long epoch = 1900; /* year corresponding to 0x00 */
-static const unsigned char days_in_mo[] =
+static const unsigned char days_in_mo[] =
{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
/*
@@ -242,7 +244,7 @@ irqreturn_t rtc_interrupt(int irq, void *dev_id)
* the last read in the remainder of rtc_irq_data.
*/
- spin_lock (&rtc_lock);
+ spin_lock(&rtc_lock);
rtc_irq_data += 0x100;
rtc_irq_data &= ~0xff;
if (is_hpet_enabled()) {
@@ -259,16 +261,16 @@ irqreturn_t rtc_interrupt(int irq, void *dev_id)
if (rtc_status & RTC_TIMER_ON)
mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
- spin_unlock (&rtc_lock);
+ spin_unlock(&rtc_lock);
/* Now do the rest of the actions */
spin_lock(&rtc_task_lock);
if (rtc_callback)
rtc_callback->func(rtc_callback->private_data);
spin_unlock(&rtc_task_lock);
- wake_up_interruptible(&rtc_wait);
+ wake_up_interruptible(&rtc_wait);
- kill_fasync (&rtc_async_queue, SIGIO, POLL_IN);
+ kill_fasync(&rtc_async_queue, SIGIO, POLL_IN);
return IRQ_HANDLED;
}
@@ -335,7 +337,7 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
DECLARE_WAITQUEUE(wait, current);
unsigned long data;
ssize_t retval;
-
+
if (rtc_has_irq == 0)
return -EIO;
@@ -358,11 +360,11 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
* confusing. And no, xchg() is not the answer. */
__set_current_state(TASK_INTERRUPTIBLE);
-
- spin_lock_irq (&rtc_lock);
+
+ spin_lock_irq(&rtc_lock);
data = rtc_irq_data;
rtc_irq_data = 0;
- spin_unlock_irq (&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
if (data != 0)
break;
@@ -378,10 +380,13 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
schedule();
} while (1);
- if (count == sizeof(unsigned int))
- retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int);
- else
- retval = put_user(data, (unsigned long __user *)buf) ?: sizeof(long);
+ if (count == sizeof(unsigned int)) {
+ retval = put_user(data,
+ (unsigned int __user *)buf) ?: sizeof(int);
+ } else {
+ retval = put_user(data,
+ (unsigned long __user *)buf) ?: sizeof(long);
+ }
if (!retval)
retval = count;
out:
@@ -394,7 +399,7 @@ static ssize_t rtc_read(struct file *file, char __user *buf,
static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
{
- struct rtc_time wtime;
+ struct rtc_time wtime;
#ifdef RTC_IRQ
if (rtc_has_irq == 0) {
@@ -426,35 +431,41 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
}
case RTC_PIE_OFF: /* Mask periodic int. enab. bit */
{
- unsigned long flags; /* can be called from isr via rtc_control() */
- spin_lock_irqsave (&rtc_lock, flags);
+ /* can be called from isr via rtc_control() */
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
mask_rtc_irq_bit_locked(RTC_PIE);
if (rtc_status & RTC_TIMER_ON) {
rtc_status &= ~RTC_TIMER_ON;
del_timer(&rtc_irq_timer);
}
- spin_unlock_irqrestore (&rtc_lock, flags);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
return 0;
}
case RTC_PIE_ON: /* Allow periodic ints */
{
- unsigned long flags; /* can be called from isr via rtc_control() */
+ /* can be called from isr via rtc_control() */
+ unsigned long flags;
+
/*
* We don't really want Joe User enabling more
* than 64Hz of interrupts on a multi-user machine.
*/
if (!kernel && (rtc_freq > rtc_max_user_freq) &&
- (!capable(CAP_SYS_RESOURCE)))
+ (!capable(CAP_SYS_RESOURCE)))
return -EACCES;
- spin_lock_irqsave (&rtc_lock, flags);
+ spin_lock_irqsave(&rtc_lock, flags);
if (!(rtc_status & RTC_TIMER_ON)) {
mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq +
2*HZ/100);
rtc_status |= RTC_TIMER_ON;
}
set_rtc_irq_bit_locked(RTC_PIE);
- spin_unlock_irqrestore (&rtc_lock, flags);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
return 0;
}
case RTC_UIE_OFF: /* Mask ints from RTC updates. */
@@ -477,7 +488,7 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
*/
memset(&wtime, 0, sizeof(struct rtc_time));
get_rtc_alm_time(&wtime);
- break;
+ break;
}
case RTC_ALM_SET: /* Store a time into the alarm */
{
@@ -505,16 +516,21 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
*/
}
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
- RTC_ALWAYS_BCD)
- {
- if (sec < 60) BIN_TO_BCD(sec);
- else sec = 0xff;
-
- if (min < 60) BIN_TO_BCD(min);
- else min = 0xff;
-
- if (hrs < 24) BIN_TO_BCD(hrs);
- else hrs = 0xff;
+ RTC_ALWAYS_BCD) {
+ if (sec < 60)
+ BIN_TO_BCD(sec);
+ else
+ sec = 0xff;
+
+ if (min < 60)
+ BIN_TO_BCD(min);
+ else
+ min = 0xff;
+
+ if (hrs < 24)
+ BIN_TO_BCD(hrs);
+ else
+ hrs = 0xff;
}
CMOS_WRITE(hrs, RTC_HOURS_ALARM);
CMOS_WRITE(min, RTC_MINUTES_ALARM);
@@ -563,11 +579,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr)))
return -EINVAL;
-
+
if ((hrs >= 24) || (min >= 60) || (sec >= 60))
return -EINVAL;
- if ((yrs -= epoch) > 255) /* They are unsigned */
+ yrs -= epoch;
+ if (yrs > 255) /* They are unsigned */
return -EINVAL;
spin_lock_irq(&rtc_lock);
@@ -635,9 +652,10 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
{
int tmp = 0;
unsigned char val;
- unsigned long flags; /* can be called from isr via rtc_control() */
+ /* can be called from isr via rtc_control() */
+ unsigned long flags;
- /*
+ /*
* The max we can do is 8192Hz.
*/
if ((arg < 2) || (arg > 8192))
@@ -646,7 +664,8 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
* We don't really want Joe User generating more
* than 64Hz of interrupts on a multi-user machine.
*/
- if (!kernel && (arg > rtc_max_user_freq) && (!capable(CAP_SYS_RESOURCE)))
+ if (!kernel && (arg > rtc_max_user_freq) &&
+ !capable(CAP_SYS_RESOURCE))
return -EACCES;
while (arg > (1<<tmp))
@@ -674,11 +693,11 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
#endif
case RTC_EPOCH_READ: /* Read the epoch. */
{
- return put_user (epoch, (unsigned long __user *)arg);
+ return put_user(epoch, (unsigned long __user *)arg);
}
case RTC_EPOCH_SET: /* Set the epoch. */
{
- /*
+ /*
* There were no RTC clocks before 1900.
*/
if (arg < 1900)
@@ -693,7 +712,8 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
default:
return -ENOTTY;
}
- return copy_to_user((void __user *)arg, &wtime, sizeof wtime) ? -EFAULT : 0;
+ return copy_to_user((void __user *)arg,
+ &wtime, sizeof wtime) ? -EFAULT : 0;
}
static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
@@ -712,26 +732,25 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
* needed here. Or anywhere else in this driver. */
static int rtc_open(struct inode *inode, struct file *file)
{
- spin_lock_irq (&rtc_lock);
+ spin_lock_irq(&rtc_lock);
- if(rtc_status & RTC_IS_OPEN)
+ if (rtc_status & RTC_IS_OPEN)
goto out_busy;
rtc_status |= RTC_IS_OPEN;
rtc_irq_data = 0;
- spin_unlock_irq (&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
return 0;
out_busy:
- spin_unlock_irq (&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
return -EBUSY;
}
-static int rtc_fasync (int fd, struct file *filp, int on)
-
+static int rtc_fasync(int fd, struct file *filp, int on)
{
- return fasync_helper (fd, filp, on, &rtc_async_queue);
+ return fasync_helper(fd, filp, on, &rtc_async_queue);
}
static int rtc_release(struct inode *inode, struct file *file)
@@ -762,16 +781,16 @@ static int rtc_release(struct inode *inode, struct file *file)
}
spin_unlock_irq(&rtc_lock);
- if (file->f_flags & FASYNC) {
- rtc_fasync (-1, file, 0);
- }
+ if (file->f_flags & FASYNC)
+ rtc_fasync(-1, file, 0);
no_irq:
#endif
- spin_lock_irq (&rtc_lock);
+ spin_lock_irq(&rtc_lock);
rtc_irq_data = 0;
rtc_status &= ~RTC_IS_OPEN;
- spin_unlock_irq (&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
+
return 0;
}
@@ -786,9 +805,9 @@ static unsigned int rtc_poll(struct file *file, poll_table *wait)
poll_wait(file, &rtc_wait, wait);
- spin_lock_irq (&rtc_lock);
+ spin_lock_irq(&rtc_lock);
l = rtc_irq_data;
- spin_unlock_irq (&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
if (l != 0)
return POLLIN | POLLRDNORM;
@@ -796,14 +815,6 @@ static unsigned int rtc_poll(struct file *file, poll_table *wait)
}
#endif
-/*
- * exported stuffs
- */
-
-EXPORT_SYMBOL(rtc_register);
-EXPORT_SYMBOL(rtc_unregister);
-EXPORT_SYMBOL(rtc_control);
-
int rtc_register(rtc_task_t *task)
{
#ifndef RTC_IRQ
@@ -829,6 +840,7 @@ int rtc_register(rtc_task_t *task)
return 0;
#endif
}
+EXPORT_SYMBOL(rtc_register);
int rtc_unregister(rtc_task_t *task)
{
@@ -845,7 +857,7 @@ int rtc_unregister(rtc_task_t *task)
return -ENXIO;
}
rtc_callback = NULL;
-
+
/* disable controls */
if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) {
tmp = CMOS_READ(RTC_CONTROL);
@@ -865,6 +877,7 @@ int rtc_unregister(rtc_task_t *task)
return 0;
#endif
}
+EXPORT_SYMBOL(rtc_unregister);
int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
{
@@ -883,7 +896,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
return rtc_do_ioctl(cmd, arg, 1);
#endif
}
-
+EXPORT_SYMBOL(rtc_control);
/*
* The various file operations we support.
@@ -910,11 +923,11 @@ static struct miscdevice rtc_dev = {
#ifdef CONFIG_PROC_FS
static const struct file_operations rtc_proc_fops = {
- .owner = THIS_MODULE,
- .open = rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .owner = THIS_MODULE,
+ .open = rtc_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
#endif
@@ -965,7 +978,7 @@ static int __init rtc_init(void)
#ifdef CONFIG_SPARC32
for_each_ebus(ebus) {
for_each_ebusdev(edev, ebus) {
- if(strcmp(edev->prom_node->name, "rtc") == 0) {
+ if (strcmp(edev->prom_node->name, "rtc") == 0) {
rtc_port = edev->resource[0].start;
rtc_irq = edev->irqs[0];
goto found;
@@ -986,7 +999,8 @@ found:
* XXX Interrupt pin #7 in Espresso is shared between RTC and
* PCI Slot 2 INTA# (and some INTx# in Slot 1).
*/
- if (request_irq(rtc_irq, rtc_interrupt, IRQF_SHARED, "rtc", (void *)&rtc_port)) {
+ if (request_irq(rtc_irq, rtc_interrupt, IRQF_SHARED, "rtc",
+ (void *)&rtc_port)) {
rtc_has_irq = 0;
printk(KERN_ERR "rtc: cannot register IRQ %d\n", rtc_irq);
return -EIO;
@@ -1015,16 +1029,26 @@ no_irq:
#ifdef RTC_IRQ
if (is_hpet_enabled()) {
+ int err;
+
rtc_int_handler_ptr = hpet_rtc_interrupt;
+ err = hpet_register_irq_handler(rtc_interrupt);
+ if (err != 0) {
+ printk(KERN_WARNING "hpet_register_irq_handler failed "
+ "in rtc_init().");
+ return err;
+ }
} else {
rtc_int_handler_ptr = rtc_interrupt;
}
- if(request_irq(RTC_IRQ, rtc_int_handler_ptr, IRQF_DISABLED, "rtc", NULL)) {
+ if (request_irq(RTC_IRQ, rtc_int_handler_ptr, IRQF_DISABLED,
+ "rtc", NULL)) {
/* Yeah right, seeing as irq 8 doesn't even hit the bus. */
rtc_has_irq = 0;
printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ);
rtc_release_region();
+
return -EIO;
}
hpet_rtc_timer_init();
@@ -1036,6 +1060,7 @@ no_irq:
if (misc_register(&rtc_dev)) {
#ifdef RTC_IRQ
free_irq(RTC_IRQ, NULL);
+ hpet_unregister_irq_handler(rtc_interrupt);
rtc_has_irq = 0;
#endif
rtc_release_region();
@@ -1052,21 +1077,21 @@ no_irq:
#if defined(__alpha__) || defined(__mips__)
rtc_freq = HZ;
-
+
/* Each operating system on an Alpha uses its own epoch.
Let's try to guess which one we are using now. */
-
+
if (rtc_is_updating() != 0)
msleep(20);
-
+
spin_lock_irq(&rtc_lock);
year = CMOS_READ(RTC_YEAR);
ctrl = CMOS_READ(RTC_CONTROL);
spin_unlock_irq(&rtc_lock);
-
+
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
BCD_TO_BIN(year); /* This should never happen... */
-
+
if (year < 20) {
epoch = 2000;
guess = "SRM (post-2000)";
@@ -1087,7 +1112,8 @@ no_irq:
#endif
}
if (guess)
- printk(KERN_INFO "rtc: %s epoch (%lu) detected\n", guess, epoch);
+ printk(KERN_INFO "rtc: %s epoch (%lu) detected\n",
+ guess, epoch);
#endif
#ifdef RTC_IRQ
if (rtc_has_irq == 0)
@@ -1096,8 +1122,12 @@ no_irq:
spin_lock_irq(&rtc_lock);
rtc_freq = 1024;
if (!hpet_set_periodic_freq(rtc_freq)) {
- /* Initialize periodic freq. to CMOS reset default, which is 1024Hz */
- CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT);
+ /*
+ * Initialize periodic frequency to CMOS reset default,
+ * which is 1024Hz
+ */
+ CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06),
+ RTC_FREQ_SELECT);
}
spin_unlock_irq(&rtc_lock);
no_irq2:
@@ -1110,20 +1140,22 @@ no_irq2:
return 0;
}
-static void __exit rtc_exit (void)
+static void __exit rtc_exit(void)
{
cleanup_sysctl();
- remove_proc_entry ("driver/rtc", NULL);
+ remove_proc_entry("driver/rtc", NULL);
misc_deregister(&rtc_dev);
#ifdef CONFIG_SPARC32
if (rtc_has_irq)
- free_irq (rtc_irq, &rtc_port);
+ free_irq(rtc_irq, &rtc_port);
#else
rtc_release_region();
#ifdef RTC_IRQ
- if (rtc_has_irq)
- free_irq (RTC_IRQ, NULL);
+ if (rtc_has_irq) {
+ free_irq(RTC_IRQ, NULL);
+ hpet_unregister_irq_handler(hpet_rtc_interrupt);
+ }
#endif
#endif /* CONFIG_SPARC32 */
}
@@ -1133,14 +1165,14 @@ module_exit(rtc_exit);
#ifdef RTC_IRQ
/*
- * At IRQ rates >= 4096Hz, an interrupt may get lost altogether.
+ * At IRQ rates >= 4096Hz, an interrupt may get lost altogether.
* (usually during an IDE disk interrupt, with IRQ unmasking off)
* Since the interrupt handler doesn't get called, the IRQ status
* byte doesn't get read, and the RTC stops generating interrupts.
* A timer is set, and will call this function if/when that happens.
* To get it out of this stalled state, we just read the status.
* At least a jiffy of interrupts (rtc_freq/HZ) will have been lost.
- * (You *really* shouldn't be trying to use a non-realtime system
+ * (You *really* shouldn't be trying to use a non-realtime system
* for something that requires a steady > 1KHz signal anyways.)
*/
@@ -1148,7 +1180,7 @@ static void rtc_dropped_irq(unsigned long data)
{
unsigned long freq;
- spin_lock_irq (&rtc_lock);
+ spin_lock_irq(&rtc_lock);
if (hpet_rtc_dropped_irq()) {
spin_unlock_irq(&rtc_lock);
@@ -1167,13 +1199,15 @@ static void rtc_dropped_irq(unsigned long data)
spin_unlock_irq(&rtc_lock);
- if (printk_ratelimit())
- printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq);
+ if (printk_ratelimit()) {
+ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+ freq);
+ }
/* Now we have new data */
wake_up_interruptible(&rtc_wait);
- kill_fasync (&rtc_async_queue, SIGIO, POLL_IN);
+ kill_fasync(&rtc_async_queue, SIGIO, POLL_IN);
}
#endif
@@ -1277,7 +1311,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
* can take just over 2ms. We wait 20ms. There is no need to
* to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
* If you need to know *exactly* when a second has started, enable
- * periodic update complete interrupts, (via ioctl) and then
+ * periodic update complete interrupts, (via ioctl) and then
* immediately read /dev/rtc which will block until you get the IRQ.
* Once the read clears, read the RTC time (again via ioctl). Easy.
*/
@@ -1307,8 +1341,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
ctrl = CMOS_READ(RTC_CONTROL);
spin_unlock_irqrestore(&rtc_lock, flags);
- if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
+ if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
BCD_TO_BIN(rtc_tm->tm_sec);
BCD_TO_BIN(rtc_tm->tm_min);
BCD_TO_BIN(rtc_tm->tm_hour);
@@ -1326,7 +1359,8 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
* Account for differences between how the RTC uses the values
* and how they are defined in a struct rtc_time;
*/
- if ((rtc_tm->tm_year += (epoch - 1900)) <= 69)
+ rtc_tm->tm_year += epoch - 1900;
+ if (rtc_tm->tm_year <= 69)
rtc_tm->tm_year += 100;
rtc_tm->tm_mon--;
@@ -1347,8 +1381,7 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
ctrl = CMOS_READ(RTC_CONTROL);
spin_unlock_irq(&rtc_lock);
- if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
+ if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
BCD_TO_BIN(alm_tm->tm_sec);
BCD_TO_BIN(alm_tm->tm_min);
BCD_TO_BIN(alm_tm->tm_hour);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5efd5550f4c..b730d670952 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1604,7 +1604,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
memcpy(&policy->cpuinfo, &data->cpuinfo,
sizeof(struct cpufreq_cpuinfo));
- if (policy->min > data->min && policy->min > policy->max) {
+ if (policy->min > data->max || policy->max < data->min) {
ret = -EINVAL;
goto error_out;
}
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 5e596a7e360..9008ed5ef4c 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -8,6 +8,8 @@
#include <linux/slab.h>
#include <asm/dmi.h>
+static char dmi_empty_string[] = " ";
+
static char * __init dmi_string(const struct dmi_header *dm, u8 s)
{
const u8 *bp = ((u8 *) dm) + dm->length;
@@ -21,11 +23,16 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s)
}
if (*bp != 0) {
- str = dmi_alloc(strlen(bp) + 1);
+ size_t len = strlen(bp)+1;
+ size_t cmp_len = len > 8 ? 8 : len;
+
+ if (!memcmp(bp, dmi_empty_string, cmp_len))
+ return dmi_empty_string;
+ str = dmi_alloc(len);
if (str != NULL)
strcpy(str, bp);
else
- printk(KERN_ERR "dmi_string: out of memory.\n");
+ printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len);
}
}
@@ -175,12 +182,23 @@ static void __init dmi_save_devices(const struct dmi_header *dm)
}
}
+static struct dmi_device empty_oem_string_dev = {
+ .name = dmi_empty_string,
+};
+
static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
{
int i, count = *(u8 *)(dm + 1);
struct dmi_device *dev;
for (i = 1; i <= count; i++) {
+ char *devname = dmi_string(dm, i);
+
+ if (!strcmp(devname, dmi_empty_string)) {
+ list_add(&empty_oem_string_dev.list, &dmi_devices);
+ continue;
+ }
+
dev = dmi_alloc(sizeof(*dev));
if (!dev) {
printk(KERN_ERR
@@ -189,7 +207,7 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
}
dev->type = DMI_DEV_TYPE_OEM_STRING;
- dev->name = dmi_string(dm, i);
+ dev->name = devname;
dev->device_data = NULL;
list_add(&dev->list, &dmi_devices);
@@ -331,9 +349,11 @@ void __init dmi_scan_machine(void)
rc = dmi_present(q);
if (!rc) {
dmi_available = 1;
+ dmi_iounmap(p, 0x10000);
return;
}
}
+ dmi_iounmap(p, 0x10000);
}
out: printk(KERN_INFO "DMI not present or invalid.\n");
}
diff --git a/drivers/ieee1394/Makefile b/drivers/ieee1394/Makefile
index 489c133664d..1f8153b5750 100644
--- a/drivers/ieee1394/Makefile
+++ b/drivers/ieee1394/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_IEEE1394_SBP2) += sbp2.o
obj-$(CONFIG_IEEE1394_DV1394) += dv1394.o
obj-$(CONFIG_IEEE1394_ETH1394) += eth1394.o
+obj-$(CONFIG_PROVIDE_OHCI1394_DMA_INIT) += init_ohci1394_dma.o
diff --git a/drivers/ieee1394/init_ohci1394_dma.c b/drivers/ieee1394/init_ohci1394_dma.c
new file mode 100644
index 00000000000..ddaab6eb8ac
--- /dev/null
+++ b/drivers/ieee1394/init_ohci1394_dma.c
@@ -0,0 +1,285 @@
+/*
+ * init_ohci1394_dma.c - Initializes physical DMA on all OHCI 1394 controllers
+ *
+ * Copyright (C) 2006-2007 Bernhard Kaindl <bk@suse.de>
+ *
+ * Derived from drivers/ieee1394/ohci1394.c and arch/x86/kernel/early-quirks.c
+ * this file has functions to:
+ * - scan the PCI very early on boot for all OHCI 1394-compliant controllers
+ * - reset and initialize them and make them join the IEEE1394 bus and
+ * - enable physical DMA on them to allow remote debugging
+ *
+ * All code and data is marked as __init and __initdata, respective as
+ * during boot, all OHCI1394 controllers may be claimed by the firewire
+ * stack and at this point, this code should not touch them anymore.
+ *
+ * To use physical DMA after the initialization of the firewire stack,
+ * be sure that the stack enables it and (re-)attach after the bus reset
+ * which may be caused by the firewire stack initialization.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/interrupt.h> /* for ohci1394.h */
+#include <linux/delay.h>
+#include <linux/pci.h> /* for PCI defines */
+#include <linux/init_ohci1394_dma.h>
+#include <asm/pci-direct.h> /* for direct PCI config space access */
+#include <asm/fixmap.h>
+
+#include "ieee1394_types.h"
+#include "ohci1394.h"
+
+int __initdata init_ohci1394_dma_early;
+
+/* Reads a PHY register of an OHCI-1394 controller */
+static inline u8 __init get_phy_reg(struct ti_ohci *ohci, u8 addr)
+{
+ int i;
+ quadlet_t r;
+
+ reg_write(ohci, OHCI1394_PhyControl, (addr << 8) | 0x00008000);
+
+ for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+ if (reg_read(ohci, OHCI1394_PhyControl) & 0x80000000)
+ break;
+ mdelay(1);
+ }
+ r = reg_read(ohci, OHCI1394_PhyControl);
+
+ return (r & 0x00ff0000) >> 16;
+}
+
+/* Writes to a PHY register of an OHCI-1394 controller */
+static inline void __init set_phy_reg(struct ti_ohci *ohci, u8 addr, u8 data)
+{
+ int i;
+
+ reg_write(ohci, OHCI1394_PhyControl, (addr << 8) | data | 0x00004000);
+
+ for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+ u32 r = reg_read(ohci, OHCI1394_PhyControl);
+ if (!(r & 0x00004000))
+ break;
+ mdelay(1);
+ }
+}
+
+/* Resets an OHCI-1394 controller (for sane state before initialization) */
+static inline void __init init_ohci1394_soft_reset(struct ti_ohci *ohci) {
+ int i;
+
+ reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_softReset);
+
+ for (i = 0; i < OHCI_LOOP_COUNT; i++) {
+ if (!(reg_read(ohci, OHCI1394_HCControlSet)
+ & OHCI1394_HCControl_softReset))
+ break;
+ mdelay(1);
+ }
+}
+
+/* Basic OHCI-1394 register and port inititalization */
+static inline void __init init_ohci1394_initialize(struct ti_ohci *ohci)
+{
+ quadlet_t bus_options;
+ int num_ports, i;
+
+ /* Put some defaults to these undefined bus options */
+ bus_options = reg_read(ohci, OHCI1394_BusOptions);
+ bus_options |= 0x60000000; /* Enable CMC and ISC */
+ bus_options &= ~0x00ff0000; /* XXX: Set cyc_clk_acc to zero for now */
+ bus_options &= ~0x18000000; /* Disable PMC and BMC */
+ reg_write(ohci, OHCI1394_BusOptions, bus_options);
+
+ /* Set the bus number */
+ reg_write(ohci, OHCI1394_NodeID, 0x0000ffc0);
+
+ /* Enable posted writes */
+ reg_write(ohci, OHCI1394_HCControlSet,
+ OHCI1394_HCControl_postedWriteEnable);
+
+ /* Clear link control register */
+ reg_write(ohci, OHCI1394_LinkControlClear, 0xffffffff);
+
+ /* enable phys */
+ reg_write(ohci, OHCI1394_LinkControlSet,
+ OHCI1394_LinkControl_RcvPhyPkt);
+
+ /* Don't accept phy packets into AR request context */
+ reg_write(ohci, OHCI1394_LinkControlClear, 0x00000400);
+
+ /* Clear the Isochonouys interrupt masks */
+ reg_write(ohci, OHCI1394_IsoRecvIntMaskClear, 0xffffffff);
+ reg_write(ohci, OHCI1394_IsoRecvIntEventClear, 0xffffffff);
+ reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 0xffffffff);
+ reg_write(ohci, OHCI1394_IsoXmitIntEventClear, 0xffffffff);
+
+ /* Accept asyncronous transfer requests from all nodes for now */
+ reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000);
+
+ /* Specify asyncronous transfer retries */
+ reg_write(ohci, OHCI1394_ATRetries,
+ OHCI1394_MAX_AT_REQ_RETRIES |
+ (OHCI1394_MAX_AT_RESP_RETRIES<<4) |
+ (OHCI1394_MAX_PHYS_RESP_RETRIES<<8));
+
+ /* We don't want hardware swapping */
+ reg_write(ohci, OHCI1394_HCControlClear, OHCI1394_HCControl_noByteSwap);
+
+ /* Enable link */
+ reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_linkEnable);
+
+ /* If anything is connected to a port, make sure it is enabled */
+ num_ports = get_phy_reg(ohci, 2) & 0xf;
+ for (i = 0; i < num_ports; i++) {
+ unsigned int status;
+
+ set_phy_reg(ohci, 7, i);
+ status = get_phy_reg(ohci, 8);
+
+ if (status & 0x20)
+ set_phy_reg(ohci, 8, status & ~1);
+ }
+}
+
+/**
+ * init_ohci1394_wait_for_busresets - wait until bus resets are completed
+ *
+ * OHCI1394 initialization itself and any device going on- or offline
+ * and any cable issue cause a IEEE1394 bus reset. The OHCI1394 spec
+ * specifies that physical DMA is disabled on each bus reset and it
+ * has to be enabled after each bus reset when needed. We resort
+ * to polling here because on early boot, we have no interrupts.
+ */
+static inline void __init init_ohci1394_wait_for_busresets(struct ti_ohci *ohci)
+{
+ int i, events;
+
+ for (i=0; i < 9; i++) {
+ mdelay(200);
+ events = reg_read(ohci, OHCI1394_IntEventSet);
+ if (events & OHCI1394_busReset)
+ reg_write(ohci, OHCI1394_IntEventClear,
+ OHCI1394_busReset);
+ }
+}
+
+/**
+ * init_ohci1394_enable_physical_dma - Enable physical DMA for remote debugging
+ * This enables remote DMA access over IEEE1394 from every host for the low
+ * 4GB of address space. DMA accesses above 4GB are not available currently.
+ */
+static inline void __init init_ohci1394_enable_physical_dma(struct ti_ohci *hci)
+{
+ reg_write(hci, OHCI1394_PhyReqFilterHiSet, 0xffffffff);
+ reg_write(hci, OHCI1394_PhyReqFilterLoSet, 0xffffffff);
+ reg_write(hci, OHCI1394_PhyUpperBound, 0xffff0000);
+}
+
+/**
+ * init_ohci1394_reset_and_init_dma - init controller and enable DMA
+ * This initializes the given controller and enables physical DMA engine in it.
+ */
+static inline void __init init_ohci1394_reset_and_init_dma(struct ti_ohci *ohci)
+{
+ /* Start off with a soft reset, clears everything to a sane state. */
+ init_ohci1394_soft_reset(ohci);
+
+ /* Accessing some registers without LPS enabled may cause lock up */
+ reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_LPS);
+
+ /* Disable and clear interrupts */
+ reg_write(ohci, OHCI1394_IntEventClear, 0xffffffff);
+ reg_write(ohci, OHCI1394_IntMaskClear, 0xffffffff);
+
+ mdelay(50); /* Wait 50msec to make sure we have full link enabled */
+
+ init_ohci1394_initialize(ohci);
+ /*
+ * The initialization causes at least one IEEE1394 bus reset. Enabling
+ * physical DMA only works *after* *all* bus resets have calmed down:
+ */
+ init_ohci1394_wait_for_busresets(ohci);
+
+ /* We had to wait and do this now if we want to debug early problems */
+ init_ohci1394_enable_physical_dma(ohci);
+}
+
+/**
+ * init_ohci1394_controller - Map the registers of the controller and init DMA
+ * This maps the registers of the specified controller and initializes it
+ */
+static inline void __init init_ohci1394_controller(int num, int slot, int func)
+{
+ unsigned long ohci_base;
+ struct ti_ohci ohci;
+
+ printk(KERN_INFO "init_ohci1394_dma: initializing OHCI-1394"
+ " at %02x:%02x.%x\n", num, slot, func);
+
+ ohci_base = read_pci_config(num, slot, func, PCI_BASE_ADDRESS_0+(0<<2))
+ & PCI_BASE_ADDRESS_MEM_MASK;
+
+ set_fixmap_nocache(FIX_OHCI1394_BASE, ohci_base);
+
+ ohci.registers = (void *)fix_to_virt(FIX_OHCI1394_BASE);
+
+ init_ohci1394_reset_and_init_dma(&ohci);
+}
+
+/**
+ * debug_init_ohci1394_dma - scan for OHCI1394 controllers and init DMA on them
+ * Scans the whole PCI space for OHCI1394 controllers and inits DMA on them
+ */
+void __init init_ohci1394_dma_on_all_controllers(void)
+{
+ int num, slot, func;
+
+ if (!early_pci_allowed())
+ return;
+
+ /* Poor man's PCI discovery, the only thing we can do at early boot */
+ for (num = 0; num < 32; num++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
+ u32 class = read_pci_config(num,slot,func,
+ PCI_CLASS_REVISION);
+ if ((class == 0xffffffff))
+ continue; /* No device at this func */
+
+ if (class>>8 != PCI_CLASS_SERIAL_FIREWIRE_OHCI)
+ continue; /* Not an OHCI-1394 device */
+
+ init_ohci1394_controller(num, slot, func);
+ break; /* Assume one controller per device */
+ }
+ }
+ }
+ printk(KERN_INFO "init_ohci1394_dma: finished initializing OHCI DMA\n");
+}
+
+/**
+ * setup_init_ohci1394_early - enables early OHCI1394 DMA initialization
+ */
+static int __init setup_ohci1394_dma(char *opt)
+{
+ if (!strcmp(opt, "early"))
+ init_ohci1394_dma_early = 1;
+ return 0;
+}
+
+/* passing ohci1394_dma=early on boot causes early OHCI1394 DMA initialization */
+early_param("ohci1394_dma", setup_ohci1394_dma);
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 8991ab0b4fe..61cff8374e6 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -39,6 +39,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
+#include <linux/delay.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -62,8 +63,10 @@ static irqreturn_t pc110pad_interrupt(int irq, void *ptr)
int value = inb_p(pc110pad_io);
int handshake = inb_p(pc110pad_io + 2);
- outb_p(handshake | 1, pc110pad_io + 2);
- outb_p(handshake & ~1, pc110pad_io + 2);
+ outb(handshake | 1, pc110pad_io + 2);
+ udelay(2);
+ outb(handshake & ~1, pc110pad_io + 2);
+ udelay(2);
inb_p(0x64);
pc110pad_data[pc110pad_count++] = value;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 4e04e49a2f1..ced4ac1955d 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -290,7 +290,7 @@ static void svm_hardware_enable(void *garbage)
#ifdef CONFIG_X86_64
struct desc_ptr gdt_descr;
#else
- struct Xgt_desc_struct gdt_descr;
+ struct desc_ptr gdt_descr;
#endif
struct desc_struct *gdt;
int me = raw_smp_processor_id();
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index bb56ae3f89b..5b397b6c9f9 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -524,7 +524,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->rmode.active)
- rflags |= IOPL_MASK | X86_EFLAGS_VM;
+ rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
vmcs_writel(GUEST_RFLAGS, rflags);
}
@@ -1050,7 +1050,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
flags = vmcs_readl(GUEST_RFLAGS);
- flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
+ flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
vmcs_writel(GUEST_RFLAGS, flags);
@@ -1107,9 +1107,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
flags = vmcs_readl(GUEST_RFLAGS);
- vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+ vcpu->rmode.save_iopl = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
- flags |= IOPL_MASK | X86_EFLAGS_VM;
+ flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 96d0fd07c57..44adb00e149 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -94,7 +94,7 @@ static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
/* Set up the two "TSS" members which tell the CPU what stack to use
* for traps which do directly into the Guest (ie. traps at privilege
* level 1). */
- pages->state.guest_tss.esp1 = lg->esp1;
+ pages->state.guest_tss.sp1 = lg->esp1;
pages->state.guest_tss.ss1 = lg->ss1;
/* Copy direct-to-Guest trap entries. */
@@ -416,7 +416,7 @@ void __init lguest_arch_host_init(void)
/* We know where we want the stack to be when the Guest enters
* the switcher: in pages->regs. The stack grows upwards, so
* we start it at the end of that structure. */
- state->guest_tss.esp0 = (long)(&pages->regs + 1);
+ state->guest_tss.sp0 = (long)(&pages->regs + 1);
/* And this is the GDT entry to use for the stack: we keep a
* couple of special LGUEST entries. */
state->guest_tss.ss0 = LGUEST_DS;
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 5dba68fe33f..a8364d81522 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -61,7 +61,7 @@ set_base(gdt[(selname) >> 3], (u32)(address)); \
set_limit(gdt[(selname) >> 3], size); \
} while(0)
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
+static struct desc_struct bad_bios_desc;
/*
* At some point we want to use this stack frame pointer to unwind
@@ -477,6 +477,9 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
pnp_bios_callpoint.offset = header->fields.pm16offset;
pnp_bios_callpoint.segment = PNP_CS16;
+ bad_bios_desc.a = 0;
+ bad_bios_desc.b = 0x00409200;
+
set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
for (i = 0; i < NR_CPUS; i++) {
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index c31f549ebea..1c656667b93 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -88,9 +88,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
{
gfp_t flags;
unsigned long i;
- pgprot_t wc_pageprot;
- wc_pageprot = PAGE_KERNEL_NOCACHE;
max_order++;
do {
/*
@@ -126,14 +124,8 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
/*
* Change caching policy of the linear kernel map to avoid
* mapping type conflicts with user-space mappings.
- * The first global_flush_tlb() is really only there to do a global
- * wbinvd().
*/
-
- global_flush_tlb();
- change_page_attr(virt_to_page(va->logical), va->size >> PAGE_SHIFT,
- wc_pageprot);
- global_flush_tlb();
+ set_pages_uc(virt_to_page(va->logical), va->size >> PAGE_SHIFT);
printk(KERN_DEBUG MODULE_NAME
": Allocated %ld bytes vram area at 0x%08lx\n",
@@ -157,9 +149,8 @@ static void vmlfb_free_vram_area(struct vram_area *va)
* Reset the linear kernel map caching policy.
*/
- change_page_attr(virt_to_page(va->logical),
- va->size >> PAGE_SHIFT, PAGE_KERNEL);
- global_flush_tlb();
+ set_pages_wb(virt_to_page(va->logical),
+ va->size >> PAGE_SHIFT);
/*
* Decrease the usage count on the pages we've used
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index d4fc6095466..7c3d5f923da 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -23,6 +23,10 @@ config BINFMT_ELF
ld.so (check the file <file:Documentation/Changes> for location and
latest version).
+config COMPAT_BINFMT_ELF
+ bool
+ depends on COMPAT && MMU
+
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
diff --git a/fs/Makefile b/fs/Makefile
index 500cf15cdb4..1e7a11bd4da 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
obj-y += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
+obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
diff --git a/fs/aio.c b/fs/aio.c
index 9dec7d2d546..8a37dbbf343 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -397,7 +397,7 @@ void fastcall __put_ioctx(struct kioctx *ctx)
* This prevents races between the aio code path referencing the
* req (after submitting it) and aio_complete() freeing the req.
*/
-static struct kiocb *FASTCALL(__aio_get_req(struct kioctx *ctx));
+static struct kiocb *__aio_get_req(struct kioctx *ctx);
static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
{
struct kiocb *req = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f0b3171842f..18ed6dd906c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,8 @@
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
+ int, int, unsigned long);
/*
* If we don't support core dumping, then supply a NULL so we
@@ -298,33 +299,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
#ifndef elf_map
static unsigned long elf_map(struct file *filep, unsigned long addr,
- struct elf_phdr *eppnt, int prot, int type)
+ struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
{
unsigned long map_addr;
- unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+ unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+ addr = ELF_PAGESTART(addr);
+ size = ELF_PAGEALIGN(size);
- down_write(&current->mm->mmap_sem);
/* mmap() will return -EINVAL if given a zero size, but a
* segment with zero filesize is perfectly valid */
- if (eppnt->p_filesz + pageoffset)
- map_addr = do_mmap(filep, ELF_PAGESTART(addr),
- eppnt->p_filesz + pageoffset, prot, type,
- eppnt->p_offset - pageoffset);
- else
- map_addr = ELF_PAGESTART(addr);
+ if (!size)
+ return addr;
+
+ down_write(&current->mm->mmap_sem);
+ /*
+ * total_size is the size of the ELF (interpreter) image.
+ * The _first_ mmap needs to know the full size, otherwise
+ * randomization might put this image into an overlapping
+ * position with the ELF binary image. (since size < total_size)
+ * So we first map the 'big' image - and unmap the remainder at
+ * the end. (which unmap is needed for ELF images with holes.)
+ */
+ if (total_size) {
+ total_size = ELF_PAGEALIGN(total_size);
+ map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+ if (!BAD_ADDR(map_addr))
+ do_munmap(current->mm, map_addr+size, total_size-size);
+ } else
+ map_addr = do_mmap(filep, addr, size, prot, type, off);
+
up_write(&current->mm->mmap_sem);
return(map_addr);
}
#endif /* !elf_map */
+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+ int i, first_idx = -1, last_idx = -1;
+
+ for (i = 0; i < nr; i++) {
+ if (cmds[i].p_type == PT_LOAD) {
+ last_idx = i;
+ if (first_idx == -1)
+ first_idx = i;
+ }
+ }
+ if (first_idx == -1)
+ return 0;
+
+ return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+ ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
is only provided so that we can read a.out libraries that have
an ELF header */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
- struct file *interpreter, unsigned long *interp_load_addr)
+ struct file *interpreter, unsigned long *interp_map_addr,
+ unsigned long no_base)
{
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
@@ -332,6 +370,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
+ unsigned long total_size;
int retval, i, size;
/* First of all, some simple consistency checks */
@@ -370,6 +409,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}
+ total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_close;
+ }
+
eppnt = elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
@@ -387,9 +432,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
+ else if (no_base && interp_elf_ex->e_type == ET_DYN)
+ load_addr = -vaddr;
map_addr = elf_map(interpreter, load_addr + vaddr,
- eppnt, elf_prot, elf_type);
+ eppnt, elf_prot, elf_type, total_size);
+ total_size = 0;
+ if (!*interp_map_addr)
+ *interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
goto out_close;
@@ -455,8 +505,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out_close;
}
- *interp_load_addr = load_addr;
- error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+ error = load_addr;
out_close:
kfree(elf_phdata);
@@ -546,14 +595,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
int load_addr_set = 0;
char * elf_interpreter = NULL;
unsigned int interpreter_type = INTERPRETER_NONE;
- unsigned char ibcs2_interpreter = 0;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata;
unsigned long elf_bss, elf_brk;
int elf_exec_fileno;
int retval, i;
unsigned int size;
- unsigned long elf_entry, interp_load_addr = 0;
+ unsigned long elf_entry;
+ unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc = 0;
char passed_fileno[6];
@@ -663,14 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
goto out_free_interp;
- /* If the program interpreter is one of these two,
- * then assume an iBCS2 image. Otherwise assume
- * a native linux image.
- */
- if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
- strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
- ibcs2_interpreter = 1;
-
/*
* The early SET_PERSONALITY here is so that the lookup
* for the interpreter happens in the namespace of the
@@ -690,7 +731,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* switch really is going to happen - do this in
* flush_thread(). - akpm
*/
- SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+ SET_PERSONALITY(loc->elf_ex, 0);
interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
@@ -769,7 +810,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto out_free_dentry;
} else {
/* Executables without an interpreter also need a personality */
- SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+ SET_PERSONALITY(loc->elf_ex, 0);
}
/* OK, we are done with that, now set up the arg stuff,
@@ -803,7 +844,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
- SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
+ SET_PERSONALITY(loc->elf_ex, 0);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
@@ -825,9 +866,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->start_stack = bprm->p;
/* Now we do a little grungy work by mmaping the ELF image into
- the correct location in memory. At this point, we assume that
- the image should be loaded at fixed address, not at a variable
- address. */
+ the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
@@ -881,11 +920,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
+#ifdef CONFIG_X86
+ load_bias = 0;
+#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags);
+ elf_prot, elf_flags, 0);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, 0);
retval = IS_ERR((void *)error) ?
@@ -961,13 +1004,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
}
if (elf_interpreter) {
- if (interpreter_type == INTERPRETER_AOUT)
+ if (interpreter_type == INTERPRETER_AOUT) {
elf_entry = load_aout_interp(&loc->interp_ex,
interpreter);
- else
+ } else {
+ unsigned long uninitialized_var(interp_map_addr);
+
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
- &interp_load_addr);
+ &interp_map_addr,
+ load_bias);
+ if (!IS_ERR((void *)elf_entry)) {
+ /*
+ * load_elf_interp() returns relocation
+ * adjustment
+ */
+ interp_load_addr = elf_entry;
+ elf_entry += loc->interp_elf_ex.e_entry;
+ }
+ }
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?
@@ -1021,6 +1076,12 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
+#ifdef arch_randomize_brk
+ if (current->flags & PF_RANDOMIZE)
+ current->mm->brk = current->mm->start_brk =
+ arch_randomize_brk(current->mm);
+#endif
+
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
@@ -1325,7 +1386,8 @@ static int writenote(struct memelfnote *men, struct file *file,
if (!dump_seek(file, (off))) \
goto end_coredump;
-static void fill_elf_header(struct elfhdr *elf, int segs)
+static void fill_elf_header(struct elfhdr *elf, int segs,
+ u16 machine, u32 flags, u8 osabi)
{
memcpy(elf->e_ident, ELFMAG, SELFMAG);
elf->e_ident[EI_CLASS] = ELF_CLASS;
@@ -1335,12 +1397,12 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
elf->e_type = ET_CORE;
- elf->e_machine = ELF_ARCH;
+ elf->e_machine = machine;
elf->e_version = EV_CURRENT;
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
+ elf->e_flags = flags;
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = segs;
@@ -1447,6 +1509,238 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
return 0;
}
+static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
+{
+ elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
+ int i = 0;
+ do
+ i += 2;
+ while (auxv[i - 2] != AT_NULL);
+ fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
+}
+
+#ifdef CORE_DUMP_USE_REGSET
+#include <linux/regset.h>
+
+struct elf_thread_core_info {
+ struct elf_thread_core_info *next;
+ struct task_struct *task;
+ struct elf_prstatus prstatus;
+ struct memelfnote notes[0];
+};
+
+struct elf_note_info {
+ struct elf_thread_core_info *thread;
+ struct memelfnote psinfo;
+ struct memelfnote auxv;
+ size_t size;
+ int thread_notes;
+};
+
+static int fill_thread_core_info(struct elf_thread_core_info *t,
+ const struct user_regset_view *view,
+ long signr, size_t *total)
+{
+ unsigned int i;
+
+ /*
+ * NT_PRSTATUS is the one special case, because the regset data
+ * goes into the pr_reg field inside the note contents, rather
+ * than being the whole note contents. We fill the reset in here.
+ * We assume that regset 0 is NT_PRSTATUS.
+ */
+ fill_prstatus(&t->prstatus, t->task, signr);
+ (void) view->regsets[0].get(t->task, &view->regsets[0],
+ 0, sizeof(t->prstatus.pr_reg),
+ &t->prstatus.pr_reg, NULL);
+
+ fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
+ sizeof(t->prstatus), &t->prstatus);
+ *total += notesize(&t->notes[0]);
+
+ /*
+ * Each other regset might generate a note too. For each regset
+ * that has no core_note_type or is inactive, we leave t->notes[i]
+ * all zero and we'll know to skip writing it later.
+ */
+ for (i = 1; i < view->n; ++i) {
+ const struct user_regset *regset = &view->regsets[i];
+ if (regset->core_note_type &&
+ (!regset->active || regset->active(t->task, regset))) {
+ int ret;
+ size_t size = regset->n * regset->size;
+ void *data = kmalloc(size, GFP_KERNEL);
+ if (unlikely(!data))
+ return 0;
+ ret = regset->get(t->task, regset,
+ 0, size, data, NULL);
+ if (unlikely(ret))
+ kfree(data);
+ else {
+ if (regset->core_note_type != NT_PRFPREG)
+ fill_note(&t->notes[i], "LINUX",
+ regset->core_note_type,
+ size, data);
+ else {
+ t->prstatus.pr_fpvalid = 1;
+ fill_note(&t->notes[i], "CORE",
+ NT_PRFPREG, size, data);
+ }
+ *total += notesize(&t->notes[i]);
+ }
+ }
+ }
+
+ return 1;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+ long signr, struct pt_regs *regs)
+{
+ struct task_struct *dump_task = current;
+ const struct user_regset_view *view = task_user_regset_view(dump_task);
+ struct elf_thread_core_info *t;
+ struct elf_prpsinfo *psinfo;
+ struct task_struct *g, *p;
+ unsigned int i;
+
+ info->size = 0;
+ info->thread = NULL;
+
+ psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
+ fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
+ if (psinfo == NULL)
+ return 0;
+
+ /*
+ * Figure out how many notes we're going to need for each thread.
+ */
+ info->thread_notes = 0;
+ for (i = 0; i < view->n; ++i)
+ if (view->regsets[i].core_note_type != 0)
+ ++info->thread_notes;
+
+ /*
+ * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
+ * since it is our one special case.
+ */
+ if (unlikely(info->thread_notes == 0) ||
+ unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
+ WARN_ON(1);
+ return 0;
+ }
+
+ /*
+ * Initialize the ELF file header.
+ */
+ fill_elf_header(elf, phdrs,
+ view->e_machine, view->e_flags, view->ei_osabi);
+
+ /*
+ * Allocate a structure for each thread.
+ */
+ rcu_read_lock();
+ do_each_thread(g, p)
+ if (p->mm == dump_task->mm) {
+ t = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_ATOMIC);
+ if (unlikely(!t)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ t->task = p;
+ if (p == dump_task || !info->thread) {
+ t->next = info->thread;
+ info->thread = t;
+ } else {
+ /*
+ * Make sure to keep the original task at
+ * the head of the list.
+ */
+ t->next = info->thread->next;
+ info->thread->next = t;
+ }
+ }
+ while_each_thread(g, p);
+ rcu_read_unlock();
+
+ /*
+ * Now fill in each thread's information.
+ */
+ for (t = info->thread; t != NULL; t = t->next)
+ if (!fill_thread_core_info(t, view, signr, &info->size))
+ return 0;
+
+ /*
+ * Fill in the two process-wide notes.
+ */
+ fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
+ info->size += notesize(&info->psinfo);
+
+ fill_auxv_note(&info->auxv, current->mm);
+ info->size += notesize(&info->auxv);
+
+ return 1;
+}
+
+static size_t get_note_info_size(struct elf_note_info *info)
+{
+ return info->size;
+}
+
+/*
+ * Write all the notes for each thread. When writing the first thread, the
+ * process-wide notes are interleaved after the first thread-specific note.
+ */
+static int write_note_info(struct elf_note_info *info,
+ struct file *file, loff_t *foffset)
+{
+ bool first = 1;
+ struct elf_thread_core_info *t = info->thread;
+
+ do {
+ int i;
+
+ if (!writenote(&t->notes[0], file, foffset))
+ return 0;
+
+ if (first && !writenote(&info->psinfo, file, foffset))
+ return 0;
+ if (first && !writenote(&info->auxv, file, foffset))
+ return 0;
+
+ for (i = 1; i < info->thread_notes; ++i)
+ if (t->notes[i].data &&
+ !writenote(&t->notes[i], file, foffset))
+ return 0;
+
+ first = 0;
+ t = t->next;
+ } while (t);
+
+ return 1;
+}
+
+static void free_note_info(struct elf_note_info *info)
+{
+ struct elf_thread_core_info *threads = info->thread;
+ while (threads) {
+ unsigned int i;
+ struct elf_thread_core_info *t = threads;
+ threads = t->next;
+ WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
+ for (i = 1; i < info->thread_notes; ++i)
+ kfree(t->notes[i].data);
+ kfree(t);
+ }
+ kfree(info->psinfo.data);
+}
+
+#else
+
/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
@@ -1499,6 +1793,176 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
return sz;
}
+struct elf_note_info {
+ struct memelfnote *notes;
+ struct elf_prstatus *prstatus; /* NT_PRSTATUS */
+ struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
+ struct list_head thread_list;
+ elf_fpregset_t *fpu;
+#ifdef ELF_CORE_COPY_XFPREGS
+ elf_fpxregset_t *xfpu;
+#endif
+ int thread_status_size;
+ int numnote;
+};
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+ long signr, struct pt_regs *regs)
+{
+#define NUM_NOTES 6
+ struct list_head *t;
+ struct task_struct *g, *p;
+
+ info->notes = NULL;
+ info->prstatus = NULL;
+ info->psinfo = NULL;
+ info->fpu = NULL;
+#ifdef ELF_CORE_COPY_XFPREGS
+ info->xfpu = NULL;
+#endif
+ INIT_LIST_HEAD(&info->thread_list);
+
+ info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
+ GFP_KERNEL);
+ if (!info->notes)
+ return 0;
+ info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
+ if (!info->psinfo)
+ return 0;
+ info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
+ if (!info->prstatus)
+ return 0;
+ info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
+ if (!info->fpu)
+ return 0;
+#ifdef ELF_CORE_COPY_XFPREGS
+ info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
+ if (!info->xfpu)
+ return 0;
+#endif
+
+ info->thread_status_size = 0;
+ if (signr) {
+ struct elf_thread_status *tmp;
+ rcu_read_lock();
+ do_each_thread(g, p)
+ if (current->mm == p->mm && current != p) {
+ tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+ if (!tmp) {
+ rcu_read_unlock();
+ return 0;
+ }
+ tmp->thread = p;
+ list_add(&tmp->list, &info->thread_list);
+ }
+ while_each_thread(g, p);
+ rcu_read_unlock();
+ list_for_each(t, &info->thread_list) {
+ struct elf_thread_status *tmp;
+ int sz;
+
+ tmp = list_entry(t, struct elf_thread_status, list);
+ sz = elf_dump_thread_status(signr, tmp);
+ info->thread_status_size += sz;
+ }
+ }
+ /* now collect the dump for the current */
+ memset(info->prstatus, 0, sizeof(*info->prstatus));
+ fill_prstatus(info->prstatus, current, signr);
+ elf_core_copy_regs(&info->prstatus->pr_reg, regs);
+
+ /* Set up header */
+ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
+
+ /*
+ * Set up the notes in similar form to SVR4 core dumps made
+ * with info from their /proc.
+ */
+
+ fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
+ sizeof(*info->prstatus), info->prstatus);
+ fill_psinfo(info->psinfo, current->group_leader, current->mm);
+ fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
+ sizeof(*info->psinfo), info->psinfo);
+
+ info->numnote = 2;
+
+ fill_auxv_note(&info->notes[info->numnote++], current->mm);
+
+ /* Try to dump the FPU. */
+ info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
+ info->fpu);
+ if (info->prstatus->pr_fpvalid)
+ fill_note(info->notes + info->numnote++,
+ "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+ if (elf_core_copy_task_xfpregs(current, info->xfpu))
+ fill_note(info->notes + info->numnote++,
+ "LINUX", ELF_CORE_XFPREG_TYPE,
+ sizeof(*info->xfpu), info->xfpu);
+#endif
+
+ return 1;
+
+#undef NUM_NOTES
+}
+
+static size_t get_note_info_size(struct elf_note_info *info)
+{
+ int sz = 0;
+ int i;
+
+ for (i = 0; i < info->numnote; i++)
+ sz += notesize(info->notes + i);
+
+ sz += info->thread_status_size;
+
+ return sz;
+}
+
+static int write_note_info(struct elf_note_info *info,
+ struct file *file, loff_t *foffset)
+{
+ int i;
+ struct list_head *t;
+
+ for (i = 0; i < info->numnote; i++)
+ if (!writenote(info->notes + i, file, foffset))
+ return 0;
+
+ /* write out the thread status notes section */
+ list_for_each(t, &info->thread_list) {
+ struct elf_thread_status *tmp =
+ list_entry(t, struct elf_thread_status, list);
+
+ for (i = 0; i < tmp->num_notes; i++)
+ if (!writenote(&tmp->notes[i], file, foffset))
+ return 0;
+ }
+
+ return 1;
+}
+
+static void free_note_info(struct elf_note_info *info)
+{
+ while (!list_empty(&info->thread_list)) {
+ struct list_head *tmp = info->thread_list.next;
+ list_del(tmp);
+ kfree(list_entry(tmp, struct elf_thread_status, list));
+ }
+
+ kfree(info->prstatus);
+ kfree(info->psinfo);
+ kfree(info->notes);
+ kfree(info->fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+ kfree(info->xfpu);
+#endif
+}
+
+#endif
+
static struct vm_area_struct *first_vma(struct task_struct *tsk,
struct vm_area_struct *gate_vma)
{
@@ -1534,29 +1998,15 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
*/
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
{
-#define NUM_NOTES 6
int has_dumped = 0;
mm_segment_t fs;
int segs;
size_t size = 0;
- int i;
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
- int numnote;
- struct memelfnote *notes = NULL;
- struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
- struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
- struct task_struct *g, *p;
- LIST_HEAD(thread_list);
- struct list_head *t;
- elf_fpregset_t *fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
- elf_fpxregset_t *xfpu = NULL;
-#endif
- int thread_status_size = 0;
- elf_addr_t *auxv;
unsigned long mm_flags;
+ struct elf_note_info info;
/*
* We no longer stop all VM operations.
@@ -1574,52 +2024,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
if (!elf)
goto cleanup;
- prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
- if (!prstatus)
- goto cleanup;
- psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
- if (!psinfo)
- goto cleanup;
- notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
- if (!notes)
- goto cleanup;
- fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
- if (!fpu)
- goto cleanup;
-#ifdef ELF_CORE_COPY_XFPREGS
- xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
- if (!xfpu)
- goto cleanup;
-#endif
-
- if (signr) {
- struct elf_thread_status *tmp;
- rcu_read_lock();
- do_each_thread(g,p)
- if (current->mm == p->mm && current != p) {
- tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
- if (!tmp) {
- rcu_read_unlock();
- goto cleanup;
- }
- tmp->thread = p;
- list_add(&tmp->list, &thread_list);
- }
- while_each_thread(g,p);
- rcu_read_unlock();
- list_for_each(t, &thread_list) {
- struct elf_thread_status *tmp;
- int sz;
-
- tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(signr, tmp);
- thread_status_size += sz;
- }
- }
- /* now collect the dump for the current */
- memset(prstatus, 0, sizeof(*prstatus));
- fill_prstatus(prstatus, current, signr);
- elf_core_copy_regs(&prstatus->pr_reg, regs);
segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1630,42 +2034,16 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
if (gate_vma != NULL)
segs++;
- /* Set up header */
- fill_elf_header(elf, segs + 1); /* including notes section */
-
- has_dumped = 1;
- current->flags |= PF_DUMPCORE;
-
/*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
+ * Collect all the non-memory information about the process for the
+ * notes. This also sets up the file header.
*/
+ if (!fill_note_info(elf, segs + 1, /* including notes section */
+ &info, signr, regs))
+ goto cleanup;
- fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
- fill_psinfo(psinfo, current->group_leader, current->mm);
- fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
-
- numnote = 2;
-
- auxv = (elf_addr_t *)current->mm->saved_auxv;
-
- i = 0;
- do
- i += 2;
- while (auxv[i - 2] != AT_NULL);
- fill_note(&notes[numnote++], "CORE", NT_AUXV,
- i * sizeof(elf_addr_t), auxv);
-
- /* Try to dump the FPU. */
- if ((prstatus->pr_fpvalid =
- elf_core_copy_task_fpregs(current, regs, fpu)))
- fill_note(notes + numnote++,
- "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
- if (elf_core_copy_task_xfpregs(current, xfpu))
- fill_note(notes + numnote++,
- "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
-#endif
+ has_dumped = 1;
+ current->flags |= PF_DUMPCORE;
fs = get_fs();
set_fs(KERNEL_DS);
@@ -1678,12 +2056,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
/* Write notes phdr entry */
{
struct elf_phdr phdr;
- int sz = 0;
-
- for (i = 0; i < numnote; i++)
- sz += notesize(notes + i);
-
- sz += thread_status_size;
+ size_t sz = get_note_info_size(&info);
sz += elf_coredump_extra_notes_size();
@@ -1728,23 +2101,12 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
#endif
/* write out the notes section */
- for (i = 0; i < numnote; i++)
- if (!writenote(notes + i, file, &foffset))
- goto end_coredump;
+ if (!write_note_info(&info, file, &foffset))
+ goto end_coredump;
if (elf_coredump_extra_notes_write(file, &foffset))
goto end_coredump;
- /* write out the thread status notes section */
- list_for_each(t, &thread_list) {
- struct elf_thread_status *tmp =
- list_entry(t, struct elf_thread_status, list);
-
- for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], file, &foffset))
- goto end_coredump;
- }
-
/* Align to page */
DUMP_SEEK(dataoff - foffset);
@@ -1795,22 +2157,9 @@ end_coredump:
set_fs(fs);
cleanup:
- while (!list_empty(&thread_list)) {
- struct list_head *tmp = thread_list.next;
- list_del(tmp);
- kfree(list_entry(tmp, struct elf_thread_status, list));
- }
-
kfree(elf);
- kfree(prstatus);
- kfree(psinfo);
- kfree(notes);
- kfree(fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
- kfree(xfpu);
-#endif
+ free_note_info(&info);
return has_dumped;
-#undef NUM_NOTES
}
#endif /* USE_ELF_CORE_DUMP */
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
new file mode 100644
index 00000000000..0adced2f296
--- /dev/null
+++ b/fs/compat_binfmt_elf.c
@@ -0,0 +1,131 @@
+/*
+ * 32-bit compatibility support for ELF format executables and core dumps.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ *
+ * This file is used in a 64-bit kernel that wants to support 32-bit ELF.
+ * asm/elf.h is responsible for defining the compat_* and COMPAT_* macros
+ * used below, with definitions appropriate for 32-bit ABI compatibility.
+ *
+ * We use macros to rename the ABI types and machine-dependent
+ * functions used in binfmt_elf.c to compat versions.
+ */
+
+#include <linux/elfcore-compat.h>
+#include <linux/time.h>
+
+/*
+ * Rename the basic ELF layout types to refer to the 32-bit class of files.
+ */
+#undef ELF_CLASS
+#define ELF_CLASS ELFCLASS32
+
+#undef elfhdr
+#undef elf_phdr
+#undef elf_note
+#undef elf_addr_t
+#define elfhdr elf32_hdr
+#define elf_phdr elf32_phdr
+#define elf_note elf32_note
+#define elf_addr_t Elf32_Addr
+
+/*
+ * The machine-dependent core note format types are defined in elfcore-compat.h,
+ * which requires asm/elf.h to define compat_elf_gregset_t et al.
+ */
+#define elf_prstatus compat_elf_prstatus
+#define elf_prpsinfo compat_elf_prpsinfo
+
+/*
+ * Compat version of cputime_to_compat_timeval, perhaps this
+ * should be an inline in <linux/compat.h>.
+ */
+static void cputime_to_compat_timeval(const cputime_t cputime,
+ struct compat_timeval *value)
+{
+ struct timeval tv;
+ cputime_to_timeval(cputime, &tv);
+ value->tv_sec = tv.tv_sec;
+ value->tv_usec = tv.tv_usec;
+}
+
+#undef cputime_to_timeval
+#define cputime_to_timeval cputime_to_compat_timeval
+
+
+/*
+ * To use this file, asm/elf.h must define compat_elf_check_arch.
+ * The other following macros can be defined if the compat versions
+ * differ from the native ones, or omitted when they match.
+ */
+
+#undef ELF_ARCH
+#undef elf_check_arch
+#define elf_check_arch compat_elf_check_arch
+
+#ifdef COMPAT_ELF_PLATFORM
+#undef ELF_PLATFORM
+#define ELF_PLATFORM COMPAT_ELF_PLATFORM
+#endif
+
+#ifdef COMPAT_ELF_HWCAP
+#undef ELF_HWCAP
+#define ELF_HWCAP COMPAT_ELF_HWCAP
+#endif
+
+#ifdef COMPAT_ARCH_DLINFO
+#undef ARCH_DLINFO
+#define ARCH_DLINFO COMPAT_ARCH_DLINFO
+#endif
+
+#ifdef COMPAT_ELF_ET_DYN_BASE
+#undef ELF_ET_DYN_BASE
+#define ELF_ET_DYN_BASE COMPAT_ELF_ET_DYN_BASE
+#endif
+
+#ifdef COMPAT_ELF_EXEC_PAGESIZE
+#undef ELF_EXEC_PAGESIZE
+#define ELF_EXEC_PAGESIZE COMPAT_ELF_EXEC_PAGESIZE
+#endif
+
+#ifdef COMPAT_ELF_PLAT_INIT
+#undef ELF_PLAT_INIT
+#define ELF_PLAT_INIT COMPAT_ELF_PLAT_INIT
+#endif
+
+#ifdef COMPAT_SET_PERSONALITY
+#undef SET_PERSONALITY
+#define SET_PERSONALITY COMPAT_SET_PERSONALITY
+#endif
+
+#ifdef compat_start_thread
+#undef start_thread
+#define start_thread compat_start_thread
+#endif
+
+#ifdef compat_arch_setup_additional_pages
+#undef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+#undef arch_setup_additional_pages
+#define arch_setup_additional_pages compat_arch_setup_additional_pages
+#endif
+
+/*
+ * Rename a few of the symbols that binfmt_elf.c will define.
+ * These are all local so the names don't really matter, but it
+ * might make some debugging less confusing not to duplicate them.
+ */
+#define elf_format compat_elf_format
+#define init_elf_binfmt init_compat_elf_binfmt
+#define exit_elf_binfmt exit_compat_elf_binfmt
+
+/*
+ * We share all the actual code with the native (64-bit) version.
+ */
+#include "binfmt_elf.c"
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 0f69c416eeb..a5432bbbfb8 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -347,7 +347,8 @@ restart:
break;
}
retry = __process_buffer(journal, jh, bhs,&batch_count);
- if (!retry && lock_need_resched(&journal->j_list_lock)){
+ if (!retry && (need_resched() ||
+ spin_needbreak(&journal->j_list_lock))) {
spin_unlock(&journal->j_list_lock);
retry = 1;
break;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 610264b99a8..31853eb65b4 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -265,7 +265,7 @@ write_out_data:
put_bh(bh);
}
- if (lock_need_resched(&journal->j_list_lock)) {
+ if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
spin_unlock(&journal->j_list_lock);
goto write_out_data;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1b7f282c1ae..6914598022c 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -353,7 +353,8 @@ restart:
}
retry = __process_buffer(journal, jh, bhs, &batch_count,
transaction);
- if (!retry && lock_need_resched(&journal->j_list_lock)){
+ if (!retry && (need_resched() ||
+ spin_needbreak(&journal->j_list_lock))) {
spin_unlock(&journal->j_list_lock);
retry = 1;
break;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index da8d0eb3b7b..4f302d27927 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -341,7 +341,7 @@ write_out_data:
put_bh(bh);
}
- if (lock_need_resched(&journal->j_list_lock)) {
+ if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
spin_unlock(&journal->j_list_lock);
goto write_out_data;
}
diff --git a/include/acpi/reboot.h b/include/acpi/reboot.h
new file mode 100644
index 00000000000..8857f57e0b7
--- /dev/null
+++ b/include/acpi/reboot.h
@@ -0,0 +1,9 @@
+
+/*
+ * Dummy placeholder to make the EFI patches apply to the x86 tree.
+ * Andrew/Len, please just kill this file if you encounter it.
+ */
+#ifndef acpi_reboot
+# define acpi_reboot() do { } while (0)
+#endif
+
diff --git a/include/asm-alpha/agp.h b/include/asm-alpha/agp.h
index ef855a3bc0f..26c17913529 100644
--- a/include/asm-alpha/agp.h
+++ b/include/asm-alpha/agp.h
@@ -7,7 +7,6 @@
#define map_page_into_agp(page)
#define unmap_page_from_agp(page)
-#define flush_agp_mappings()
#define flush_agp_cache() mb()
/* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index d56fedbb457..2632328d864 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -31,14 +31,19 @@ struct bug_entry {
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0)
#endif
-#ifndef HAVE_ARCH_WARN_ON
+#ifndef __WARN
+#ifndef __ASSEMBLY__
+extern void warn_on_slowpath(const char *file, const int line);
+#define WANT_WARN_ON_SLOWPATH
+#endif
+#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
+#endif
+
+#ifndef WARN_ON
#define WARN_ON(condition) ({ \
int __ret_warn_on = !!(condition); \
- if (unlikely(__ret_warn_on)) { \
- printk("WARNING: at %s:%d %s()\n", __FILE__, \
- __LINE__, __FUNCTION__); \
- dump_stack(); \
- } \
+ if (unlikely(__ret_warn_on)) \
+ __WARN(); \
unlikely(__ret_warn_on); \
})
#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index d85172e9ed4..4b8d31cda1a 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -3,54 +3,79 @@
#include <linux/compiler.h>
#include <linux/threads.h>
-#define __GENERIC_PER_CPU
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
#ifdef CONFIG_SMP
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ *
+ * Most arches use the __per_cpu_offset array for those offsets but
+ * some arches have their own ways of determining the offset (x86_64, s390).
+ */
+#ifndef __per_cpu_offset
extern unsigned long __per_cpu_offset[NR_CPUS];
#define per_cpu_offset(x) (__per_cpu_offset[x])
+#endif
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({ \
- extern int simple_identifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
-#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size) \
-do { \
- unsigned int __i; \
- for_each_possible_cpu(__i) \
- memcpy((pcpudst)+__per_cpu_offset[__i], \
- (src), (size)); \
-} while (0)
-#else /* ! SMP */
+/*
+ * Determine the offset for the currently active processor.
+ * An arch may define __my_cpu_offset to provide a more effective
+ * means of obtaining the offset to the per cpu variables of the
+ * current processor.
+ */
+#ifndef __my_cpu_offset
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+/*
+ * Add a offset to a pointer but keep the pointer as is.
+ *
+ * Only S390 provides its own means of moving the pointer.
+ */
+#ifndef SHIFT_PERCPU_PTR
+#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset))
+#endif
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
+/*
+ * A percpu variable may point to a discarded regions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var) per_cpu__##var
-#define __raw_get_cpu_var(var) per_cpu__##var
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+extern void setup_per_cpu_areas(void);
+#endif
+
+#else /* ! SMP */
+
+#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var) per_cpu_var(var)
+#define __raw_get_cpu_var(var) per_cpu_var(var)
#endif /* SMP */
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
+ __typeof__(type) per_cpu_var(name)
#endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 75f2bfab614..6ce9f3ab928 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -15,7 +15,6 @@
#include <linux/swap.h>
#include <linux/quicklist.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
/*
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 76df771be58..f784d2f3414 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -184,6 +184,7 @@
VMLINUX_SYMBOL(__start___param) = .; \
*(__param) \
VMLINUX_SYMBOL(__stop___param) = .; \
+ . = ALIGN((align)); \
VMLINUX_SYMBOL(__end_rodata) = .; \
} \
. = ALIGN((align));
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index 81bcd5e5178..cd1cc39b559 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -127,6 +127,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
#endif
+#define acpi_unlazy_tlb(x)
+
#endif /*__KERNEL__*/
#endif /*_ASM_ACPI_H*/
diff --git a/include/asm-ia64/agp.h b/include/asm-ia64/agp.h
index 4e517f0e6af..c11fdd8ab4d 100644
--- a/include/asm-ia64/agp.h
+++ b/include/asm-ia64/agp.h
@@ -15,7 +15,6 @@
*/
#define map_page_into_agp(page) /* nothing */
#define unmap_page_from_agp(page) /* nothing */
-#define flush_agp_mappings() /* nothing */
#define flush_agp_cache() mb()
/* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index c4f1e328a5b..0095bcf7984 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -16,28 +16,11 @@
#include <linux/threads.h>
#ifdef HAVE_MODEL_SMALL_ATTRIBUTE
-# define __SMALL_ADDR_AREA __attribute__((__model__ (__small__)))
-#else
-# define __SMALL_ADDR_AREA
+# define PER_CPU_ATTRIBUTES __attribute__((__model__ (__small__)))
#endif
#define DECLARE_PER_CPU(type, name) \
- extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) \
- __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
-
-#ifdef CONFIG_SMP
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-#else
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-#endif
+ extern PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
/*
* Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
@@ -68,9 +51,6 @@ extern void *per_cpu_init(void);
#endif /* SMP */
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
/*
* Be extremely careful when taking the address of this variable! Due to virtual
* remapping, it is different from the canonical address returned by __get_cpu_var(var)!
diff --git a/include/asm-m32r/signal.h b/include/asm-m32r/signal.h
index 937258686ba..1a607066bc6 100644
--- a/include/asm-m32r/signal.h
+++ b/include/asm-m32r/signal.h
@@ -157,7 +157,7 @@ typedef struct sigaltstack {
#undef __HAVE_ARCH_SIG_BITOPS
struct pt_regs;
-extern int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
+extern int do_signal(struct pt_regs *regs, sigset_t *oldset);
#define ptrace_signal_deliver(regs, cookie) do { } while (0)
diff --git a/include/asm-parisc/agp.h b/include/asm-parisc/agp.h
index 9f61d4eb6c0..9651660da63 100644
--- a/include/asm-parisc/agp.h
+++ b/include/asm-parisc/agp.h
@@ -9,7 +9,6 @@
#define map_page_into_agp(page) /* nothing */
#define unmap_page_from_agp(page) /* nothing */
-#define flush_agp_mappings() /* nothing */
#define flush_agp_cache() mb()
/* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-powerpc/agp.h b/include/asm-powerpc/agp.h
index e5ccaca2f5a..86455c4c31e 100644
--- a/include/asm-powerpc/agp.h
+++ b/include/asm-powerpc/agp.h
@@ -6,7 +6,6 @@
#define map_page_into_agp(page)
#define unmap_page_from_agp(page)
-#define flush_agp_mappings()
#define flush_agp_cache() mb()
/* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
index 6b229626d3f..cc1cbf656b0 100644
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -16,15 +16,6 @@
#define __my_cpu_offset() get_paca()->data_offset
#define per_cpu_offset(x) (__per_cpu_offset(x))
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-
/* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
@@ -43,11 +34,6 @@ extern void setup_per_cpu_areas(void);
#else /* ! SMP */
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-
#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
#define __get_cpu_var(var) per_cpu__##var
#define __raw_get_cpu_var(var) per_cpu__##var
@@ -56,9 +42,6 @@ extern void setup_per_cpu_areas(void);
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
#else
#include <asm-generic/percpu.h>
#endif
diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index 13fccc5a411..3063363f679 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -119,6 +119,13 @@ do { \
} while (0)
#endif /* __powerpc64__ */
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index 545857e6444..2d676a87385 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -4,8 +4,6 @@
#include <linux/compiler.h>
#include <asm/lowcore.h>
-#define __GENERIC_PER_CPU
-
/*
* s390 uses its own implementation for per cpu data, the offset of
* the cpu local data area is cached in the cpu's lowcore memory.
@@ -36,16 +34,6 @@
extern unsigned long __per_cpu_offset[NR_CPUS];
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) \
- __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-
#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
@@ -62,11 +50,6 @@ do { \
#else /* ! SMP */
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-
#define __get_cpu_var(var) __reloc_hide(var,0)
#define __raw_get_cpu_var(var) __reloc_hide(var,0)
#define per_cpu(var,cpu) __reloc_hide(var,0)
@@ -75,7 +58,4 @@ do { \
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
#endif /* __ARCH_S390_PERCPU__ */
diff --git a/include/asm-sparc64/agp.h b/include/asm-sparc64/agp.h
index 58f8cb6ae76..e9fcf0e781e 100644
--- a/include/asm-sparc64/agp.h
+++ b/include/asm-sparc64/agp.h
@@ -5,7 +5,6 @@
#define map_page_into_agp(page)
#define unmap_page_from_agp(page)
-#define flush_agp_mappings()
#define flush_agp_cache() mb()
/* Convert a physical address to an address suitable for the GART. */
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index a1f53a4da40..c7e52decba9 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -16,15 +16,6 @@ extern unsigned long __per_cpu_shift;
(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
#define per_cpu_offset(x) (__per_cpu_offset(x))
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-
/* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
@@ -41,10 +32,6 @@ do { \
#else /* ! SMP */
#define real_setup_per_cpu_areas() do { } while (0)
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
#define __get_cpu_var(var) per_cpu__##var
@@ -54,7 +41,4 @@ do { \
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
#endif /* __ARCH_SPARC64_PERCPU__ */
diff --git a/include/asm-um/asm.h b/include/asm-um/asm.h
new file mode 100644
index 00000000000..af1269a1e9e
--- /dev/null
+++ b/include/asm-um/asm.h
@@ -0,0 +1,6 @@
+#ifndef __UM_ASM_H
+#define __UM_ASM_H
+
+#include "asm/arch/asm.h"
+
+#endif
diff --git a/include/asm-um/linkage.h b/include/asm-um/linkage.h
index 78b862472b3..cdb3024a699 100644
--- a/include/asm-um/linkage.h
+++ b/include/asm-um/linkage.h
@@ -6,7 +6,6 @@
/* <linux/linkage.h> will pick sane defaults */
#ifdef CONFIG_GPROF
-#undef FASTCALL
#undef fastcall
#endif
diff --git a/include/asm-um/nops.h b/include/asm-um/nops.h
new file mode 100644
index 00000000000..814e9bf5dea
--- /dev/null
+++ b/include/asm-um/nops.h
@@ -0,0 +1,6 @@
+#ifndef __UM_NOPS_H
+#define __UM_NOPS_H
+
+#include "asm/arch/nops.h"
+
+#endif
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 12db5a1cdd7..e6189b22914 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -9,15 +9,13 @@ header-y += prctl.h
header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
-header-y += vsyscall32.h
unifdef-y += e820.h
unifdef-y += ist.h
unifdef-y += mce.h
unifdef-y += msr.h
unifdef-y += mtrr.h
-unifdef-y += page_32.h
-unifdef-y += page_64.h
+unifdef-y += page.h
unifdef-y += posix_types_32.h
unifdef-y += posix_types_64.h
unifdef-y += ptrace.h
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index f8a89793ac8..98a9ca26653 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -1,13 +1,123 @@
#ifndef _ASM_X86_ACPI_H
#define _ASM_X86_ACPI_H
-#ifdef CONFIG_X86_32
-# include "acpi_32.h"
-#else
-# include "acpi_64.h"
-#endif
+/*
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <acpi/pdc_intel.h>
+#include <asm/numa.h>
#include <asm/processor.h>
+#include <asm/mmu.h>
+
+#define COMPILER_DEPENDENT_INT64 long long
+#define COMPILER_DEPENDENT_UINT64 unsigned long long
+
+/*
+ * Calling conventions:
+ *
+ * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
+ * ACPI_EXTERNAL_XFACE - External ACPI interfaces
+ * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
+ * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
+ */
+#define ACPI_SYSTEM_XFACE
+#define ACPI_EXTERNAL_XFACE
+#define ACPI_INTERNAL_XFACE
+#define ACPI_INTERNAL_VAR_XFACE
+
+/* Asm macros */
+
+#define ACPI_ASM_MACROS
+#define BREAKPOINT3
+#define ACPI_DISABLE_IRQS() local_irq_disable()
+#define ACPI_ENABLE_IRQS() local_irq_enable()
+#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+
+int __acpi_acquire_global_lock(unsigned int *lock);
+int __acpi_release_global_lock(unsigned int *lock);
+
+#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
+ ((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
+
+#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
+ ((Acq) = __acpi_release_global_lock(&facs->global_lock))
+
+/*
+ * Math helper asm macros
+ */
+#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
+ asm("divl %2;" \
+ :"=a"(q32), "=d"(r32) \
+ :"r"(d32), \
+ "0"(n_lo), "1"(n_hi))
+
+
+#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
+ asm("shrl $1,%2 ;" \
+ "rcrl $1,%3;" \
+ :"=r"(n_hi), "=r"(n_lo) \
+ :"0"(n_hi), "1"(n_lo))
+
+#ifdef CONFIG_ACPI
+extern int acpi_lapic;
+extern int acpi_ioapic;
+extern int acpi_noirq;
+extern int acpi_strict;
+extern int acpi_disabled;
+extern int acpi_ht;
+extern int acpi_pci_disabled;
+extern int acpi_skip_timer_override;
+extern int acpi_use_timer_override;
+
+static inline void disable_acpi(void)
+{
+ acpi_disabled = 1;
+ acpi_ht = 0;
+ acpi_pci_disabled = 1;
+ acpi_noirq = 1;
+}
+
+/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
+#define FIX_ACPI_PAGES 4
+
+extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+
+static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
+static inline void acpi_disable_pci(void)
+{
+ acpi_pci_disabled = 1;
+ acpi_noirq_set();
+}
+extern int acpi_irq_balance_set(char *str);
+
+/* routines for saving/restoring kernel state */
+extern int acpi_save_state_mem(void);
+extern void acpi_restore_state_mem(void);
+
+extern unsigned long acpi_wakeup_address;
+
+/* early initialization routine */
+extern void acpi_reserve_bootmem(void);
/*
* Check if the CPU can handle C2 and deeper
@@ -29,4 +139,35 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
return max_cstate;
}
+#else /* !CONFIG_ACPI */
+
+#define acpi_lapic 0
+#define acpi_ioapic 0
+static inline void acpi_noirq_set(void) { }
+static inline void acpi_disable_pci(void) { }
+static inline void disable_acpi(void) { }
+
+#endif /* !CONFIG_ACPI */
+
+#define ARCH_HAS_POWER_INIT 1
+
+struct bootnode;
+
+#ifdef CONFIG_ACPI_NUMA
+extern int acpi_numa;
+extern int acpi_scan_nodes(unsigned long start, unsigned long end);
+#ifdef CONFIG_X86_64
+# define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+#endif
+extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
+ int num_nodes);
+#else
+static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
+ int num_nodes)
+{
+}
#endif
+
+#define acpi_unlazy_tlb(x) leave_mm(x)
+
+#endif /*__X86_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
deleted file mode 100644
index 723493e6c85..00000000000
--- a/include/asm-x86/acpi_32.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * asm-i386/acpi.h
- *
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _ASM_ACPI_H
-#define _ASM_ACPI_H
-
-#ifdef __KERNEL__
-
-#include <acpi/pdc_intel.h>
-
-#include <asm/system.h> /* defines cmpxchg */
-
-#define COMPILER_DEPENDENT_INT64 long long
-#define COMPILER_DEPENDENT_UINT64 unsigned long long
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE - External ACPI interfaces
- * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* Asm macros */
-
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS() local_irq_enable()
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
-
-int __acpi_acquire_global_lock(unsigned int *lock);
-int __acpi_release_global_lock(unsigned int *lock);
-
-#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
- ((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
-
-#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
- ((Acq) = __acpi_release_global_lock(&facs->global_lock))
-
-/*
- * Math helper asm macros
- */
-#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
- asm("divl %2;" \
- :"=a"(q32), "=d"(r32) \
- :"r"(d32), \
- "0"(n_lo), "1"(n_hi))
-
-
-#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
- asm("shrl $1,%2;" \
- "rcrl $1,%3;" \
- :"=r"(n_hi), "=r"(n_lo) \
- :"0"(n_hi), "1"(n_lo))
-
-extern void early_quirks(void);
-
-#ifdef CONFIG_ACPI
-extern int acpi_lapic;
-extern int acpi_ioapic;
-extern int acpi_noirq;
-extern int acpi_strict;
-extern int acpi_disabled;
-extern int acpi_ht;
-extern int acpi_pci_disabled;
-static inline void disable_acpi(void)
-{
- acpi_disabled = 1;
- acpi_ht = 0;
- acpi_pci_disabled = 1;
- acpi_noirq = 1;
-}
-
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
-extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
-
-#ifdef CONFIG_X86_IO_APIC
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-#endif
-
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-static inline void acpi_disable_pci(void)
-{
- acpi_pci_disabled = 1;
- acpi_noirq_set();
-}
-extern int acpi_irq_balance_set(char *str);
-
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
-
-extern unsigned long acpi_wakeup_address;
-
-/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
-
-#else /* !CONFIG_ACPI */
-
-#define acpi_lapic 0
-#define acpi_ioapic 0
-static inline void acpi_noirq_set(void) { }
-static inline void acpi_disable_pci(void) { }
-static inline void disable_acpi(void) { }
-
-#endif /* !CONFIG_ACPI */
-
-#define ARCH_HAS_POWER_INIT 1
-
-#endif /*__KERNEL__*/
-
-#endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi_64.h b/include/asm-x86/acpi_64.h
deleted file mode 100644
index 98173357dd8..00000000000
--- a/include/asm-x86/acpi_64.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * asm-x86_64/acpi.h
- *
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _ASM_ACPI_H
-#define _ASM_ACPI_H
-
-#ifdef __KERNEL__
-
-#include <acpi/pdc_intel.h>
-#include <asm/numa.h>
-
-#define COMPILER_DEPENDENT_INT64 long long
-#define COMPILER_DEPENDENT_UINT64 unsigned long long
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE - External ACPI interfaces
- * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* Asm macros */
-
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS() local_irq_enable()
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
-
-int __acpi_acquire_global_lock(unsigned int *lock);
-int __acpi_release_global_lock(unsigned int *lock);
-
-#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
- ((Acq) = __acpi_acquire_global_lock(&facs->global_lock))
-
-#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
- ((Acq) = __acpi_release_global_lock(&facs->global_lock))
-
-/*
- * Math helper asm macros
- */
-#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
- asm("divl %2;" \
- :"=a"(q32), "=d"(r32) \
- :"r"(d32), \
- "0"(n_lo), "1"(n_hi))
-
-
-#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
- asm("shrl $1,%2;" \
- "rcrl $1,%3;" \
- :"=r"(n_hi), "=r"(n_lo) \
- :"0"(n_hi), "1"(n_lo))
-
-#ifdef CONFIG_ACPI
-extern int acpi_lapic;
-extern int acpi_ioapic;
-extern int acpi_noirq;
-extern int acpi_strict;
-extern int acpi_disabled;
-extern int acpi_pci_disabled;
-extern int acpi_ht;
-static inline void disable_acpi(void)
-{
- acpi_disabled = 1;
- acpi_ht = 0;
- acpi_pci_disabled = 1;
- acpi_noirq = 1;
-}
-
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
-extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-static inline void acpi_disable_pci(void)
-{
- acpi_pci_disabled = 1;
- acpi_noirq_set();
-}
-extern int acpi_irq_balance_set(char *str);
-
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
-
-extern unsigned long acpi_wakeup_address;
-
-/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
-
-#else /* !CONFIG_ACPI */
-
-#define acpi_lapic 0
-#define acpi_ioapic 0
-static inline void acpi_noirq_set(void) { }
-static inline void acpi_disable_pci(void) { }
-
-#endif /* !CONFIG_ACPI */
-
-extern int acpi_numa;
-extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-extern int acpi_disabled;
-extern int acpi_pci_disabled;
-
-#define ARCH_HAS_POWER_INIT 1
-
-extern int acpi_skip_timer_override;
-extern int acpi_use_timer_override;
-
-#ifdef CONFIG_ACPI_NUMA
-extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes);
-#else
-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes)
-{
-}
-#endif
-
-#endif /*__KERNEL__*/
-
-#endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index 62df2a9e713..e4004a9f6a9 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -12,13 +12,8 @@
* page. This avoids data corruption on some CPUs.
*/
-/*
- * Caller's responsibility to call global_flush_tlb() for performance
- * reasons
- */
-#define map_page_into_agp(page) change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)
-#define unmap_page_from_agp(page) change_page_attr(page, 1, PAGE_KERNEL)
-#define flush_agp_mappings() global_flush_tlb()
+#define map_page_into_agp(page) set_pages_uc(page, 1)
+#define unmap_page_from_agp(page) set_pages_wb(page, 1)
/*
* Could use CLFLUSH here if the cpu supports it. But then it would
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 9eef6a32a13..d8bacf3c4b0 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -1,5 +1,161 @@
-#ifdef CONFIG_X86_32
-# include "alternative_32.h"
+#ifndef _ASM_X86_ALTERNATIVE_H
+#define _ASM_X86_ALTERNATIVE_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/asm.h>
+
+/*
+ * Alternative inline assembly for SMP.
+ *
+ * The LOCK_PREFIX macro defined here replaces the LOCK and
+ * LOCK_PREFIX macros used everywhere in the source tree.
+ *
+ * SMP alternatives use the same data structures as the other
+ * alternatives and the X86_FEATURE_UP flag to indicate the case of a
+ * UP system running a SMP kernel. The existing apply_alternatives()
+ * works fine for patching a SMP kernel for UP.
+ *
+ * The SMP alternative tables can be kept after boot and contain both
+ * UP and SMP versions of the instructions to allow switching back to
+ * SMP at runtime, when hotplugging in a new CPU, which is especially
+ * useful in virtualized environments.
+ *
+ * The very common lock prefix is handled as special case in a
+ * separate table which is a pure address list without replacement ptr
+ * and size information. That keeps the table sizes small.
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX \
+ ".section .smp_locks,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR "661f\n" /* address */ \
+ ".previous\n" \
+ "661:\n\tlock; "
+
+#else /* ! CONFIG_SMP */
+#define LOCK_PREFIX ""
+#endif
+
+/* This must be included *after* the definition of LOCK_PREFIX */
+#include <asm/cpufeature.h>
+
+struct alt_instr {
+ u8 *instr; /* original instruction */
+ u8 *replacement;
+ u8 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction, <= instrlen */
+ u8 pad1;
+#ifdef CONFIG_X86_64
+ u32 pad2;
+#endif
+};
+
+extern void alternative_instructions(void);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+struct module;
+
+#ifdef CONFIG_SMP
+extern void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end);
+extern void alternatives_smp_module_del(struct module *mod);
+extern void alternatives_smp_switch(int smp);
+#else
+static inline void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end) {}
+static inline void alternatives_smp_module_del(struct module *mod) {}
+static inline void alternatives_smp_switch(int smp) {}
+#endif /* CONFIG_SMP */
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR "661b\n" /* label */ \
+ _ASM_PTR "663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature) : "memory")
+
+/*
+ * Alternative inline assembly with input.
+ *
+ * Pecularities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+ * Best is to use constraints that are fixed size (like (%1) ... "r")
+ * If you use variable sized constraints like "m" or "g" in the
+ * replacement make sure to pad to the worst case length.
+ */
+#define alternative_input(oldinstr, newinstr, feature, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR "661b\n" /* label */ \
+ _ASM_PTR "663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature), ##input)
+
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR "661b\n" /* label */ \
+ _ASM_PTR "663f\n" /* new instruction */ \
+ " .byte %c[feat]\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" : output : [feat] "i" (feature), ##input)
+
+/*
+ * use this macro(s) if you need more than one output parameter
+ * in alternative_io
+ */
+#define ASM_OUTPUT2(a, b) a, b
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end);
#else
-# include "alternative_64.h"
+static inline void
+apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end)
+{}
+#define __parainstructions NULL
+#define __parainstructions_end NULL
#endif
+
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
+#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h
deleted file mode 100644
index bda6c810c0f..00000000000
--- a/include/asm-x86/alternative_32.h
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef _I386_ALTERNATIVE_H
-#define _I386_ALTERNATIVE_H
-
-#include <asm/types.h>
-#include <linux/stddef.h>
-#include <linux/types.h>
-
-struct alt_instr {
- u8 *instr; /* original instruction */
- u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
- u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad;
-};
-
-extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-
-struct module;
-#ifdef CONFIG_SMP
-extern void alternatives_smp_module_add(struct module *mod, char *name,
- void *locks, void *locks_end,
- void *text, void *text_end);
-extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
-#else
-static inline void alternatives_smp_module_add(struct module *mod, char *name,
- void *locks, void *locks_end,
- void *text, void *text_end) {}
-static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
-#endif /* CONFIG_SMP */
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */\
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Pecularities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement maake sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */\
- ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c[feat]\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" : output : [feat] "i" (feature), ##input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a, b) a, b
-
-/*
- * Alternative inline assembly for SMP.
- *
- * The LOCK_PREFIX macro defined here replaces the LOCK and
- * LOCK_PREFIX macros used everywhere in the source tree.
- *
- * SMP alternatives use the same data structures as the other
- * alternatives and the X86_FEATURE_UP flag to indicate the case of a
- * UP system running a SMP kernel. The existing apply_alternatives()
- * works fine for patching a SMP kernel for UP.
- *
- * The SMP alternative tables can be kept after boot and contain both
- * UP and SMP versions of the instructions to allow switching back to
- * SMP at runtime, when hotplugging in a new CPU, which is especially
- * useful in virtualized environments.
- *
- * The very common lock prefix is handled as special case in a
- * separate table which is a pure address list without replacement ptr
- * and size information. That keeps the table sizes small.
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX \
- ".section .smp_locks,\"a\"\n" \
- " .align 4\n" \
- " .long 661f\n" /* address */ \
- ".previous\n" \
- "661:\n\tlock; "
-
-#else /* ! CONFIG_SMP */
-#define LOCK_PREFIX ""
-#endif
-
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end);
-#else
-static inline void
-apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void text_poke(void *addr, unsigned char *opcode, int len);
-
-#endif /* _I386_ALTERNATIVE_H */
diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h
deleted file mode 100644
index ab161e81015..00000000000
--- a/include/asm-x86/alternative_64.h
+++ /dev/null
@@ -1,159 +0,0 @@
-#ifndef _X86_64_ALTERNATIVE_H
-#define _X86_64_ALTERNATIVE_H
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-
-/*
- * Alternative inline assembly for SMP.
- *
- * The LOCK_PREFIX macro defined here replaces the LOCK and
- * LOCK_PREFIX macros used everywhere in the source tree.
- *
- * SMP alternatives use the same data structures as the other
- * alternatives and the X86_FEATURE_UP flag to indicate the case of a
- * UP system running a SMP kernel. The existing apply_alternatives()
- * works fine for patching a SMP kernel for UP.
- *
- * The SMP alternative tables can be kept after boot and contain both
- * UP and SMP versions of the instructions to allow switching back to
- * SMP at runtime, when hotplugging in a new CPU, which is especially
- * useful in virtualized environments.
- *
- * The very common lock prefix is handled as special case in a
- * separate table which is a pure address list without replacement ptr
- * and size information. That keeps the table sizes small.
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX \
- ".section .smp_locks,\"a\"\n" \
- " .align 8\n" \
- " .quad 661f\n" /* address */ \
- ".previous\n" \
- "661:\n\tlock; "
-
-#else /* ! CONFIG_SMP */
-#define LOCK_PREFIX ""
-#endif
-
-/* This must be included *after* the definition of LOCK_PREFIX */
-#include <asm/cpufeature.h>
-
-struct alt_instr {
- u8 *instr; /* original instruction */
- u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
- u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad[5];
-};
-
-extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-
-struct module;
-
-#ifdef CONFIG_SMP
-extern void alternatives_smp_module_add(struct module *mod, char *name,
- void *locks, void *locks_end,
- void *text, void *text_end);
-extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
-#else
-static inline void alternatives_smp_module_add(struct module *mod, char *name,
- void *locks, void *locks_end,
- void *text, void *text_end) {}
-static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
-#endif
-
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Pecularities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 8\n" \
- " .quad 661b\n" /* label */ \
- " .quad 663f\n" /* new instruction */ \
- " .byte %c[feat]\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" : output : [feat] "i" (feature), ##input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a, b) a, b
-
-struct paravirt_patch;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
-#else
-static inline void
-apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void text_poke(void *addr, unsigned char *opcode, int len);
-
-#endif /* _X86_64_ALTERNATIVE_H */
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 9fbcc0bd2ac..bcfc07fd366 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -1,5 +1,140 @@
-#ifdef CONFIG_X86_32
-# include "apic_32.h"
+#ifndef _ASM_X86_APIC_H
+#define _ASM_X86_APIC_H
+
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#define ARCH_APICTIMER_STOPS_ON_C3 1
+
+#define Dprintk(x...)
+
+/*
+ * Debugging macros
+ */
+#define APIC_QUIET 0
+#define APIC_VERBOSE 1
+#define APIC_DEBUG 2
+
+/*
+ * Define the default level of output to be very little
+ * This can be turned up by using apic=verbose for more
+ * information and apic=debug for _lots_ of information.
+ * apic_verbosity is defined in apic.c
+ */
+#define apic_printk(v, s, a...) do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+ } while (0)
+
+
+extern void generic_apic_probe(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+extern int apic_verbosity;
+extern int timer_over_8254;
+extern int local_apic_timer_c2_ok;
+extern int local_apic_timer_disabled;
+
+extern int apic_runs_main_timer;
+extern int ioapic_force;
+extern int disable_apic;
+extern int disable_apic_timer;
+extern unsigned boot_cpu_id;
+
+/*
+ * Basic functions accessing APICs.
+ */
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
#else
-# include "apic_64.h"
+#define apic_write native_apic_write
+#define apic_write_atomic native_apic_write_atomic
+#define apic_read native_apic_read
+#define setup_boot_clock setup_boot_APIC_clock
+#define setup_secondary_clock setup_secondary_APIC_clock
#endif
+
+static inline void native_apic_write(unsigned long reg, u32 v)
+{
+ *((volatile u32 *)(APIC_BASE + reg)) = v;
+}
+
+static inline void native_apic_write_atomic(unsigned long reg, u32 v)
+{
+ (void) xchg((u32*)(APIC_BASE + reg), v);
+}
+
+static inline u32 native_apic_read(unsigned long reg)
+{
+ return *((volatile u32 *)(APIC_BASE + reg));
+}
+
+extern void apic_wait_icr_idle(void);
+extern u32 safe_apic_wait_icr_idle(void);
+extern int get_physical_broadcast(void);
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x, y) apic_write((x), (y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x, y) apic_write_atomic((x), (y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write_around(APIC_EOI, 0);
+}
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern int verify_local_APIC(void);
+extern void cache_APIC_registers(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void end_local_APIC_setup(void);
+extern void init_apic_mappings(void);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+extern void enable_NMI_through_LVT0(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern void setup_apic_routing(void);
+#endif
+
+extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask);
+extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
+
+extern int apic_is_clustered_box(void);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok 1
+
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h
deleted file mode 100644
index be158b27d54..00000000000
--- a/include/asm-x86/apic_32.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <linux/pm.h>
-#include <linux/delay.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#define Dprintk(x...)
-
-/*
- * Debugging macros
- */
-#define APIC_QUIET 0
-#define APIC_VERBOSE 1
-#define APIC_DEBUG 2
-
-extern int apic_verbosity;
-
-/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
- */
-#define apic_printk(v, s, a...) do { \
- if ((v) <= apic_verbosity) \
- printk(s, ##a); \
- } while (0)
-
-
-extern void generic_apic_probe(void);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/*
- * Basic functions accessing APICs.
- */
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define apic_write native_apic_write
-#define apic_write_atomic native_apic_write_atomic
-#define apic_read native_apic_read
-#define setup_boot_clock setup_boot_APIC_clock
-#define setup_secondary_clock setup_secondary_APIC_clock
-#endif
-
-static __inline fastcall void native_apic_write(unsigned long reg,
- unsigned long v)
-{
- *((volatile unsigned long *)(APIC_BASE+reg)) = v;
-}
-
-static __inline fastcall void native_apic_write_atomic(unsigned long reg,
- unsigned long v)
-{
- xchg((volatile unsigned long *)(APIC_BASE+reg), v);
-}
-
-static __inline fastcall unsigned long native_apic_read(unsigned long reg)
-{
- return *((volatile unsigned long *)(APIC_BASE+reg));
-}
-
-void apic_wait_icr_idle(void);
-unsigned long safe_apic_wait_icr_idle(void);
-int get_physical_broadcast(void);
-
-#ifdef CONFIG_X86_GOOD_APIC
-# define FORCE_READ_AROUND_WRITE 0
-# define apic_read_around(x)
-# define apic_write_around(x,y) apic_write((x),(y))
-#else
-# define FORCE_READ_AROUND_WRITE 1
-# define apic_read_around(x) apic_read(x)
-# define apic_write_around(x,y) apic_write_atomic((x),(y))
-#endif
-
-static inline void ack_APIC_irq(void)
-{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction:
- * - a single rmw on Pentium/82489DX
- * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
- * ... yummie.
- */
-
- /* Docs say use 0 for future compatibility */
- apic_write_around(APIC_EOI, 0);
-}
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (int virt_wire_setup);
-extern void disable_local_APIC (void);
-extern void lapic_shutdown (void);
-extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (void);
-extern void setup_boot_APIC_clock (void);
-extern void setup_secondary_APIC_clock (void);
-extern int APIC_init_uniprocessor (void);
-
-extern void enable_NMI_through_LVT0 (void * dummy);
-
-#define ARCH_APICTIMER_STOPS_ON_C3 1
-
-extern int timer_over_8254;
-extern int local_apic_timer_c2_ok;
-
-extern int local_apic_timer_disabled;
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok 1
-
-#endif /* !CONFIG_X86_LOCAL_APIC */
-
-#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
deleted file mode 100644
index 2747a11a2b1..00000000000
--- a/include/asm-x86/apic_64.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <linux/pm.h>
-#include <linux/delay.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/system.h>
-
-#define Dprintk(x...)
-
-/*
- * Debugging macros
- */
-#define APIC_QUIET 0
-#define APIC_VERBOSE 1
-#define APIC_DEBUG 2
-
-extern int apic_verbosity;
-extern int apic_runs_main_timer;
-extern int ioapic_force;
-extern int disable_apic_timer;
-
-/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
- */
-#define apic_printk(v, s, a...) do { \
- if ((v) <= apic_verbosity) \
- printk(s, ##a); \
- } while (0)
-
-struct pt_regs;
-
-/*
- * Basic functions accessing APICs.
- */
-
-static __inline void apic_write(unsigned long reg, unsigned int v)
-{
- *((volatile unsigned int *)(APIC_BASE+reg)) = v;
-}
-
-static __inline unsigned int apic_read(unsigned long reg)
-{
- return *((volatile unsigned int *)(APIC_BASE+reg));
-}
-
-extern void apic_wait_icr_idle(void);
-extern unsigned int safe_apic_wait_icr_idle(void);
-
-static inline void ack_APIC_irq(void)
-{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction:
- * - a single rmw on Pentium/82489DX
- * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
- * ... yummie.
- */
-
- /* Docs say use 0 for future compatibility */
- apic_write(APIC_EOI, 0);
-}
-
-extern int get_maxlvt (void);
-extern void clear_local_APIC (void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (int virt_wire_setup);
-extern void disable_local_APIC (void);
-extern void lapic_shutdown (void);
-extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (void);
-extern void setup_boot_APIC_clock (void);
-extern void setup_secondary_APIC_clock (void);
-extern int APIC_init_uniprocessor (void);
-extern void setup_apic_routing(void);
-
-extern void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
- unsigned char msg_type, unsigned char mask);
-
-extern int apic_is_clustered_box(void);
-
-#define K8_APIC_EXT_LVT_BASE 0x500
-#define K8_APIC_EXT_INT_MSG_FIX 0x0
-#define K8_APIC_EXT_INT_MSG_SMI 0x2
-#define K8_APIC_EXT_INT_MSG_NMI 0x4
-#define K8_APIC_EXT_INT_MSG_EXT 0x7
-#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0
-
-#define ARCH_APICTIMER_STOPS_ON_C3 1
-
-extern unsigned boot_cpu_id;
-extern int local_apic_timer_c2_ok;
-
-#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 4542c220bf4..550af7a6f88 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -1,5 +1,413 @@
+#ifndef _ASM_X86_APICDEF_H
+#define _ASM_X86_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+
+#define APIC_ID 0x20
+
+#ifdef CONFIG_X86_64
+# define APIC_ID_MASK (0xFFu<<24)
+# define GET_APIC_ID(x) (((x)>>24)&0xFFu)
+# define SET_APIC_ID(x) (((x)<<24))
+#endif
+
+#define APIC_LVR 0x30
+#define APIC_LVR_MASK 0xFF00FF
+#define GET_APIC_VERSION(x) ((x)&0xFFu)
+#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFFu)
+#define APIC_INTEGRATED(x) ((x)&0xF0u)
+#define APIC_XAPIC(x) ((x) >= 0x14)
+#define APIC_TASKPRI 0x80
+#define APIC_TPRI_MASK 0xFFu
+#define APIC_ARBPRI 0x90
+#define APIC_ARBPRI_MASK 0xFFu
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_EIO_ACK 0x0
+#define APIC_RRR 0xC0
+#define APIC_LDR 0xD0
+#define APIC_LDR_MASK (0xFFu<<24)
+#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFFu)
+#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
+#define APIC_ALL_CPUS 0xFFu
+#define APIC_DFR 0xE0
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul
+#define APIC_DFR_FLAT 0xFFFFFFFFul
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1<<9)
+#define APIC_SPIV_APIC_ENABLED (1<<8)
+#define APIC_ISR 0x100
+#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */
+#define APIC_TMR 0x180
+#define APIC_IRR 0x200
+#define APIC_ESR 0x280
+#define APIC_ESR_SEND_CS 0x00001
+#define APIC_ESR_RECV_CS 0x00002
+#define APIC_ESR_SEND_ACC 0x00004
+#define APIC_ESR_RECV_ACC 0x00008
+#define APIC_ESR_SENDILL 0x00020
+#define APIC_ESR_RECVILL 0x00040
+#define APIC_ESR_ILLREGA 0x00080
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
+#define SET_APIC_DEST_FIELD(x) ((x)<<24)
+#define APIC_LVTT 0x320
+#define APIC_LVTTHMR 0x330
+#define APIC_LVTPC 0x340
+#define APIC_LVT0 0x350
+#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
+#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
+#define SET_APIC_TIMER_BASE(x) (((x)<<18))
+#define APIC_TIMER_BASE_CLKIN 0x0
+#define APIC_TIMER_BASE_TMBASE 0x1
+#define APIC_TIMER_BASE_DIV 0x2
+#define APIC_LVT_TIMER_PERIODIC (1<<17)
+#define APIC_LVT_MASKED (1<<16)
+#define APIC_LVT_LEVEL_TRIGGER (1<<15)
+#define APIC_LVT_REMOTE_IRR (1<<14)
+#define APIC_INPUT_POLARITY (1<<13)
+#define APIC_SEND_PENDING (1<<12)
+#define APIC_MODE_MASK 0x700
+#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
+#define SET_APIC_DELIVERY_MODE(x, y) (((x)&~0x700)|((y)<<8))
+#define APIC_MODE_FIXED 0x0
+#define APIC_MODE_NMI 0x4
+#define APIC_MODE_EXTINT 0x7
+#define APIC_LVT1 0x360
+#define APIC_LVTERR 0x370
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+#define APIC_TDR_DIV_TMBASE (1<<2)
+#define APIC_TDR_DIV_1 0xB
+#define APIC_TDR_DIV_2 0x0
+#define APIC_TDR_DIV_4 0x1
+#define APIC_TDR_DIV_8 0x2
+#define APIC_TDR_DIV_16 0x3
+#define APIC_TDR_DIV_32 0x8
+#define APIC_TDR_DIV_64 0x9
+#define APIC_TDR_DIV_128 0xA
+#define APIC_EILVT0 0x500
+#define APIC_EILVT_NR_AMD_K8 1 /* Number of extended interrupts */
+#define APIC_EILVT_NR_AMD_10H 4
+#define APIC_EILVT_LVTOFF(x) (((x)>>4)&0xF)
+#define APIC_EILVT_MSG_FIX 0x0
+#define APIC_EILVT_MSG_SMI 0x2
+#define APIC_EILVT_MSG_NMI 0x4
+#define APIC_EILVT_MSG_EXT 0x7
+#define APIC_EILVT_MASKED (1<<16)
+#define APIC_EILVT1 0x510
+#define APIC_EILVT2 0x520
+#define APIC_EILVT3 0x530
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
#ifdef CONFIG_X86_32
-# include "apicdef_32.h"
+# define MAX_IO_APICS 64
#else
-# include "apicdef_64.h"
+# define MAX_IO_APICS 128
+# define MAX_LOCAL_APIC 256
+#endif
+
+/*
+ * All x86-64 systems are xAPIC compatible.
+ * In the following, "apicid" is a physical APIC ID.
+ */
+#define XAPIC_DEST_CPUS_SHIFT 4
+#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
+#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
+#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
+#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT)
+#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
+#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
+
+/*
+ * the local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * problem why we cannot use it right now is the P5 APIC, it has an
+ * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+struct local_apic {
+
+/*000*/ struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/ struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/ struct { /* APIC ID Register */
+ u32 __reserved_1 : 24,
+ phys_apic_id : 4,
+ __reserved_2 : 4;
+ u32 __reserved[3];
+ } id;
+
+/*030*/ const
+ struct { /* APIC Version Register */
+ u32 version : 8,
+ __reserved_1 : 8,
+ max_lvt : 8,
+ __reserved_2 : 8;
+ u32 __reserved[3];
+ } version;
+
+/*040*/ struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/ struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/ struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/ struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/ struct { /* Task Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } tpr;
+
+/*090*/ const
+ struct { /* Arbitration Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } apr;
+
+/*0A0*/ const
+ struct { /* Processor Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } ppr;
+
+/*0B0*/ struct { /* End Of Interrupt Register */
+ u32 eoi;
+ u32 __reserved[3];
+ } eoi;
+
+/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/ struct { /* Logical Destination Register */
+ u32 __reserved_1 : 24,
+ logical_dest : 8;
+ u32 __reserved_2[3];
+ } ldr;
+
+/*0E0*/ struct { /* Destination Format Register */
+ u32 __reserved_1 : 28,
+ model : 4;
+ u32 __reserved_2[3];
+ } dfr;
+
+/*0F0*/ struct { /* Spurious Interrupt Vector Register */
+ u32 spurious_vector : 8,
+ apic_enabled : 1,
+ focus_cpu : 1,
+ __reserved_2 : 22;
+ u32 __reserved_3[3];
+ } svr;
+
+/*100*/ struct { /* In Service Register */
+/*170*/ u32 bitfield;
+ u32 __reserved[3];
+ } isr [8];
+
+/*180*/ struct { /* Trigger Mode Register */
+/*1F0*/ u32 bitfield;
+ u32 __reserved[3];
+ } tmr [8];
+
+/*200*/ struct { /* Interrupt Request Register */
+/*270*/ u32 bitfield;
+ u32 __reserved[3];
+ } irr [8];
+
+/*280*/ union { /* Error Status Register */
+ struct {
+ u32 send_cs_error : 1,
+ receive_cs_error : 1,
+ send_accept_error : 1,
+ receive_accept_error : 1,
+ __reserved_1 : 1,
+ send_illegal_vector : 1,
+ receive_illegal_vector : 1,
+ illegal_register_address : 1,
+ __reserved_2 : 24;
+ u32 __reserved_3[3];
+ } error_bits;
+ struct {
+ u32 errors;
+ u32 __reserved_3[3];
+ } all_errors;
+ } esr;
+
+/*290*/ struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/ struct { /* Interrupt Command Register 1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ destination_mode : 1,
+ delivery_status : 1,
+ __reserved_1 : 1,
+ level : 1,
+ trigger : 1,
+ __reserved_2 : 2,
+ shorthand : 2,
+ __reserved_3 : 12;
+ u32 __reserved_4[3];
+ } icr1;
+
+/*310*/ struct { /* Interrupt Command Register 2 */
+ union {
+ u32 __reserved_1 : 24,
+ phys_dest : 4,
+ __reserved_2 : 4;
+ u32 __reserved_3 : 24,
+ logical_dest : 8;
+ } dest;
+ u32 __reserved_4[3];
+ } icr2;
+
+/*320*/ struct { /* LVT - Timer */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ timer_mode : 1,
+ __reserved_3 : 14;
+ u32 __reserved_4[3];
+ } lvt_timer;
+
+/*330*/ struct { /* LVT - Thermal Sensor */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_thermal;
+
+/*340*/ struct { /* LVT - Performance Counter */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_pc;
+
+/*350*/ struct { /* LVT - LINT0 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint0;
+
+/*360*/ struct { /* LVT - LINT1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint1;
+
+/*370*/ struct { /* LVT - Error */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_error;
+
+/*380*/ struct { /* Timer Initial Count Register */
+ u32 initial_count;
+ u32 __reserved_2[3];
+ } timer_icr;
+
+/*390*/ const
+ struct { /* Timer Current Count Register */
+ u32 curr_count;
+ u32 __reserved_2[3];
+ } timer_ccr;
+
+/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/ struct { /* Timer Divide Configuration Register */
+ u32 divisor : 4,
+ __reserved_1 : 28;
+ u32 __reserved_2[3];
+ } timer_dcr;
+
+/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#define BAD_APICID 0xFFu
+
#endif
diff --git a/include/asm-x86/apicdef_32.h b/include/asm-x86/apicdef_32.h
deleted file mode 100644
index 9f6995341fd..00000000000
--- a/include/asm-x86/apicdef_32.h
+++ /dev/null
@@ -1,375 +0,0 @@
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define APIC_DEFAULT_PHYS_BASE 0xfee00000
-
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define APIC_LVR_MASK 0xFF00FF
-#define GET_APIC_VERSION(x) ((x)&0xFF)
-#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
-#define APIC_INTEGRATED(x) ((x)&0xF0)
-#define APIC_XAPIC(x) ((x) >= 0x14)
-#define APIC_TASKPRI 0x80
-#define APIC_TPRI_MASK 0xFF
-#define APIC_ARBPRI 0x90
-#define APIC_ARBPRI_MASK 0xFF
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
-#define APIC_RRR 0xC0
-#define APIC_LDR 0xD0
-#define APIC_LDR_MASK (0xFF<<24)
-#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
-#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
-#define APIC_ALL_CPUS 0xFF
-#define APIC_DFR 0xE0
-#define APIC_DFR_CLUSTER 0x0FFFFFFFul
-#define APIC_DFR_FLAT 0xFFFFFFFFul
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1<<9)
-#define APIC_SPIV_APIC_ENABLED (1<<8)
-#define APIC_ISR 0x100
-#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */
-#define APIC_TMR 0x180
-#define APIC_IRR 0x200
-#define APIC_ESR 0x280
-#define APIC_ESR_SEND_CS 0x00001
-#define APIC_ESR_RECV_CS 0x00002
-#define APIC_ESR_SEND_ACC 0x00004
-#define APIC_ESR_RECV_ACC 0x00008
-#define APIC_ESR_SENDILL 0x00020
-#define APIC_ESR_RECVILL 0x00040
-#define APIC_ESR_ILLREGA 0x00080
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x)<<24)
-#define APIC_LVTT 0x320
-#define APIC_LVTTHMR 0x330
-#define APIC_LVTPC 0x340
-#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
-#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
-#define SET_APIC_TIMER_BASE(x) (((x)<<18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
-#define APIC_LVT_TIMER_PERIODIC (1<<17)
-#define APIC_LVT_MASKED (1<<16)
-#define APIC_LVT_LEVEL_TRIGGER (1<<15)
-#define APIC_LVT_REMOTE_IRR (1<<14)
-#define APIC_INPUT_POLARITY (1<<13)
-#define APIC_SEND_PENDING (1<<12)
-#define APIC_MODE_MASK 0x700
-#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
-#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
-#define APIC_MODE_FIXED 0x0
-#define APIC_MODE_NMI 0x4
-#define APIC_MODE_EXTINT 0x7
-#define APIC_LVT1 0x360
-#define APIC_LVTERR 0x370
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-#define APIC_TDCR 0x3E0
-#define APIC_TDR_DIV_TMBASE (1<<2)
-#define APIC_TDR_DIV_1 0xB
-#define APIC_TDR_DIV_2 0x0
-#define APIC_TDR_DIV_4 0x1
-#define APIC_TDR_DIV_8 0x2
-#define APIC_TDR_DIV_16 0x3
-#define APIC_TDR_DIV_32 0x8
-#define APIC_TDR_DIV_64 0x9
-#define APIC_TDR_DIV_128 0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#define MAX_IO_APICS 64
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { /* LVT - Thermal Sensor */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_thermal;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#endif
diff --git a/include/asm-x86/apicdef_64.h b/include/asm-x86/apicdef_64.h
deleted file mode 100644
index 1dd40067c67..00000000000
--- a/include/asm-x86/apicdef_64.h
+++ /dev/null
@@ -1,392 +0,0 @@
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define APIC_DEFAULT_PHYS_BASE 0xfee00000
-
-#define APIC_ID 0x20
-#define APIC_ID_MASK (0xFFu<<24)
-#define GET_APIC_ID(x) (((x)>>24)&0xFFu)
-#define SET_APIC_ID(x) (((x)<<24))
-#define APIC_LVR 0x30
-#define APIC_LVR_MASK 0xFF00FF
-#define GET_APIC_VERSION(x) ((x)&0xFFu)
-#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFFu)
-#define APIC_INTEGRATED(x) ((x)&0xF0u)
-#define APIC_TASKPRI 0x80
-#define APIC_TPRI_MASK 0xFFu
-#define APIC_ARBPRI 0x90
-#define APIC_ARBPRI_MASK 0xFFu
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
-#define APIC_RRR 0xC0
-#define APIC_LDR 0xD0
-#define APIC_LDR_MASK (0xFFu<<24)
-#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFFu)
-#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
-#define APIC_ALL_CPUS 0xFFu
-#define APIC_DFR 0xE0
-#define APIC_DFR_CLUSTER 0x0FFFFFFFul
-#define APIC_DFR_FLAT 0xFFFFFFFFul
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1<<9)
-#define APIC_SPIV_APIC_ENABLED (1<<8)
-#define APIC_ISR 0x100
-#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */
-#define APIC_TMR 0x180
-#define APIC_IRR 0x200
-#define APIC_ESR 0x280
-#define APIC_ESR_SEND_CS 0x00001
-#define APIC_ESR_RECV_CS 0x00002
-#define APIC_ESR_SEND_ACC 0x00004
-#define APIC_ESR_RECV_ACC 0x00008
-#define APIC_ESR_SENDILL 0x00020
-#define APIC_ESR_RECVILL 0x00040
-#define APIC_ESR_ILLREGA 0x00080
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x)<<24)
-#define APIC_LVTT 0x320
-#define APIC_LVTTHMR 0x330
-#define APIC_LVTPC 0x340
-#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
-#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
-#define SET_APIC_TIMER_BASE(x) (((x)<<18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
-#define APIC_LVT_TIMER_PERIODIC (1<<17)
-#define APIC_LVT_MASKED (1<<16)
-#define APIC_LVT_LEVEL_TRIGGER (1<<15)
-#define APIC_LVT_REMOTE_IRR (1<<14)
-#define APIC_INPUT_POLARITY (1<<13)
-#define APIC_SEND_PENDING (1<<12)
-#define APIC_MODE_MASK 0x700
-#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
-#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
-#define APIC_MODE_FIXED 0x0
-#define APIC_MODE_NMI 0x4
-#define APIC_MODE_EXTINT 0x7
-#define APIC_LVT1 0x360
-#define APIC_LVTERR 0x370
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-#define APIC_TDCR 0x3E0
-#define APIC_TDR_DIV_TMBASE (1<<2)
-#define APIC_TDR_DIV_1 0xB
-#define APIC_TDR_DIV_2 0x0
-#define APIC_TDR_DIV_4 0x1
-#define APIC_TDR_DIV_8 0x2
-#define APIC_TDR_DIV_16 0x3
-#define APIC_TDR_DIV_32 0x8
-#define APIC_TDR_DIV_64 0x9
-#define APIC_TDR_DIV_128 0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#define MAX_IO_APICS 128
-#define MAX_LOCAL_APIC 256
-
-/*
- * All x86-64 systems are xAPIC compatible.
- * In the following, "apicid" is a physical APIC ID.
- */
-#define XAPIC_DEST_CPUS_SHIFT 4
-#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
-#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT)
-#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
-#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { /* LVT - Thermal Sensor */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_thermal;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#define BAD_APICID 0xFFu
-
-#endif
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index a8c1fca9726..768aee8a04e 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -6,7 +6,7 @@
/*
* linux/include/asm/arch_hooks.h
*
- * define the architecture specific hooks
+ * define the architecture specific hooks
*/
/* these aren't arch hooks, they are generic routines
@@ -24,7 +24,4 @@ extern void trap_init_hook(void);
extern void time_init_hook(void);
extern void mca_nmi_hook(void);
-extern int setup_early_printk(char *);
-extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
-
#endif
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
new file mode 100644
index 00000000000..1a6980a60fc
--- /dev/null
+++ b/include/asm-x86/asm.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_ASM_H
+#define _ASM_X86_ASM_H
+
+#ifdef CONFIG_X86_32
+/* 32 bits */
+
+# define _ASM_PTR " .long "
+# define _ASM_ALIGN " .balign 4 "
+# define _ASM_MOV_UL " movl "
+
+# define _ASM_INC " incl "
+# define _ASM_DEC " decl "
+# define _ASM_ADD " addl "
+# define _ASM_SUB " subl "
+# define _ASM_XADD " xaddl "
+
+#else
+/* 64 bits */
+
+# define _ASM_PTR " .quad "
+# define _ASM_ALIGN " .balign 8 "
+# define _ASM_MOV_UL " movq "
+
+# define _ASM_INC " incq "
+# define _ASM_DEC " decq "
+# define _ASM_ADD " addq "
+# define _ASM_SUB " subq "
+# define _ASM_XADD " xaddq "
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* _ASM_X86_ASM_H */
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 07e3f6d4fe4..1a23ce1a569 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -1,5 +1,321 @@
+#ifndef _ASM_X86_BITOPS_H
+#define _ASM_X86_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/alternative.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+ versions. */
+#define ADDR "=m" (*(volatile long *) addr)
+#else
+#define ADDR "+m" (*(volatile long *) addr)
+#endif
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void *addr)
+{
+ asm volatile(LOCK_PREFIX "bts %1,%0"
+ : ADDR
+ : "Ir" (nr) : "memory");
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile void *addr)
+{
+ asm volatile("bts %1,%0"
+ : ADDR
+ : "Ir" (nr) : "memory");
+}
+
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile void *addr)
+{
+ asm volatile(LOCK_PREFIX "btr %1,%0"
+ : ADDR
+ : "Ir" (nr));
+}
+
+/*
+ * clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock.
+ */
+static inline void clear_bit_unlock(unsigned nr, volatile void *addr)
+{
+ barrier();
+ clear_bit(nr, addr);
+}
+
+static inline void __clear_bit(int nr, volatile void *addr)
+{
+ asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+}
+
+/*
+ * __clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * __clear_bit() is non-atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock if no other CPUs can concurrently
+ * modify other bits in the word.
+ *
+ * No memory barrier is required here, because x86 cannot reorder stores past
+ * older loads. Same principle as spin_unlock.
+ */
+static inline void __clear_bit_unlock(unsigned nr, volatile void *addr)
+{
+ barrier();
+ __clear_bit(nr, addr);
+}
+
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile void *addr)
+{
+ asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile void *addr)
+{
+ asm volatile(LOCK_PREFIX "btc %1,%0"
+ : ADDR : "Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * test_and_set_bit_lock - Set a bit and return its old value for lock
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This is the same as test_and_set_bit on x86.
+ */
+static inline int test_and_set_bit_lock(int nr, volatile void *addr)
+{
+ return test_and_set_bit(nr, addr);
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm("bts %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr));
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile("btr %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr));
+ return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile("btc %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+static inline int constant_test_bit(int nr, const volatile void *addr)
+{
+ return ((1UL << (nr % BITS_PER_LONG)) &
+ (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
+}
+
+static inline int variable_test_bit(int nr, volatile const void *addr)
+{
+ int oldbit;
+
+ asm volatile("bt %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+ return oldbit;
+}
+
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static int test_bit(int nr, const volatile unsigned long *addr);
+#endif
+
+#define test_bit(nr,addr) \
+ (__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+#undef ADDR
+
#ifdef CONFIG_X86_32
# include "bitops_32.h"
#else
# include "bitops_64.h"
#endif
+
+#endif /* _ASM_X86_BITOPS_H */
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 0b40f6d20be..e4d75fcf9c0 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,320 +5,12 @@
* Copyright 1992, Linus Torvalds.
*/
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <linux/compiler.h>
-#include <asm/alternative.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__(
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
- barrier();
- clear_bit(nr, addr);
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
- */
-static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
- barrier();
- __clear_bit(nr, addr);
-}
-
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It may be reordered on other architectures than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
-static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
-{
- return test_and_set_bit(nr, addr);
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__(
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It can be reorderdered on other architectures other than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__(
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile unsigned long* addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void * addr);
-#endif
-
-static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr)
-{
- return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"Ir" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-#undef ADDR
-
/**
* find_first_zero_bit - find the first zero bit in a memory region
* @addr: The address to start the search at
* @size: The maximum size to search
*
- * Returns the bit-number of the first zero bit, not the number of the byte
+ * Returns the bit number of the first zero bit, not the number of the byte
* containing a bit.
*/
static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
@@ -348,7 +40,7 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
/**
* find_next_zero_bit - find the first zero bit in a memory region
* @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
+ * @offset: The bit number to start searching at
* @size: The maximum size to search
*/
int find_next_zero_bit(const unsigned long *addr, int size, int offset);
@@ -372,7 +64,7 @@ static inline unsigned long __ffs(unsigned long word)
* @addr: The address to start the search at
* @size: The maximum size to search
*
- * Returns the bit-number of the first set bit, not the number of the byte
+ * Returns the bit number of the first set bit, not the number of the byte
* containing a bit.
*/
static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
@@ -391,7 +83,7 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
/**
* find_next_bit - find the first set bit in a memory region
* @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
+ * @offset: The bit number to start searching at
* @size: The maximum size to search
*/
int find_next_bit(const unsigned long *addr, int size, int offset);
@@ -460,10 +152,10 @@ static inline int fls(int x)
#include <asm-generic/bitops/ext2-non-atomic.h>
-#define ext2_set_bit_atomic(lock,nr,addr) \
- test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit_atomic(lock,nr, addr) \
- test_and_clear_bit((nr),(unsigned long*)addr)
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ test_and_set_bit((nr), (unsigned long *)addr)
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ test_and_clear_bit((nr), (unsigned long *)addr)
#include <asm-generic/bitops/minix.h>
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 766bcc0470a..48adbf56ca6 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,303 +5,6 @@
* Copyright 1992, Linus Torvalds.
*/
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <asm/alternative.h>
-
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
-/* Technically wrong, but this avoids compilation errors on some gcc
- versions. */
-#define ADDR "=m" (*(volatile long *) addr)
-#else
-#define ADDR "+m" (*(volatile long *) addr)
-#endif
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile void *addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %1,%0"
- :ADDR
- :"dIr" (nr) : "memory");
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile void *addr)
-{
- __asm__ volatile(
- "btsl %1,%0"
- :ADDR
- :"dIr" (nr) : "memory");
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile void *addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %1,%0"
- :ADDR
- :"dIr" (nr));
-}
-
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
- barrier();
- clear_bit(nr, addr);
-}
-
-static inline void __clear_bit(int nr, volatile void *addr)
-{
- __asm__ __volatile__(
- "btrl %1,%0"
- :ADDR
- :"dIr" (nr));
-}
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
- */
-static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr)
-{
- barrier();
- __clear_bit(nr, addr);
-}
-
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile void *addr)
-{
- __asm__ __volatile__(
- "btcl %1,%0"
- :ADDR
- :"dIr" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile void *addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %1,%0"
- :ADDR
- :"dIr" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
-static inline int test_and_set_bit_lock(int nr, volatile void *addr)
-{
- return test_and_set_bit(nr, addr);
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__(
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__(
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr));
- return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile void *addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),ADDR
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void *addr);
-#endif
-
-static inline int constant_test_bit(int nr, const volatile void *addr)
-{
- return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, volatile const void *addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (*(volatile long *)addr),"dIr" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-#undef ADDR
-
extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
extern long find_first_bit(const unsigned long *addr, unsigned long size);
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 19f3ddf2df4..51151356840 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -54,13 +54,14 @@ struct sys_desc_table {
};
struct efi_info {
- __u32 _pad1;
+ __u32 efi_loader_signature;
__u32 efi_systab;
__u32 efi_memdesc_size;
__u32 efi_memdesc_version;
__u32 efi_memmap;
__u32 efi_memmap_size;
- __u32 _pad2[2];
+ __u32 efi_systab_hi;
+ __u32 efi_memmap_hi;
};
/* The so-called "zeropage" */
diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h
index fd8bdc639c4..8d477a20139 100644
--- a/include/asm-x86/bug.h
+++ b/include/asm-x86/bug.h
@@ -33,9 +33,6 @@
} while(0)
#endif
-void out_of_line_bug(void);
-#else /* CONFIG_BUG */
-static inline void out_of_line_bug(void) { }
#endif /* !CONFIG_BUG */
#include <asm-generic/bug.h>
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index aac8317420a..3fcc30dc073 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_BUGS_H
#define _ASM_X86_BUGS_H
-void check_bugs(void);
+extern void check_bugs(void);
+extern int ppro_with_ram_bug(void);
#endif /* _ASM_X86_BUGS_H */
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 9411a2d3f19..8dd8c5e3cc7 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -24,18 +24,35 @@
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
-void global_flush_tlb(void);
-int change_page_attr(struct page *page, int numpages, pgprot_t prot);
-int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
-void clflush_cache_range(void *addr, int size);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-/* internal debugging function */
-void kernel_map_pages(struct page *page, int numpages, int enable);
-#endif
+int __deprecated_for_modules change_page_attr(struct page *page, int numpages,
+ pgprot_t prot);
+
+int set_pages_uc(struct page *page, int numpages);
+int set_pages_wb(struct page *page, int numpages);
+int set_pages_x(struct page *page, int numpages);
+int set_pages_nx(struct page *page, int numpages);
+int set_pages_ro(struct page *page, int numpages);
+int set_pages_rw(struct page *page, int numpages);
+
+int set_memory_uc(unsigned long addr, int numpages);
+int set_memory_wb(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_np(unsigned long addr, int numpages);
+
+void clflush_cache_range(void *addr, unsigned int size);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
+#ifdef CONFIG_DEBUG_RODATA_TEST
+void rodata_test(void);
+#else
+static inline void rodata_test(void)
+{
+}
+#endif
#endif
diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h
index 6f4f63af96e..f13e62e2cb3 100644
--- a/include/asm-x86/calling.h
+++ b/include/asm-x86/calling.h
@@ -1,162 +1,168 @@
-/*
+/*
* Some macros to handle stack frames in assembly.
- */
+ */
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 32
+#define RBX 40
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
/* arguments: interrupts/non tracing syscalls only save upto here*/
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* + error_code */
-/* end of arguments */
+#define R11 48
+#define R10 56
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120 /* + error_code */
+/* end of arguments */
+
/* cpu exception frame or undefined in case of fast syscall. */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-#define ARGOFFSET R11
-#define SWFRAME ORIG_RAX
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
+
+#define ARGOFFSET R11
+#define SWFRAME ORIG_RAX
- .macro SAVE_ARGS addskip=0,norcx=0,nor891011=0
- subq $9*8+\addskip,%rsp
+ .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0
+ subq $9*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
- movq %rdi,8*8(%rsp)
- CFI_REL_OFFSET rdi,8*8
- movq %rsi,7*8(%rsp)
- CFI_REL_OFFSET rsi,7*8
- movq %rdx,6*8(%rsp)
- CFI_REL_OFFSET rdx,6*8
+ movq %rdi, 8*8(%rsp)
+ CFI_REL_OFFSET rdi, 8*8
+ movq %rsi, 7*8(%rsp)
+ CFI_REL_OFFSET rsi, 7*8
+ movq %rdx, 6*8(%rsp)
+ CFI_REL_OFFSET rdx, 6*8
.if \norcx
.else
- movq %rcx,5*8(%rsp)
- CFI_REL_OFFSET rcx,5*8
+ movq %rcx, 5*8(%rsp)
+ CFI_REL_OFFSET rcx, 5*8
.endif
- movq %rax,4*8(%rsp)
- CFI_REL_OFFSET rax,4*8
+ movq %rax, 4*8(%rsp)
+ CFI_REL_OFFSET rax, 4*8
.if \nor891011
.else
- movq %r8,3*8(%rsp)
- CFI_REL_OFFSET r8,3*8
- movq %r9,2*8(%rsp)
- CFI_REL_OFFSET r9,2*8
- movq %r10,1*8(%rsp)
- CFI_REL_OFFSET r10,1*8
- movq %r11,(%rsp)
- CFI_REL_OFFSET r11,0*8
+ movq %r8, 3*8(%rsp)
+ CFI_REL_OFFSET r8, 3*8
+ movq %r9, 2*8(%rsp)
+ CFI_REL_OFFSET r9, 2*8
+ movq %r10, 1*8(%rsp)
+ CFI_REL_OFFSET r10, 1*8
+ movq %r11, (%rsp)
+ CFI_REL_OFFSET r11, 0*8
.endif
.endm
-#define ARG_SKIP 9*8
- .macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0
+#define ARG_SKIP 9*8
+
+ .macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \
+ skipr8910=0, skiprdx=0
.if \skipr11
.else
- movq (%rsp),%r11
+ movq (%rsp), %r11
CFI_RESTORE r11
.endif
.if \skipr8910
.else
- movq 1*8(%rsp),%r10
+ movq 1*8(%rsp), %r10
CFI_RESTORE r10
- movq 2*8(%rsp),%r9
+ movq 2*8(%rsp), %r9
CFI_RESTORE r9
- movq 3*8(%rsp),%r8
+ movq 3*8(%rsp), %r8
CFI_RESTORE r8
.endif
.if \skiprax
.else
- movq 4*8(%rsp),%rax
+ movq 4*8(%rsp), %rax
CFI_RESTORE rax
.endif
.if \skiprcx
.else
- movq 5*8(%rsp),%rcx
+ movq 5*8(%rsp), %rcx
CFI_RESTORE rcx
.endif
.if \skiprdx
.else
- movq 6*8(%rsp),%rdx
+ movq 6*8(%rsp), %rdx
CFI_RESTORE rdx
.endif
- movq 7*8(%rsp),%rsi
+ movq 7*8(%rsp), %rsi
CFI_RESTORE rsi
- movq 8*8(%rsp),%rdi
+ movq 8*8(%rsp), %rdi
CFI_RESTORE rdi
.if ARG_SKIP+\addskip > 0
- addq $ARG_SKIP+\addskip,%rsp
+ addq $ARG_SKIP+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
.endif
- .endm
+ .endm
.macro LOAD_ARGS offset
- movq \offset(%rsp),%r11
- movq \offset+8(%rsp),%r10
- movq \offset+16(%rsp),%r9
- movq \offset+24(%rsp),%r8
- movq \offset+40(%rsp),%rcx
- movq \offset+48(%rsp),%rdx
- movq \offset+56(%rsp),%rsi
- movq \offset+64(%rsp),%rdi
- movq \offset+72(%rsp),%rax
+ movq \offset(%rsp), %r11
+ movq \offset+8(%rsp), %r10
+ movq \offset+16(%rsp), %r9
+ movq \offset+24(%rsp), %r8
+ movq \offset+40(%rsp), %rcx
+ movq \offset+48(%rsp), %rdx
+ movq \offset+56(%rsp), %rsi
+ movq \offset+64(%rsp), %rdi
+ movq \offset+72(%rsp), %rax
.endm
-
-#define REST_SKIP 6*8
+
+#define REST_SKIP 6*8
+
.macro SAVE_REST
- subq $REST_SKIP,%rsp
+ subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,5*8
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,4*8
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,3*8
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,2*8
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,1*8
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,0*8
- .endm
+ movq %rbx, 5*8(%rsp)
+ CFI_REL_OFFSET rbx, 5*8
+ movq %rbp, 4*8(%rsp)
+ CFI_REL_OFFSET rbp, 4*8
+ movq %r12, 3*8(%rsp)
+ CFI_REL_OFFSET r12, 3*8
+ movq %r13, 2*8(%rsp)
+ CFI_REL_OFFSET r13, 2*8
+ movq %r14, 1*8(%rsp)
+ CFI_REL_OFFSET r14, 1*8
+ movq %r15, (%rsp)
+ CFI_REL_OFFSET r15, 0*8
+ .endm
.macro RESTORE_REST
- movq (%rsp),%r15
+ movq (%rsp), %r15
CFI_RESTORE r15
- movq 1*8(%rsp),%r14
+ movq 1*8(%rsp), %r14
CFI_RESTORE r14
- movq 2*8(%rsp),%r13
+ movq 2*8(%rsp), %r13
CFI_RESTORE r13
- movq 3*8(%rsp),%r12
+ movq 3*8(%rsp), %r12
CFI_RESTORE r12
- movq 4*8(%rsp),%rbp
+ movq 4*8(%rsp), %rbp
CFI_RESTORE rbp
- movq 5*8(%rsp),%rbx
+ movq 5*8(%rsp), %rbx
CFI_RESTORE rbx
- addq $REST_SKIP,%rsp
+ addq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm
-
+
.macro SAVE_ALL
SAVE_ARGS
SAVE_REST
.endm
-
+
.macro RESTORE_ALL addskip=0
RESTORE_REST
- RESTORE_ARGS 0,\addskip
+ RESTORE_ARGS 0, \addskip
.endm
.macro icebp
.byte 0xf1
.endm
+
diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h
index 419fe88a034..e5f79997dec 100644
--- a/include/asm-x86/checksum_64.h
+++ b/include/asm-x86/checksum_64.h
@@ -4,7 +4,7 @@
/*
* Checksums for x86-64
* Copyright 2002 by Andi Kleen, SuSE Labs
- * with some code from asm-i386/checksum.h
+ * with some code from asm-x86/checksum.h
*/
#include <linux/compiler.h>
diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index f86ede28f6d..cea1dae288a 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h
@@ -105,15 +105,24 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
-#define sync_cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
-#define cmpxchg_local(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
+#define cmpxchg(ptr, o, n) \
+ ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))))
+#define sync_cmpxchg(ptr, o, n) \
+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg_local(ptr, o, n) \
+ ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))))
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+#define cmpxchg64(ptr, o, n) \
+ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n)))
+#define cmpxchg64_local(ptr, o, n) \
+ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o),\
+ (unsigned long long)(n)))
#endif
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -203,6 +212,34 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
return old;
}
+static inline unsigned long long __cmpxchg64(volatile void *ptr,
+ unsigned long long old, unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
+static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
+ unsigned long long old, unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__("cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
#ifndef CONFIG_X86_CMPXCHG
/*
* Building a kernel capable running on 80386. It may be necessary to
@@ -228,7 +265,7 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
return old;
}
-#define cmpxchg(ptr,o,n) \
+#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
if (likely(boot_cpu_data.x86 > 3)) \
@@ -239,7 +276,7 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
-#define cmpxchg_local(ptr,o,n) \
+#define cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
if (likely(boot_cpu_data.x86 > 3)) \
@@ -252,38 +289,37 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
})
#endif
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
- unsigned long long new)
-{
- unsigned long long prev;
- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-}
+#ifndef CONFIG_X86_CMPXCHG64
+/*
+ * Building a kernel capable running on 80386 and 80486. It may be necessary
+ * to simulate the cmpxchg8b on the 80386 and 80486 CPU.
+ */
-static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
- unsigned long long old, unsigned long long new)
-{
- unsigned long long prev;
- __asm__ __volatile__("cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-}
+extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
+
+#define cmpxchg64(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ if (likely(boot_cpu_data.x86 > 4)) \
+ __ret = __cmpxchg64((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+ else \
+ __ret = cmpxchg_486_u64((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+ __ret; \
+})
+#define cmpxchg64_local(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ if (likely(boot_cpu_data.x86 > 4)) \
+ __ret = __cmpxchg64_local((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+ else \
+ __ret = cmpxchg_486_u64((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+ __ret; \
+})
+
+#endif
-#define cmpxchg64(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
- (unsigned long long)(n)))
-#define cmpxchg64_local(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\
- (unsigned long long)(n)))
#endif
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 66ba7987184..b270ee04959 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -207,7 +207,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
static __inline__ void __user *compat_alloc_user_space(long len)
{
struct pt_regs *regs = task_pt_regs(current);
- return (void __user *)regs->rsp - len;
+ return (void __user *)regs->sp - len;
}
static inline int is_compat_task(void)
diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h
index b1bc7b1b64b..85ece5f10e9 100644
--- a/include/asm-x86/cpu.h
+++ b/include/asm-x86/cpu.h
@@ -7,7 +7,7 @@
#include <linux/nodemask.h>
#include <linux/percpu.h>
-struct i386_cpu {
+struct x86_cpu {
struct cpu cpu;
};
extern int arch_register_cpu(int num);
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index b7160a4598d..3fb7dfa7fc9 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -1,5 +1,207 @@
-#ifdef CONFIG_X86_32
-# include "cpufeature_32.h"
+/*
+ * Defines x86 CPU feature bits
+ */
+#ifndef _ASM_X86_CPUFEATURE_H
+#define _ASM_X86_CPUFEATURE_H
+
+#ifndef __ASSEMBLY__
+#include <linux/bitops.h>
+#endif
+#include <asm/required-features.h>
+
+#define NCAPINTS 8 /* N 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DS (0*32+21) /* Debug Store */
+#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+ /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
+#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
+#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
+/* 14 free */
+/* 15 free */
+#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */
+#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_CID (4*32+10) /* Context ID */
+#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */
+#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc
+ */
+#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
+
+#define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && \
+ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
+ (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
+ (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
+ (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \
+ (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \
+ (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \
+ (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
+ (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \
+ ? 1 : \
+ test_bit(bit, (unsigned long *)((c)->x86_capability)))
+#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
+
+#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
+#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
+#define setup_clear_cpu_cap(bit) do { \
+ clear_cpu_cap(&boot_cpu_data, bit); \
+ set_bit(bit, cleared_cpu_caps); \
+} while (0)
+#define setup_force_cpu_cap(bit) do { \
+ set_cpu_cap(&boot_cpu_data, bit); \
+ clear_bit(bit, cleared_cpu_caps); \
+} while (0)
+
+#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
+#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
+#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
+#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
+#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
+#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
+#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
+#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
+#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
+#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
+#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
+#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
+#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
+#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
+#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
+#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
+#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
+#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
+#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
+#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
+#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN)
+#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT)
+#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN)
+#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2)
+#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN)
+#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE)
+#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN)
+#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM)
+#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN)
+#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS)
+#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
+#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
+#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
+
+#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
+# define cpu_has_invlpg 1
#else
-# include "cpufeature_64.h"
+# define cpu_has_invlpg (boot_cpu_data.x86 > 3)
#endif
+
+#ifdef CONFIG_X86_64
+
+#undef cpu_has_vme
+#define cpu_has_vme 0
+
+#undef cpu_has_pae
+#define cpu_has_pae ___BUG___
+
+#undef cpu_has_mp
+#define cpu_has_mp 1
+
+#undef cpu_has_k6_mtrr
+#define cpu_has_k6_mtrr 0
+
+#undef cpu_has_cyrix_arr
+#define cpu_has_cyrix_arr 0
+
+#undef cpu_has_centaur_mcr
+#define cpu_has_centaur_mcr 0
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/include/asm-x86/cpufeature_32.h b/include/asm-x86/cpufeature_32.h
deleted file mode 100644
index f17e688dfb0..00000000000
--- a/include/asm-x86/cpufeature_32.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * cpufeature.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_I386_CPUFEATURE_H
-#define __ASM_I386_CPUFEATURE_H
-
-#ifndef __ASSEMBLY__
-#include <linux/bitops.h>
-#endif
-#include <asm/required-features.h>
-
-#define NCAPINTS 8 /* N 32-bit words worth of info */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DS (0*32+21) /* Debug Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
- /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
-/* 14 free */
-#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */
-#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
-#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
-#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_CID (4*32+10) /* Context ID */
-#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
-#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */
-#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
-#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */
-#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc
- */
-#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
-
-#define cpu_has(c, bit) \
- (__builtin_constant_p(bit) && \
- ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
- (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
- (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
- (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \
- (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \
- (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \
- (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
- (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \
- ? 1 : \
- test_bit(bit, (c)->x86_capability))
-#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
-
-#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
-#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
-#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
-#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
-#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
-#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
-#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
-#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
-#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
-#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
-#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
-#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
-#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN)
-#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT)
-#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN)
-#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2)
-#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN)
-#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE)
-#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN)
-#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM)
-#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN)
-#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS)
-#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
-#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
-#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
-
-#endif /* __ASM_I386_CPUFEATURE_H */
-
-/*
- * Local Variables:
- * mode:c
- * comment-column:42
- * End:
- */
diff --git a/include/asm-x86/cpufeature_64.h b/include/asm-x86/cpufeature_64.h
deleted file mode 100644
index e18496b7b85..00000000000
--- a/include/asm-x86/cpufeature_64.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * cpufeature_32.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_X8664_CPUFEATURE_H
-#define __ASM_X8664_CPUFEATURE_H
-
-#include "cpufeature_32.h"
-
-#undef cpu_has_vme
-#define cpu_has_vme 0
-
-#undef cpu_has_pae
-#define cpu_has_pae ___BUG___
-
-#undef cpu_has_mp
-#define cpu_has_mp 1 /* XXX */
-
-#undef cpu_has_k6_mtrr
-#define cpu_has_k6_mtrr 0
-
-#undef cpu_has_cyrix_arr
-#define cpu_has_cyrix_arr 0
-
-#undef cpu_has_centaur_mcr
-#define cpu_has_centaur_mcr 0
-
-#endif /* __ASM_X8664_CPUFEATURE_H */
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 6065c509226..5b6a05d3a77 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -1,5 +1,381 @@
+#ifndef _ASM_DESC_H_
+#define _ASM_DESC_H_
+
+#ifndef __ASSEMBLY__
+#include <asm/desc_defs.h>
+#include <asm/ldt.h>
+#include <asm/mmu.h>
+#include <linux/smp.h>
+
+static inline void fill_ldt(struct desc_struct *desc,
+ const struct user_desc *info)
+{
+ desc->limit0 = info->limit & 0x0ffff;
+ desc->base0 = info->base_addr & 0x0000ffff;
+
+ desc->base1 = (info->base_addr & 0x00ff0000) >> 16;
+ desc->type = (info->read_exec_only ^ 1) << 1;
+ desc->type |= info->contents << 2;
+ desc->s = 1;
+ desc->dpl = 0x3;
+ desc->p = info->seg_not_present ^ 1;
+ desc->limit = (info->limit & 0xf0000) >> 16;
+ desc->avl = info->useable;
+ desc->d = info->seg_32bit;
+ desc->g = info->limit_in_pages;
+ desc->base2 = (info->base_addr & 0xff000000) >> 24;
+}
+
+extern struct desc_ptr idt_descr;
+extern gate_desc idt_table[];
+
+#ifdef CONFIG_X86_64
+extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
+extern struct desc_ptr cpu_gdt_descr[];
+/* the cpu gdt accessor */
+#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
+
+static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
+ unsigned dpl, unsigned ist, unsigned seg)
+{
+ gate->offset_low = PTR_LOW(func);
+ gate->segment = __KERNEL_CS;
+ gate->ist = ist;
+ gate->p = 1;
+ gate->dpl = dpl;
+ gate->zero0 = 0;
+ gate->zero1 = 0;
+ gate->type = type;
+ gate->offset_middle = PTR_MIDDLE(func);
+ gate->offset_high = PTR_HIGH(func);
+}
+
+#else
+struct gdt_page {
+ struct desc_struct gdt[GDT_ENTRIES];
+} __attribute__((aligned(PAGE_SIZE)));
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
+
+static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+{
+ return per_cpu(gdt_page, cpu).gdt;
+}
+
+static inline void pack_gate(gate_desc *gate, unsigned char type,
+ unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
+
+{
+ gate->a = (seg << 16) | (base & 0xffff);
+ gate->b = (base & 0xffff0000) |
+ (((0x80 | type | (dpl << 5)) & 0xff) << 8);
+}
+
+#endif
+
+static inline int desc_empty(const void *ptr)
+{
+ const u32 *desc = ptr;
+ return !(desc[0] | desc[1]);
+}
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define load_TR_desc() native_load_tr_desc()
+#define load_gdt(dtr) native_load_gdt(dtr)
+#define load_idt(dtr) native_load_idt(dtr)
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
+
+#define store_gdt(dtr) native_store_gdt(dtr)
+#define store_idt(dtr) native_store_idt(dtr)
+#define store_tr(tr) (tr = native_store_tr())
+#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
+
+#define load_TLS(t, cpu) native_load_tls(t, cpu)
+#define set_ldt native_set_ldt
+
+#define write_ldt_entry(dt, entry, desc) \
+ native_write_ldt_entry(dt, entry, desc)
+#define write_gdt_entry(dt, entry, desc, type) \
+ native_write_gdt_entry(dt, entry, desc, type)
+#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
+#endif
+
+static inline void native_write_idt_entry(gate_desc *idt, int entry,
+ const gate_desc *gate)
+{
+ memcpy(&idt[entry], gate, sizeof(*gate));
+}
+
+static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry,
+ const void *desc)
+{
+ memcpy(&ldt[entry], desc, 8);
+}
+
+static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
+ const void *desc, int type)
+{
+ unsigned int size;
+ switch (type) {
+ case DESC_TSS:
+ size = sizeof(tss_desc);
+ break;
+ case DESC_LDT:
+ size = sizeof(ldt_desc);
+ break;
+ default:
+ size = sizeof(struct desc_struct);
+ break;
+ }
+ memcpy(&gdt[entry], desc, size);
+}
+
+static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
+ unsigned long limit, unsigned char type,
+ unsigned char flags)
+{
+ desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
+ desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+ (limit & 0x000f0000) | ((type & 0xff) << 8) |
+ ((flags & 0xf) << 20);
+ desc->p = 1;
+}
+
+
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+ unsigned type, unsigned size)
+{
+#ifdef CONFIG_X86_64
+ struct ldttss_desc64 *desc = d;
+ memset(desc, 0, sizeof(*desc));
+ desc->limit0 = size & 0xFFFF;
+ desc->base0 = PTR_LOW(addr);
+ desc->base1 = PTR_MIDDLE(addr) & 0xFF;
+ desc->type = type;
+ desc->p = 1;
+ desc->limit1 = (size >> 16) & 0xF;
+ desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
+ desc->base3 = PTR_HIGH(addr);
+#else
+
+ pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+#endif
+}
+
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+{
+ struct desc_struct *d = get_cpu_gdt_table(cpu);
+ tss_desc tss;
+
+ /*
+ * sizeof(unsigned long) coming from an extra "long" at the end
+ * of the iobitmap. See tss_struct definition in processor.h
+ *
+ * -1? seg base+limit should be pointing to the address of the
+ * last valid byte
+ */
+ set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
+ IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
+ write_gdt_entry(d, entry, &tss, DESC_TSS);
+}
+
+#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
+
+static inline void native_set_ldt(const void *addr, unsigned int entries)
+{
+ if (likely(entries == 0))
+ __asm__ __volatile__("lldt %w0"::"q" (0));
+ else {
+ unsigned cpu = smp_processor_id();
+ ldt_desc ldt;
+
+ set_tssldt_descriptor(&ldt, (unsigned long)addr,
+ DESC_LDT, entries * sizeof(ldt) - 1);
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ &ldt, DESC_LDT);
+ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+ }
+}
+
+static inline void native_load_tr_desc(void)
+{
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+ asm volatile("str %0":"=r" (tr));
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ unsigned int i;
+ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
+
+#define _LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0)
+
+#ifdef CONFIG_X86_64
+#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+#else
+#define LDT_empty(info) (_LDT_empty(info))
+#endif
+
+static inline void clear_LDT(void)
+{
+ set_ldt(NULL, 0);
+}
+
+/*
+ * load one particular LDT into the current CPU
+ */
+static inline void load_LDT_nolock(mm_context_t *pc)
+{
+ set_ldt(pc->ldt, pc->size);
+}
+
+static inline void load_LDT(mm_context_t *pc)
+{
+ preempt_disable();
+ load_LDT_nolock(pc);
+ preempt_enable();
+}
+
+static inline unsigned long get_desc_base(const struct desc_struct *desc)
+{
+ return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+}
+
+static inline unsigned long get_desc_limit(const struct desc_struct *desc)
+{
+ return desc->limit0 | (desc->limit << 16);
+}
+
+static inline void _set_gate(int gate, unsigned type, void *addr,
+ unsigned dpl, unsigned ist, unsigned seg)
+{
+ gate_desc s;
+ pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+ /*
+ * does not need to be atomic because it is only done once at
+ * setup time
+ */
+ write_idt_entry(idt_table, gate, &s);
+}
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+static inline void set_intr_gate(unsigned int n, void *addr)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
+}
+
+/*
+ * This routine sets up an interrupt gate at directory privilege level 3.
+ */
+static inline void set_system_intr_gate(unsigned int n, void *addr)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
+}
+
+static inline void set_trap_gate(unsigned int n, void *addr)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
+}
+
+static inline void set_system_gate(unsigned int n, void *addr)
+{
+ BUG_ON((unsigned)n > 0xFF);
#ifdef CONFIG_X86_32
-# include "desc_32.h"
+ _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
+#else
+ _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
+#endif
+}
+
+static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
+}
+
+static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
+}
+
+static inline void set_system_gate_ist(int n, void *addr, unsigned ist)
+{
+ BUG_ON((unsigned)n > 0xFF);
+ _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
+}
+
#else
-# include "desc_64.h"
+/*
+ * GET_DESC_BASE reads the descriptor base of the specified segment.
+ *
+ * Args:
+ * idx - descriptor index
+ * gdt - GDT pointer
+ * base - 32bit register to which the base will be written
+ * lo_w - lo word of the "base" register
+ * lo_b - lo byte of the "base" register
+ * hi_b - hi byte of the low word of the "base" register
+ *
+ * Example:
+ * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+ * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+ */
+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+ movb idx*8+4(gdt), lo_b; \
+ movb idx*8+7(gdt), hi_b; \
+ shll $16, base; \
+ movw idx*8+2(gdt), lo_w;
+
+
+#endif /* __ASSEMBLY__ */
+
#endif
diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h
deleted file mode 100644
index c547403f341..00000000000
--- a/include/asm-x86/desc_32.h
+++ /dev/null
@@ -1,244 +0,0 @@
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-
-#include <asm/ldt.h>
-#include <asm/segment.h>
-
-#ifndef __ASSEMBLY__
-
-#include <linux/preempt.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
-
-#include <asm/mmu.h>
-
-struct Xgt_desc_struct {
- unsigned short size;
- unsigned long address __attribute__((packed));
- unsigned short pad;
-} __attribute__ ((packed));
-
-struct gdt_page
-{
- struct desc_struct gdt[GDT_ENTRIES];
-} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
-
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
-{
- return per_cpu(gdt_page, cpu).gdt;
-}
-
-extern struct Xgt_desc_struct idt_descr;
-extern struct desc_struct idt_table[];
-extern void set_intr_gate(unsigned int irq, void * addr);
-
-static inline void pack_descriptor(__u32 *a, __u32 *b,
- unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
-{
- *a = ((base & 0xffff) << 16) | (limit & 0xffff);
- *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
- (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
-}
-
-static inline void pack_gate(__u32 *a, __u32 *b,
- unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
-{
- *a = (seg << 16) | (base & 0xffff);
- *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
-}
-
-#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
-#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
-#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
-#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
-#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
-#define DESCTYPE_DPL3 0x60 /* DPL-3 */
-#define DESCTYPE_S 0x10 /* !system */
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define load_TR_desc() native_load_tr_desc()
-#define load_gdt(dtr) native_load_gdt(dtr)
-#define load_idt(dtr) native_load_idt(dtr)
-#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
-
-#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
-#define store_tr(tr) (tr = native_store_tr())
-#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
-
-#define load_TLS(t, cpu) native_load_tls(t, cpu)
-#define set_ldt native_set_ldt
-
-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#endif
-
-static inline void write_dt_entry(struct desc_struct *dt,
- int entry, u32 entry_low, u32 entry_high)
-{
- dt[entry].a = entry_low;
- dt[entry].b = entry_high;
-}
-
-static inline void native_set_ldt(const void *addr, unsigned int entries)
-{
- if (likely(entries == 0))
- __asm__ __volatile__("lldt %w0"::"q" (0));
- else {
- unsigned cpu = smp_processor_id();
- __u32 a, b;
-
- pack_descriptor(&a, &b, (unsigned long)addr,
- entries * sizeof(struct desc_struct) - 1,
- DESCTYPE_LDT, 0);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
- __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
- }
-}
-
-
-static inline void native_load_tr_desc(void)
-{
- asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
-static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
-{
- asm ("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct Xgt_desc_struct *dtr)
-{
- asm ("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
- asm ("str %0":"=r" (tr));
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- unsigned int i;
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
-static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
-{
- __u32 a, b;
- pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
- write_idt_entry(idt_table, gate, a, b);
-}
-
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
-{
- __u32 a, b;
- pack_descriptor(&a, &b, (unsigned long)addr,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- DESCTYPE_TSS, 0);
- write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
-}
-
-
-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
-
-#define LDT_entry_a(info) \
- ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
- (((info)->base_addr & 0xff000000) | \
- (((info)->base_addr & 0x00ff0000) >> 16) | \
- ((info)->limit & 0xf0000) | \
- (((info)->read_exec_only ^ 1) << 9) | \
- ((info)->contents << 10) | \
- (((info)->seg_not_present ^ 1) << 15) | \
- ((info)->seg_32bit << 22) | \
- ((info)->limit_in_pages << 23) | \
- ((info)->useable << 20) | \
- 0x7000)
-
-#define LDT_empty(info) (\
- (info)->base_addr == 0 && \
- (info)->limit == 0 && \
- (info)->contents == 0 && \
- (info)->read_exec_only == 1 && \
- (info)->seg_32bit == 0 && \
- (info)->limit_in_pages == 0 && \
- (info)->seg_not_present == 1 && \
- (info)->useable == 0 )
-
-static inline void clear_LDT(void)
-{
- set_ldt(NULL, 0);
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
- set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
- preempt_disable();
- load_LDT_nolock(pc);
- preempt_enable();
-}
-
-static inline unsigned long get_desc_base(unsigned long *desc)
-{
- unsigned long base;
- base = ((desc[0] >> 16) & 0x0000ffff) |
- ((desc[1] << 16) & 0x00ff0000) |
- (desc[1] & 0xff000000);
- return base;
-}
-
-#else /* __ASSEMBLY__ */
-
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- * idx - descriptor index
- * gdt - GDT pointer
- * base - 32bit register to which the base will be written
- * lo_w - lo word of the "base" register
- * lo_b - lo byte of the "base" register
- * hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
- movb idx*8+4(gdt), lo_b; \
- movb idx*8+7(gdt), hi_b; \
- shll $16, base; \
- movw idx*8+2(gdt), lo_w;
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index 7d9c938e69f..8b137891791 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -1,204 +1 @@
-/* Written 2000 by Andi Kleen */
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-#include <linux/threads.h>
-#include <asm/ldt.h>
-
-#ifndef __ASSEMBLY__
-
-#include <linux/string.h>
-#include <linux/smp.h>
-#include <asm/desc_defs.h>
-
-#include <asm/segment.h>
-#include <asm/mmu.h>
-
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
-
-#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
-#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
-#define clear_LDT() asm volatile("lldt %w0"::"r" (0))
-
-static inline unsigned long __store_tr(void)
-{
- unsigned long tr;
-
- asm volatile ("str %w0":"=r" (tr));
- return tr;
-}
-
-#define store_tr(tr) (tr) = __store_tr()
-
-/*
- * This is the ldt that every process will get unless we need
- * something other than this.
- */
-extern struct desc_struct default_ldt[];
-extern struct gate_struct idt_table[];
-extern struct desc_ptr cpu_gdt_descr[];
-
-/* the cpu gdt accessor */
-#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
-
-static inline void load_gdt(const struct desc_ptr *ptr)
-{
- asm volatile("lgdt %w0"::"m" (*ptr));
-}
-
-static inline void store_gdt(struct desc_ptr *ptr)
-{
- asm("sgdt %w0":"=m" (*ptr));
-}
-
-static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
-{
- struct gate_struct s;
- s.offset_low = PTR_LOW(func);
- s.segment = __KERNEL_CS;
- s.ist = ist;
- s.p = 1;
- s.dpl = dpl;
- s.zero0 = 0;
- s.zero1 = 0;
- s.type = type;
- s.offset_middle = PTR_MIDDLE(func);
- s.offset_high = PTR_HIGH(func);
- /* does not need to be atomic because it is only done once at setup time */
- memcpy(adr, &s, 16);
-}
-
-static inline void set_intr_gate(int nr, void *func)
-{
- BUG_ON((unsigned)nr > 0xFF);
- _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0);
-}
-
-static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
-{
- BUG_ON((unsigned)nr > 0xFF);
- _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist);
-}
-
-static inline void set_system_gate(int nr, void *func)
-{
- BUG_ON((unsigned)nr > 0xFF);
- _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
-}
-
-static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
-{
- _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
-}
-
-static inline void load_idt(const struct desc_ptr *ptr)
-{
- asm volatile("lidt %w0"::"m" (*ptr));
-}
-
-static inline void store_idt(struct desc_ptr *dtr)
-{
- asm("sidt %w0":"=m" (*dtr));
-}
-
-static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
- unsigned size)
-{
- struct ldttss_desc d;
- memset(&d,0,sizeof(d));
- d.limit0 = size & 0xFFFF;
- d.base0 = PTR_LOW(tss);
- d.base1 = PTR_MIDDLE(tss) & 0xFF;
- d.type = type;
- d.p = 1;
- d.limit1 = (size >> 16) & 0xF;
- d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
- d.base3 = PTR_HIGH(tss);
- memcpy(ptr, &d, 16);
-}
-
-static inline void set_tss_desc(unsigned cpu, void *addr)
-{
- /*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap. See tss_struct definition in processor.h
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
- set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS],
- (unsigned long)addr, DESC_TSS,
- IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
-}
-
-static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
-{
- set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
- DESC_LDT, size * 8 - 1);
-}
-
-#define LDT_entry_a(info) \
- ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-/* Don't allow setting of the lm bit. It is useless anyways because
- 64bit system calls require __USER_CS. */
-#define LDT_entry_b(info) \
- (((info)->base_addr & 0xff000000) | \
- (((info)->base_addr & 0x00ff0000) >> 16) | \
- ((info)->limit & 0xf0000) | \
- (((info)->read_exec_only ^ 1) << 9) | \
- ((info)->contents << 10) | \
- (((info)->seg_not_present ^ 1) << 15) | \
- ((info)->seg_32bit << 22) | \
- ((info)->limit_in_pages << 23) | \
- ((info)->useable << 20) | \
- /* ((info)->lm << 21) | */ \
- 0x7000)
-
-#define LDT_empty(info) (\
- (info)->base_addr == 0 && \
- (info)->limit == 0 && \
- (info)->contents == 0 && \
- (info)->read_exec_only == 1 && \
- (info)->seg_32bit == 0 && \
- (info)->limit_in_pages == 0 && \
- (info)->seg_not_present == 1 && \
- (info)->useable == 0 && \
- (info)->lm == 0)
-
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
-{
- unsigned int i;
- u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[i] = t->tls_array[i];
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock (mm_context_t *pc, int cpu)
-{
- int count = pc->size;
-
- if (likely(!count)) {
- clear_LDT();
- return;
- }
-
- set_ldt_desc(cpu, pc->ldt, count);
- load_LDT_desc();
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
- int cpu = get_cpu();
- load_LDT_nolock(pc, cpu);
- put_cpu();
-}
-
-extern struct desc_ptr idt_descr;
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index 08900407009..e33f078b3e5 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -11,26 +11,36 @@
#include <linux/types.h>
+/*
+ * FIXME: Acessing the desc_struct through its fields is more elegant,
+ * and should be the one valid thing to do. However, a lot of open code
+ * still touches the a and b acessors, and doing this allow us to do it
+ * incrementally. We keep the signature as a struct, rather than an union,
+ * so we can get rid of it transparently in the future -- glommer
+ */
// 8 byte segment descriptor
struct desc_struct {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
- unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed));
+ union {
+ struct { unsigned int a, b; };
+ struct {
+ u16 limit0;
+ u16 base0;
+ unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+ unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
+ };
-struct n_desc_struct {
- unsigned int a,b;
-};
+ };
+} __attribute__((packed));
enum {
GATE_INTERRUPT = 0xE,
GATE_TRAP = 0xF,
GATE_CALL = 0xC,
+ GATE_TASK = 0x5,
};
// 16byte gate
-struct gate_struct {
+struct gate_struct64 {
u16 offset_low;
u16 segment;
unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
@@ -39,17 +49,18 @@ struct gate_struct {
u32 zero1;
} __attribute__((packed));
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
+#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
enum {
DESC_TSS = 0x9,
DESC_LDT = 0x2,
+ DESCTYPE_S = 0x10, /* !system */
};
// LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc {
+struct ldttss_desc64 {
u16 limit0;
u16 base0;
unsigned base1 : 8, type : 5, dpl : 2, p : 1;
@@ -58,6 +69,16 @@ struct ldttss_desc {
u32 zero1;
} __attribute__((packed));
+#ifdef CONFIG_X86_64
+typedef struct gate_struct64 gate_desc;
+typedef struct ldttss_desc64 ldt_desc;
+typedef struct ldttss_desc64 tss_desc;
+#else
+typedef struct desc_struct gate_desc;
+typedef struct desc_struct ldt_desc;
+typedef struct desc_struct tss_desc;
+#endif
+
struct desc_ptr {
unsigned short size;
unsigned long address;
diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h
index 9f936c61a4e..e9733ce8988 100644
--- a/include/asm-x86/dma.h
+++ b/include/asm-x86/dma.h
@@ -1,5 +1,319 @@
+/*
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_X86_DMA_H
+#define _ASM_X86_DMA_H
+
+#include <linux/spinlock.h> /* And spinlocks */
+#include <asm/io.h> /* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb outb_p
+#else
+#define dma_outb outb
+#endif
+
+#define dma_inb inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ * controller 1: channels 0-3, byte operations, ports 00-1F
+ * controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ * - ALL registers are 8 bits only, regardless of transfer size
+ * - channel 4 is not used - cascades 1 into 2.
+ * - channels 0-3 are byte - addresses/counts are for physical bytes
+ * - channels 5-7 are word - addresses/counts are for physical words
+ * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ * - transfer count loaded to registers is 1 less than actual count
+ * - controller 2 offsets are all even (2x offsets for controller 1)
+ * - page registers for 5-7 don't use data bit 0, represent 128K pages
+ * - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ * Address mapping for channels 0-3:
+ *
+ * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * P7 ... P0 A7 ... A0 A7 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Address mapping for channels 5-7:
+ *
+ * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
+ * | ... | \ \ ... \ \ \ ... \ \
+ * | ... | \ \ ... \ \ \ ... \ (not used)
+ * | ... | \ \ ... \ \ \ ... \
+ * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation.
+ *
+ */
+
+#define MAX_DMA_CHANNELS 8
+
#ifdef CONFIG_X86_32
-# include "dma_32.h"
+
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)
+
+#else
+
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT)
+
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
+
+/* Compat define for old dma zone */
+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
+
+#endif
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG 0x08 /* command register (w) */
+#define DMA1_STAT_REG 0x08 /* status register (r) */
+#define DMA1_REQ_REG 0x09 /* request register (w) */
+#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
+#define DMA1_MODE_REG 0x0B /* mode register (w) */
+#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
+#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
+#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
+#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
+
+#define DMA2_CMD_REG 0xD0 /* command register (w) */
+#define DMA2_STAT_REG 0xD0 /* status register (r) */
+#define DMA2_REQ_REG 0xD2 /* request register (w) */
+#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
+#define DMA2_MODE_REG 0xD6 /* mode register (w) */
+#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
+#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
+#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
+#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
+
+#define DMA_ADDR_0 0x00 /* DMA address registers */
+#define DMA_ADDR_1 0x02
+#define DMA_ADDR_2 0x04
+#define DMA_ADDR_3 0x06
+#define DMA_ADDR_4 0xC0
+#define DMA_ADDR_5 0xC4
+#define DMA_ADDR_6 0xC8
+#define DMA_ADDR_7 0xCC
+
+#define DMA_CNT_0 0x01 /* DMA count registers */
+#define DMA_CNT_1 0x03
+#define DMA_CNT_2 0x05
+#define DMA_CNT_3 0x07
+#define DMA_CNT_4 0xC2
+#define DMA_CNT_5 0xC6
+#define DMA_CNT_6 0xCA
+#define DMA_CNT_7 0xCE
+
+#define DMA_PAGE_0 0x87 /* DMA page registers */
+#define DMA_PAGE_1 0x83
+#define DMA_PAGE_2 0x81
+#define DMA_PAGE_3 0x82
+#define DMA_PAGE_5 0x8B
+#define DMA_PAGE_6 0x89
+#define DMA_PAGE_7 0x8A
+
+/* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_READ 0x44
+/* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE 0x48
+/* pass thru DREQ->HRQ, DACK<-HLDA only */
+#define DMA_MODE_CASCADE 0xC0
+
+#define DMA_AUTOINIT 0x10
+
+
+extern spinlock_t dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&dma_spin_lock, flags);
+ return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+ spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+ if (dmanr <= 3)
+ dma_outb(dmanr, DMA1_MASK_REG);
+ else
+ dma_outb(dmanr & 3, DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+ if (dmanr <= 3)
+ dma_outb(dmanr | 4, DMA1_MASK_REG);
+ else
+ dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+ if (dmanr <= 3)
+ dma_outb(0, DMA1_CLEAR_FF_REG);
+ else
+ dma_outb(0, DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+ if (dmanr <= 3)
+ dma_outb(mode | dmanr, DMA1_MODE_REG);
+ else
+ dma_outb(mode | (dmanr & 3), DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+ switch (dmanr) {
+ case 0:
+ dma_outb(pagenr, DMA_PAGE_0);
+ break;
+ case 1:
+ dma_outb(pagenr, DMA_PAGE_1);
+ break;
+ case 2:
+ dma_outb(pagenr, DMA_PAGE_2);
+ break;
+ case 3:
+ dma_outb(pagenr, DMA_PAGE_3);
+ break;
+ case 5:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+ break;
+ case 6:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+ break;
+ case 7:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+ break;
+ }
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+ set_dma_page(dmanr, a>>16);
+ if (dmanr <= 3) {
+ dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
+ dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
+ } else {
+ dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+ dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+ }
+}
+
+
+/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+ count--;
+ if (dmanr <= 3) {
+ dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+ dma_outb((count >> 8) & 0xff,
+ ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+ } else {
+ dma_outb((count >> 1) & 0xff,
+ ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+ dma_outb((count >> 9) & 0xff,
+ ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+ }
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+ unsigned int io_port;
+ /* using short to get 16-bit wrap around */
+ unsigned short count;
+
+ io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE
+ : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE;
+
+ count = 1 + dma_inb(io_port);
+ count += dma_inb(io_port) << 8;
+
+ return (dmanr <= 3) ? count : (count << 1);
+}
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char *device_id);
+extern void free_dma(unsigned int dmanr);
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
#else
-# include "dma_64.h"
+#define isa_dma_bridge_buggy (0)
#endif
+
+#endif /* _ASM_X86_DMA_H */
diff --git a/include/asm-x86/dma_32.h b/include/asm-x86/dma_32.h
deleted file mode 100644
index d23aac8e1a5..00000000000
--- a/include/asm-x86/dma_32.h
+++ /dev/null
@@ -1,297 +0,0 @@
-/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <linux/spinlock.h> /* And spinlocks */
-#include <asm/io.h> /* need byte IO */
-#include <linux/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb outb_p
-#else
-#define dma_outb outb
-#endif
-
-#define dma_inb inb
-
-/*
- * NOTES about DMA transfers:
- *
- * controller 1: channels 0-3, byte operations, ports 00-1F
- * controller 2: channels 4-7, word operations, ports C0-DF
- *
- * - ALL registers are 8 bits only, regardless of transfer size
- * - channel 4 is not used - cascades 1 into 2.
- * - channels 0-3 are byte - addresses/counts are for physical bytes
- * - channels 5-7 are word - addresses/counts are for physical words
- * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- * - transfer count loaded to registers is 1 less than actual count
- * - controller 2 offsets are all even (2x offsets for controller 1)
- * - page registers for 5-7 don't use data bit 0, represent 128K pages
- * - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- * Address mapping for channels 0-3:
- *
- * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * P7 ... P0 A7 ... A0 A7 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Address mapping for channels 5-7:
- *
- * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
- * | ... | \ \ ... \ \ \ ... \ \
- * | ... | \ \ ... \ \ \ ... \ (not used)
- * | ... | \ \ ... \ \ \ ... \
- * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation.
- *
- */
-
-#define MAX_DMA_CHANNELS 8
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG 0x08 /* command register (w) */
-#define DMA1_STAT_REG 0x08 /* status register (r) */
-#define DMA1_REQ_REG 0x09 /* request register (w) */
-#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
-#define DMA1_MODE_REG 0x0B /* mode register (w) */
-#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
-#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
-#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
-#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
-
-#define DMA2_CMD_REG 0xD0 /* command register (w) */
-#define DMA2_STAT_REG 0xD0 /* status register (r) */
-#define DMA2_REQ_REG 0xD2 /* request register (w) */
-#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
-#define DMA2_MODE_REG 0xD6 /* mode register (w) */
-#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
-#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
-#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
-#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
-
-#define DMA_ADDR_0 0x00 /* DMA address registers */
-#define DMA_ADDR_1 0x02
-#define DMA_ADDR_2 0x04
-#define DMA_ADDR_3 0x06
-#define DMA_ADDR_4 0xC0
-#define DMA_ADDR_5 0xC4
-#define DMA_ADDR_6 0xC8
-#define DMA_ADDR_7 0xCC
-
-#define DMA_CNT_0 0x01 /* DMA count registers */
-#define DMA_CNT_1 0x03
-#define DMA_CNT_2 0x05
-#define DMA_CNT_3 0x07
-#define DMA_CNT_4 0xC2
-#define DMA_CNT_5 0xC6
-#define DMA_CNT_6 0xCA
-#define DMA_CNT_7 0xCE
-
-#define DMA_PAGE_0 0x87 /* DMA page registers */
-#define DMA_PAGE_1 0x83
-#define DMA_PAGE_2 0x81
-#define DMA_PAGE_3 0x82
-#define DMA_PAGE_5 0x8B
-#define DMA_PAGE_6 0x89
-#define DMA_PAGE_7 0x8A
-
-#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT 0x10
-
-
-extern spinlock_t dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
- unsigned long flags;
- spin_lock_irqsave(&dma_spin_lock, flags);
- return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
- spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr, DMA1_MASK_REG);
- else
- dma_outb(dmanr & 3, DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr | 4, DMA1_MASK_REG);
- else
- dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(0, DMA1_CLEAR_FF_REG);
- else
- dma_outb(0, DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
- if (dmanr<=3)
- dma_outb(mode | dmanr, DMA1_MODE_REG);
- else
- dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
- switch(dmanr) {
- case 0:
- dma_outb(pagenr, DMA_PAGE_0);
- break;
- case 1:
- dma_outb(pagenr, DMA_PAGE_1);
- break;
- case 2:
- dma_outb(pagenr, DMA_PAGE_2);
- break;
- case 3:
- dma_outb(pagenr, DMA_PAGE_3);
- break;
- case 5:
- dma_outb(pagenr & 0xfe, DMA_PAGE_5);
- break;
- case 6:
- dma_outb(pagenr & 0xfe, DMA_PAGE_6);
- break;
- case 7:
- dma_outb(pagenr & 0xfe, DMA_PAGE_7);
- break;
- }
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
- set_dma_page(dmanr, a>>16);
- if (dmanr <= 3) {
- dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- } else {
- dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- }
-}
-
-
-/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
- count--;
- if (dmanr <= 3) {
- dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- } else {
- dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
- unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
- : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
- /* using short to get 16-bit wrap around */
- unsigned short count;
-
- count = 1 + dma_inb(io_port);
- count += dma_inb(io_port) << 8;
-
- return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr); /* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
-#endif /* _ASM_DMA_H */
diff --git a/include/asm-x86/dma_64.h b/include/asm-x86/dma_64.h
deleted file mode 100644
index a37c16f0628..00000000000
--- a/include/asm-x86/dma_64.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <linux/spinlock.h> /* And spinlocks */
-#include <asm/io.h> /* need byte IO */
-#include <linux/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb outb_p
-#else
-#define dma_outb outb
-#endif
-
-#define dma_inb inb
-
-/*
- * NOTES about DMA transfers:
- *
- * controller 1: channels 0-3, byte operations, ports 00-1F
- * controller 2: channels 4-7, word operations, ports C0-DF
- *
- * - ALL registers are 8 bits only, regardless of transfer size
- * - channel 4 is not used - cascades 1 into 2.
- * - channels 0-3 are byte - addresses/counts are for physical bytes
- * - channels 5-7 are word - addresses/counts are for physical words
- * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- * - transfer count loaded to registers is 1 less than actual count
- * - controller 2 offsets are all even (2x offsets for controller 1)
- * - page registers for 5-7 don't use data bit 0, represent 128K pages
- * - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- * Address mapping for channels 0-3:
- *
- * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * P7 ... P0 A7 ... A0 A7 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Address mapping for channels 5-7:
- *
- * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
- * | ... | \ \ ... \ \ \ ... \ \
- * | ... | \ \ ... \ \ \ ... \ (not used)
- * | ... | \ \ ... \ \ \ ... \
- * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation.
- *
- */
-
-#define MAX_DMA_CHANNELS 8
-
-
-/* 16MB ISA DMA zone */
-#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT)
-
-/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
-
-/* Compat define for old dma zone */
-#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG 0x08 /* command register (w) */
-#define DMA1_STAT_REG 0x08 /* status register (r) */
-#define DMA1_REQ_REG 0x09 /* request register (w) */
-#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
-#define DMA1_MODE_REG 0x0B /* mode register (w) */
-#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
-#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
-#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
-#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
-
-#define DMA2_CMD_REG 0xD0 /* command register (w) */
-#define DMA2_STAT_REG 0xD0 /* status register (r) */
-#define DMA2_REQ_REG 0xD2 /* request register (w) */
-#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
-#define DMA2_MODE_REG 0xD6 /* mode register (w) */
-#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
-#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
-#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
-#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
-
-#define DMA_ADDR_0 0x00 /* DMA address registers */
-#define DMA_ADDR_1 0x02
-#define DMA_ADDR_2 0x04
-#define DMA_ADDR_3 0x06
-#define DMA_ADDR_4 0xC0
-#define DMA_ADDR_5 0xC4
-#define DMA_ADDR_6 0xC8
-#define DMA_ADDR_7 0xCC
-
-#define DMA_CNT_0 0x01 /* DMA count registers */
-#define DMA_CNT_1 0x03
-#define DMA_CNT_2 0x05
-#define DMA_CNT_3 0x07
-#define DMA_CNT_4 0xC2
-#define DMA_CNT_5 0xC6
-#define DMA_CNT_6 0xCA
-#define DMA_CNT_7 0xCE
-
-#define DMA_PAGE_0 0x87 /* DMA page registers */
-#define DMA_PAGE_1 0x83
-#define DMA_PAGE_2 0x81
-#define DMA_PAGE_3 0x82
-#define DMA_PAGE_5 0x8B
-#define DMA_PAGE_6 0x89
-#define DMA_PAGE_7 0x8A
-
-#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT 0x10
-
-
-extern spinlock_t dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
- unsigned long flags;
- spin_lock_irqsave(&dma_spin_lock, flags);
- return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
- spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr, DMA1_MASK_REG);
- else
- dma_outb(dmanr & 3, DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr | 4, DMA1_MASK_REG);
- else
- dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(0, DMA1_CLEAR_FF_REG);
- else
- dma_outb(0, DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
- if (dmanr<=3)
- dma_outb(mode | dmanr, DMA1_MODE_REG);
- else
- dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
- switch(dmanr) {
- case 0:
- dma_outb(pagenr, DMA_PAGE_0);
- break;
- case 1:
- dma_outb(pagenr, DMA_PAGE_1);
- break;
- case 2:
- dma_outb(pagenr, DMA_PAGE_2);
- break;
- case 3:
- dma_outb(pagenr, DMA_PAGE_3);
- break;
- case 5:
- dma_outb(pagenr & 0xfe, DMA_PAGE_5);
- break;
- case 6:
- dma_outb(pagenr & 0xfe, DMA_PAGE_6);
- break;
- case 7:
- dma_outb(pagenr & 0xfe, DMA_PAGE_7);
- break;
- }
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
- set_dma_page(dmanr, a>>16);
- if (dmanr <= 3) {
- dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- } else {
- dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- }
-}
-
-
-/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
- count--;
- if (dmanr <= 3) {
- dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- } else {
- dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
- unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
- : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
- /* using short to get 16-bit wrap around */
- unsigned short count;
-
- count = 1 + dma_inb(io_port);
- count += dma_inb(io_port) << 8;
-
- return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr); /* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
-#endif /* _ASM_DMA_H */
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 8e2b0e6aa8e..1241e6ad193 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -5,9 +5,6 @@
#ifdef CONFIG_X86_32
-/* Use early IO mappings for DMI because it's initialized early */
-#define dmi_ioremap bt_ioremap
-#define dmi_iounmap bt_iounmap
#define dmi_alloc alloc_bootmem
#else /* CONFIG_X86_32 */
@@ -22,14 +19,15 @@ extern char dmi_alloc_data[DMI_MAX_DATA];
static inline void *dmi_alloc(unsigned len)
{
int idx = dmi_alloc_index;
- if ((dmi_alloc_index += len) > DMI_MAX_DATA)
+ if ((dmi_alloc_index + len) > DMI_MAX_DATA)
return NULL;
+ dmi_alloc_index += len;
return dmi_alloc_data + idx;
}
+#endif
+
#define dmi_ioremap early_ioremap
#define dmi_iounmap early_iounmap
#endif
-
-#endif
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
new file mode 100644
index 00000000000..7881368142f
--- /dev/null
+++ b/include/asm-x86/ds.h
@@ -0,0 +1,72 @@
+/*
+ * Debug Store (DS) support
+ *
+ * This provides a low-level interface to the hardware's Debug Store
+ * feature that is used for last branch recording (LBR) and
+ * precise-event based sampling (PEBS).
+ *
+ * Different architectures use a different DS layout/pointer size.
+ * The below functions therefore work on a void*.
+ *
+ *
+ * Since there is no user for PEBS, yet, only LBR (or branch
+ * trace store, BTS) is supported.
+ *
+ *
+ * Copyright (C) 2007 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ */
+
+#ifndef _ASM_X86_DS_H
+#define _ASM_X86_DS_H
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+struct cpuinfo_x86;
+
+
+/* a branch trace record entry
+ *
+ * In order to unify the interface between various processor versions,
+ * we use the below data structure for all processors.
+ */
+enum bts_qualifier {
+ BTS_INVALID = 0,
+ BTS_BRANCH,
+ BTS_TASK_ARRIVES,
+ BTS_TASK_DEPARTS
+};
+
+struct bts_struct {
+ u64 qualifier;
+ union {
+ /* BTS_BRANCH */
+ struct {
+ u64 from_ip;
+ u64 to_ip;
+ } lbr;
+ /* BTS_TASK_ARRIVES or
+ BTS_TASK_DEPARTS */
+ u64 jiffies;
+ } variant;
+};
+
+/* Overflow handling mechanisms */
+#define DS_O_SIGNAL 1 /* send overflow signal */
+#define DS_O_WRAP 2 /* wrap around */
+
+extern int ds_allocate(void **, size_t);
+extern int ds_free(void **);
+extern int ds_get_bts_size(void *);
+extern int ds_get_bts_end(void *);
+extern int ds_get_bts_index(void *);
+extern int ds_set_overflow(void *, int);
+extern int ds_get_overflow(void *);
+extern int ds_clear(void *);
+extern int ds_read_bts(void *, int, struct bts_struct *);
+extern int ds_write_bts(void *, const struct bts_struct *);
+extern unsigned long ds_debugctl_mask(void);
+extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
+
+#endif /* _ASM_X86_DS_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 3e214f39fad..7004251fc66 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -22,6 +22,12 @@ struct e820map {
};
#endif /* __ASSEMBLY__ */
+#define ISA_START_ADDRESS 0xa0000
+#define ISA_END_ADDRESS 0x100000
+
+#define BIOS_BEGIN 0x000a0000
+#define BIOS_END 0x00100000
+
#ifdef __KERNEL__
#ifdef CONFIG_X86_32
# include "e820_32.h"
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 03f60c690c8..f1da7ebd190 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -12,20 +12,28 @@
#ifndef __E820_HEADER
#define __E820_HEADER
+#include <linux/ioport.h>
+
#define HIGH_MEMORY (1024*1024)
#ifndef __ASSEMBLY__
extern struct e820map e820;
+extern void update_e820(void);
extern int e820_all_mapped(unsigned long start, unsigned long end,
unsigned type);
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern void find_max_pfn(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
+extern void add_memory_region(unsigned long long start,
+ unsigned long long size, int type);
extern void e820_register_memory(void);
extern void limit_regions(unsigned long long size);
extern void print_memory_map(char *who);
+extern void init_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource,
+ struct resource *bss_resource);
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
extern void e820_mark_nosave_regions(void);
@@ -35,5 +43,6 @@ static inline void e820_mark_nosave_regions(void)
}
#endif
+
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 0bd4787a5d5..51e4170f9ca 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -11,6 +11,8 @@
#ifndef __E820_HEADER
#define __E820_HEADER
+#include <linux/ioport.h>
+
#ifndef __ASSEMBLY__
extern unsigned long find_e820_area(unsigned long start, unsigned long end,
unsigned size);
@@ -19,11 +21,15 @@ extern void add_memory_region(unsigned long start, unsigned long size,
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(void);
+extern void e820_reserve_resources(struct resource *code_resource,
+ struct resource *data_resource, struct resource *bss_resource);
extern void e820_mark_nosave_regions(void);
-extern void e820_print_map(char *who);
extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_any_valid(unsigned long start, unsigned long end);
+extern int e820_all_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_all_valid(unsigned long start, unsigned long end);
extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
extern void e820_setup_gap(void);
@@ -33,9 +39,11 @@ extern void e820_register_active_regions(int nid,
extern void finish_e820_parsing(void);
extern struct e820map e820;
+extern void update_e820(void);
+
+extern void reserve_early(unsigned long start, unsigned long end);
+extern void early_res_to_bootmem(void);
-extern unsigned ebda_addr, ebda_size;
-extern unsigned long nodemap_addr, nodemap_size;
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
new file mode 100644
index 00000000000..9c68a1f098d
--- /dev/null
+++ b/include/asm-x86/efi.h
@@ -0,0 +1,97 @@
+#ifndef _ASM_X86_EFI_H
+#define _ASM_X86_EFI_H
+
+#ifdef CONFIG_X86_32
+
+extern unsigned long asmlinkage efi_call_phys(void *, ...);
+
+#define efi_call_phys0(f) efi_call_phys(f)
+#define efi_call_phys1(f, a1) efi_call_phys(f, a1)
+#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2)
+#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3)
+#define efi_call_phys4(f, a1, a2, a3, a4) \
+ efi_call_phys(f, a1, a2, a3, a4)
+#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
+ efi_call_phys(f, a1, a2, a3, a4, a5)
+#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
+ efi_call_phys(f, a1, a2, a3, a4, a5, a6)
+/*
+ * Wrap all the virtual calls in a way that forces the parameters on the stack.
+ */
+
+#define efi_call_virt(f, args...) \
+ ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
+
+#define efi_call_virt0(f) efi_call_virt(f)
+#define efi_call_virt1(f, a1) efi_call_virt(f, a1)
+#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2)
+#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3)
+#define efi_call_virt4(f, a1, a2, a3, a4) \
+ efi_call_virt(f, a1, a2, a3, a4)
+#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
+ efi_call_virt(f, a1, a2, a3, a4, a5)
+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
+ efi_call_virt(f, a1, a2, a3, a4, a5, a6)
+
+#define efi_ioremap(addr, size) ioremap(addr, size)
+
+#else /* !CONFIG_X86_32 */
+
+#define MAX_EFI_IO_PAGES 100
+
+extern u64 efi_call0(void *fp);
+extern u64 efi_call1(void *fp, u64 arg1);
+extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
+extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3);
+extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
+extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3,
+ u64 arg4, u64 arg5);
+extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
+ u64 arg4, u64 arg5, u64 arg6);
+
+#define efi_call_phys0(f) \
+ efi_call0((void *)(f))
+#define efi_call_phys1(f, a1) \
+ efi_call1((void *)(f), (u64)(a1))
+#define efi_call_phys2(f, a1, a2) \
+ efi_call2((void *)(f), (u64)(a1), (u64)(a2))
+#define efi_call_phys3(f, a1, a2, a3) \
+ efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3))
+#define efi_call_phys4(f, a1, a2, a3, a4) \
+ efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ (u64)(a4))
+#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
+ efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ (u64)(a4), (u64)(a5))
+#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
+ efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ (u64)(a4), (u64)(a5), (u64)(a6))
+
+#define efi_call_virt0(f) \
+ efi_call0((void *)(efi.systab->runtime->f))
+#define efi_call_virt1(f, a1) \
+ efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+#define efi_call_virt2(f, a1, a2) \
+ efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+#define efi_call_virt3(f, a1, a2, a3) \
+ efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ (u64)(a3))
+#define efi_call_virt4(f, a1, a2, a3, a4) \
+ efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ (u64)(a3), (u64)(a4))
+#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
+ efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ (u64)(a3), (u64)(a4), (u64)(a5))
+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
+ efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+
+extern void *efi_ioremap(unsigned long offset, unsigned long size);
+
+#endif /* CONFIG_X86_32 */
+
+extern void efi_reserve_bootmem(void);
+extern void efi_call_phys_prelog(void);
+extern void efi_call_phys_epilog(void);
+
+#endif
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index ec42a4d2e83..d9c94e70728 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -73,18 +73,23 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#endif
#ifdef __KERNEL__
+#include <asm/vdso.h>
-#ifdef CONFIG_X86_32
-#include <asm/processor.h>
-#include <asm/system.h> /* for savesegment */
-#include <asm/desc.h>
+extern unsigned int vdso_enabled;
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
-#define elf_check_arch(x) \
+#define elf_check_arch_ia32(x) \
(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+#ifdef CONFIG_X86_32
+#include <asm/processor.h>
+#include <asm/system.h> /* for savesegment */
+#include <asm/desc.h>
+
+#define elf_check_arch(x) elf_check_arch_ia32(x)
+
/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
contains a pointer to a function which might be registered using `atexit'.
This provides a mean for the dynamic linker to call DT_FINI functions for
@@ -96,36 +101,38 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
just to make things more deterministic.
*/
#define ELF_PLAT_INIT(_r, load_addr) do { \
- _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
- _r->esi = 0; _r->edi = 0; _r->ebp = 0; \
- _r->eax = 0; \
+ _r->bx = 0; _r->cx = 0; _r->dx = 0; \
+ _r->si = 0; _r->di = 0; _r->bp = 0; \
+ _r->ax = 0; \
} while (0)
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
- now struct_user_regs, they are different) */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
- pr_reg[0] = regs->ebx; \
- pr_reg[1] = regs->ecx; \
- pr_reg[2] = regs->edx; \
- pr_reg[3] = regs->esi; \
- pr_reg[4] = regs->edi; \
- pr_reg[5] = regs->ebp; \
- pr_reg[6] = regs->eax; \
- pr_reg[7] = regs->xds & 0xffff; \
- pr_reg[8] = regs->xes & 0xffff; \
- pr_reg[9] = regs->xfs & 0xffff; \
- savesegment(gs,pr_reg[10]); \
- pr_reg[11] = regs->orig_eax; \
- pr_reg[12] = regs->eip; \
- pr_reg[13] = regs->xcs & 0xffff; \
- pr_reg[14] = regs->eflags; \
- pr_reg[15] = regs->esp; \
- pr_reg[16] = regs->xss & 0xffff;
+/*
+ * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ * now struct_user_regs, they are different)
+ */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
+ pr_reg[0] = regs->bx; \
+ pr_reg[1] = regs->cx; \
+ pr_reg[2] = regs->dx; \
+ pr_reg[3] = regs->si; \
+ pr_reg[4] = regs->di; \
+ pr_reg[5] = regs->bp; \
+ pr_reg[6] = regs->ax; \
+ pr_reg[7] = regs->ds & 0xffff; \
+ pr_reg[8] = regs->es & 0xffff; \
+ pr_reg[9] = regs->fs & 0xffff; \
+ savesegment(gs, pr_reg[10]); \
+ pr_reg[11] = regs->orig_ax; \
+ pr_reg[12] = regs->ip; \
+ pr_reg[13] = regs->cs & 0xffff; \
+ pr_reg[14] = regs->flags; \
+ pr_reg[15] = regs->sp; \
+ pr_reg[16] = regs->ss & 0xffff; \
+} while (0);
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
-extern unsigned int vdso_enabled;
#else /* CONFIG_X86_32 */
@@ -137,28 +144,57 @@ extern unsigned int vdso_enabled;
#define elf_check_arch(x) \
((x)->e_machine == EM_X86_64)
+#define compat_elf_check_arch(x) elf_check_arch_ia32(x)
+
+static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
+{
+ asm volatile("movl %0,%%fs" :: "r" (0));
+ asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS));
+ load_gs_index(0);
+ regs->ip = ip;
+ regs->sp = sp;
+ regs->flags = X86_EFLAGS_IF;
+ regs->cs = __USER32_CS;
+ regs->ss = __USER32_DS;
+}
+
+static inline void elf_common_init(struct thread_struct *t,
+ struct pt_regs *regs, const u16 ds)
+{
+ regs->ax = regs->bx = regs->cx = regs->dx = 0;
+ regs->si = regs->di = regs->bp = 0;
+ regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
+ regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+ t->fs = t->gs = 0;
+ t->fsindex = t->gsindex = 0;
+ t->ds = t->es = ds;
+}
+
#define ELF_PLAT_INIT(_r, load_addr) do { \
- struct task_struct *cur = current; \
- (_r)->rbx = 0; (_r)->rcx = 0; (_r)->rdx = 0; \
- (_r)->rsi = 0; (_r)->rdi = 0; (_r)->rbp = 0; \
- (_r)->rax = 0; \
- (_r)->r8 = 0; \
- (_r)->r9 = 0; \
- (_r)->r10 = 0; \
- (_r)->r11 = 0; \
- (_r)->r12 = 0; \
- (_r)->r13 = 0; \
- (_r)->r14 = 0; \
- (_r)->r15 = 0; \
- cur->thread.fs = 0; cur->thread.gs = 0; \
- cur->thread.fsindex = 0; cur->thread.gsindex = 0; \
- cur->thread.ds = 0; cur->thread.es = 0; \
+ elf_common_init(&current->thread, _r, 0); \
clear_thread_flag(TIF_IA32); \
} while (0)
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
- now struct_user_regs, they are different). Assumes current is the process
- getting dumped. */
+#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
+ elf_common_init(&current->thread, regs, __USER_DS)
+#define compat_start_thread(regs, ip, sp) do { \
+ start_ia32_thread(regs, ip, sp); \
+ set_fs(USER_DS); \
+ } while (0)
+#define COMPAT_SET_PERSONALITY(ex, ibcs2) do { \
+ if (test_thread_flag(TIF_IA32)) \
+ clear_thread_flag(TIF_ABI_PENDING); \
+ else \
+ set_thread_flag(TIF_ABI_PENDING); \
+ current->personality |= force_personality32; \
+ } while (0)
+#define COMPAT_ELF_PLATFORM ("i686")
+
+/*
+ * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ * now struct_user_regs, they are different). Assumes current is the process
+ * getting dumped.
+ */
#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
unsigned v; \
@@ -166,22 +202,22 @@ extern unsigned int vdso_enabled;
(pr_reg)[1] = (regs)->r14; \
(pr_reg)[2] = (regs)->r13; \
(pr_reg)[3] = (regs)->r12; \
- (pr_reg)[4] = (regs)->rbp; \
- (pr_reg)[5] = (regs)->rbx; \
+ (pr_reg)[4] = (regs)->bp; \
+ (pr_reg)[5] = (regs)->bx; \
(pr_reg)[6] = (regs)->r11; \
(pr_reg)[7] = (regs)->r10; \
(pr_reg)[8] = (regs)->r9; \
(pr_reg)[9] = (regs)->r8; \
- (pr_reg)[10] = (regs)->rax; \
- (pr_reg)[11] = (regs)->rcx; \
- (pr_reg)[12] = (regs)->rdx; \
- (pr_reg)[13] = (regs)->rsi; \
- (pr_reg)[14] = (regs)->rdi; \
- (pr_reg)[15] = (regs)->orig_rax; \
- (pr_reg)[16] = (regs)->rip; \
+ (pr_reg)[10] = (regs)->ax; \
+ (pr_reg)[11] = (regs)->cx; \
+ (pr_reg)[12] = (regs)->dx; \
+ (pr_reg)[13] = (regs)->si; \
+ (pr_reg)[14] = (regs)->di; \
+ (pr_reg)[15] = (regs)->orig_ax; \
+ (pr_reg)[16] = (regs)->ip; \
(pr_reg)[17] = (regs)->cs; \
- (pr_reg)[18] = (regs)->eflags; \
- (pr_reg)[19] = (regs)->rsp; \
+ (pr_reg)[18] = (regs)->flags; \
+ (pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
(pr_reg)[21] = current->thread.fs; \
(pr_reg)[22] = current->thread.gs; \
@@ -189,15 +225,17 @@ extern unsigned int vdso_enabled;
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \
-} while(0);
+} while (0);
/* I'm not sure if we can use '-' here */
#define ELF_PLATFORM ("x86_64")
extern void set_personality_64bit(void);
-extern int vdso_enabled;
+extern unsigned int sysctl_vsyscall32;
+extern int force_personality32;
#endif /* !CONFIG_X86_32 */
+#define CORE_DUMP_USE_REGSET
#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE 4096
@@ -232,43 +270,24 @@ extern int vdso_enabled;
struct task_struct;
-extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
-extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
-#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
+#define ARCH_DLINFO_IA32(vdso_enabled) \
+do if (vdso_enabled) { \
+ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
+} while (0)
#ifdef CONFIG_X86_32
-extern int dump_task_extended_fpu (struct task_struct *,
- struct user_fxsr_struct *);
-#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) \
- dump_task_extended_fpu(tsk, elf_xfpregs)
-#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
-#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
-#define VDSO_PRELINK 0
-
-#define VDSO_SYM(x) \
- (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK)
-
-#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE)
-#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE)
-extern void __kernel_vsyscall;
-
-#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall)
+#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
-#define ARCH_DLINFO \
-do if (vdso_enabled) { \
- NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
-} while (0)
-
#else /* CONFIG_X86_32 */
+#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
+
/* 1GB for 64bit, 8MB for 32bit */
#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
@@ -277,14 +296,31 @@ do if (vdso_enabled) { \
NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\
} while (0)
+#define AT_SYSINFO 32
+
+#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32)
+
+#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+
#endif /* !CONFIG_X86_32 */
+#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
+
+#define VDSO_ENTRY \
+ ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
+
struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
+extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
+#define compat_arch_setup_additional_pages syscall32_setup_pages
+
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
#endif /* __KERNEL__ */
#endif
diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 680c3956334..8e6aef19f8f 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h
@@ -1,6 +1,18 @@
#ifndef _ASM_EMERGENCY_RESTART_H
#define _ASM_EMERGENCY_RESTART_H
+enum reboot_type {
+ BOOT_TRIPLE = 't',
+ BOOT_KBD = 'k',
+#ifdef CONFIG_X86_32
+ BOOT_BIOS = 'b',
+#endif
+ BOOT_ACPI = 'a',
+ BOOT_EFI = 'e'
+};
+
+extern enum reboot_type reboot_type;
+
extern void machine_emergency_restart(void);
#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 249e753ac80..a7404d50686 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -65,7 +65,7 @@ enum fixed_addresses {
#endif
#ifdef CONFIG_X86_VISWS_APIC
FIX_CO_CPU, /* Cobalt timer */
- FIX_CO_APIC, /* Cobalt APIC Redirection Table */
+ FIX_CO_APIC, /* Cobalt APIC Redirection Table */
FIX_LI_PCIA, /* Lithium PCI Bridge A */
FIX_LI_PCIB, /* Lithium PCI Bridge B */
#endif
@@ -74,7 +74,7 @@ enum fixed_addresses {
#endif
#ifdef CONFIG_X86_CYCLONE_TIMER
FIX_CYCLONE_TIMER, /*cyclone timer register*/
-#endif
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -90,11 +90,23 @@ enum fixed_addresses {
FIX_PARAVIRT_BOOTMAP,
#endif
__end_of_permanent_fixed_addresses,
- /* temporary boot-time mappings, used before ioremap() is functional */
-#define NR_FIX_BTMAPS 16
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+ /*
+ * 256 temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ *
+ * We round it up to the next 512 pages boundary so that we
+ * can have a single pgd entry and a single pte table:
+ */
+#define NR_FIX_BTMAPS 64
+#define FIX_BTMAPS_NESTING 4
+ FIX_BTMAP_END =
+ __end_of_permanent_fixed_addresses + 512 -
+ (__end_of_permanent_fixed_addresses & 511),
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
FIX_WP_TEST,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ FIX_OHCI1394_BASE,
+#endif
__end_of_fixed_addresses
};
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index cdfbe4a6ae6..70ddb21e645 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -15,6 +15,7 @@
#include <asm/apicdef.h>
#include <asm/page.h>
#include <asm/vsyscall.h>
+#include <asm/efi.h>
/*
* Here we define all the compile-time 'special' virtual
@@ -41,6 +42,11 @@ enum fixed_addresses {
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+ FIX_EFI_IO_MAP_LAST_PAGE,
+ FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ FIX_OHCI1394_BASE,
+#endif
__end_of_fixed_addresses
};
diff --git a/include/asm-x86/fpu32.h b/include/asm-x86/fpu32.h
deleted file mode 100644
index 4153db5c0c3..00000000000
--- a/include/asm-x86/fpu32.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _FPU32_H
-#define _FPU32_H 1
-
-struct _fpstate_ia32;
-
-int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave);
-int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf,
- struct pt_regs *regs, int fsave);
-
-#endif
diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index 1f4610e0c61..62828d63f1b 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -1,5 +1,135 @@
-#ifdef CONFIG_X86_32
-# include "futex_32.h"
-#else
-# include "futex_64.h"
+#ifndef _ASM_X86_FUTEX_H
+#define _ASM_X86_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+
+#include <asm/asm.h>
+#include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
+ __asm__ __volatile( \
+"1: " insn "\n" \
+"2: .section .fixup,\"ax\"\n \
+3: mov %3, %1\n \
+ jmp 2b\n \
+ .previous\n \
+ .section __ex_table,\"a\"\n \
+ .align 8\n" \
+ _ASM_PTR "1b,3b\n \
+ .previous" \
+ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
+ : "i" (-EFAULT), "0" (oparg), "1" (0))
+
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+ __asm__ __volatile( \
+"1: movl %2, %0\n \
+ movl %0, %3\n" \
+ insn "\n" \
+"2: " LOCK_PREFIX "cmpxchgl %3, %2\n \
+ jnz 1b\n \
+3: .section .fixup,\"ax\"\n \
+4: mov %5, %1\n \
+ jmp 3b\n \
+ .previous\n \
+ .section __ex_table,\"a\"\n \
+ .align 8\n" \
+ _ASM_PTR "1b,4b,2b,4b\n \
+ .previous" \
+ : "=&a" (oldval), "=&r" (ret), "+m" (*uaddr), \
+ "=&r" (tem) \
+ : "r" (oparg), "i" (-EFAULT), "1" (0))
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+{
+ int op = (encoded_op >> 28) & 7;
+ int cmp = (encoded_op >> 24) & 15;
+ int oparg = (encoded_op << 8) >> 20;
+ int cmparg = (encoded_op << 20) >> 20;
+ int oldval = 0, ret, tem;
+
+ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+ oparg = 1 << oparg;
+
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ return -EFAULT;
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
+ /* Real i386 machines can only support FUTEX_OP_SET */
+ if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3)
+ return -ENOSYS;
+#endif
+
+ pagefault_disable();
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
+ uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ pagefault_enable();
+
+ if (!ret) {
+ switch (cmp) {
+ case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+ case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+ case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+ case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+ case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+ case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+ default: ret = -ENOSYS;
+ }
+ }
+ return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ return -EFAULT;
+
+ __asm__ __volatile__(
+ "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n"
+
+ "2: .section .fixup, \"ax\" \n"
+ "3: mov %2, %0 \n"
+ " jmp 2b \n"
+ " .previous \n"
+
+ " .section __ex_table, \"a\" \n"
+ " .align 8 \n"
+ _ASM_PTR " 1b,3b \n"
+ " .previous \n"
+
+ : "=a" (oldval), "+m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "0" (oldval)
+ : "memory"
+ );
+
+ return oldval;
+}
+
+#endif
#endif
diff --git a/include/asm-x86/futex_32.h b/include/asm-x86/futex_32.h
deleted file mode 100644
index 438ef0ec710..00000000000
--- a/include/asm-x86/futex_32.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: mov %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
- : "i" (-EFAULT), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: mov %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "+m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
- int oldval = 0, ret, tem;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- pagefault_disable();
-
- if (op == FUTEX_OP_SET)
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
- else {
-#ifndef CONFIG_X86_BSWAP
- if (boot_cpu_data.x86 == 3)
- ret = -ENOSYS;
- else
-#endif
- switch (op) {
- case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret,
- oldval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr,
- ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
-
- pagefault_enable();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
-{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- __asm__ __volatile__(
- "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n"
-
- "2: .section .fixup, \"ax\" \n"
- "3: mov %2, %0 \n"
- " jmp 2b \n"
- " .previous \n"
-
- " .section __ex_table, \"a\" \n"
- " .align 8 \n"
- " .long 1b,3b \n"
- " .previous \n"
-
- : "=a" (oldval), "+m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "0" (oldval)
- : "memory"
- );
-
- return oldval;
-}
-
-#endif
-#endif
diff --git a/include/asm-x86/futex_64.h b/include/asm-x86/futex_64.h
deleted file mode 100644
index 5cdfb08013c..00000000000
--- a/include/asm-x86/futex_64.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: mov %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
- : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: mov %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
- int oldval = 0, ret, tem;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- pagefault_disable();
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
- uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
-
- pagefault_enable();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
-{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- __asm__ __volatile__(
- "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n"
-
- "2: .section .fixup, \"ax\" \n"
- "3: mov %2, %0 \n"
- " jmp 2b \n"
- " .previous \n"
-
- " .section __ex_table, \"a\" \n"
- " .align 8 \n"
- " .quad 1b,3b \n"
- " .previous \n"
-
- : "=a" (oldval), "=m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "0" (oldval)
- : "memory"
- );
-
- return oldval;
-}
-
-#endif
-#endif
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index f704c50519b..90958ed993f 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -9,6 +9,7 @@ extern int iommu_detected;
extern void gart_iommu_init(void);
extern void gart_iommu_shutdown(void);
extern void __init gart_parse_options(char *);
+extern void early_gart_iommu_check(void);
extern void gart_iommu_hole_init(void);
extern int fallback_aper_order;
extern int fallback_aper_force;
@@ -20,6 +21,10 @@ extern int fix_aperture;
#define gart_iommu_aperture 0
#define gart_iommu_aperture_allowed 0
+static inline void early_gart_iommu_check(void)
+{
+}
+
static inline void gart_iommu_shutdown(void)
{
}
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 771af336734..811fe14f70b 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -121,9 +121,15 @@ extern int geode_get_dev_base(unsigned int dev);
#define GPIO_MAP_Z 0xE8
#define GPIO_MAP_W 0xEC
-extern void geode_gpio_set(unsigned int, unsigned int);
-extern void geode_gpio_clear(unsigned int, unsigned int);
-extern int geode_gpio_isset(unsigned int, unsigned int);
+static inline u32 geode_gpio(unsigned int nr)
+{
+ BUG_ON(nr > 28);
+ return 1 << nr;
+}
+
+extern void geode_gpio_set(u32, unsigned int);
+extern void geode_gpio_clear(u32, unsigned int);
+extern int geode_gpio_isset(u32, unsigned int);
extern void geode_gpio_setup_event(unsigned int, int, int);
extern void geode_gpio_set_irq(unsigned int, unsigned int);
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
new file mode 100644
index 00000000000..ff87fca0caf
--- /dev/null
+++ b/include/asm-x86/gpio.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_I386_GPIO_H
+#define _ASM_I386_GPIO_H
+
+#include <gpio.h>
+
+#endif /* _ASM_I386_GPIO_H */
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index ad8d6e75878..6a9b4ac59bf 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -69,6 +69,7 @@ extern void force_hpet_resume(void);
#include <linux/interrupt.h>
+typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie);
extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask);
extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
@@ -77,13 +78,16 @@ extern int hpet_set_periodic_freq(unsigned long freq);
extern int hpet_rtc_dropped_irq(void);
extern int hpet_rtc_timer_init(void);
extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
+extern int hpet_register_irq_handler(rtc_irq_handler handler);
+extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
#endif /* CONFIG_HPET_EMULATE_RTC */
-#else
+#else /* CONFIG_HPET_TIMER */
static inline int hpet_enable(void) { return 0; }
static inline unsigned long hpet_readl(unsigned long a) { return 0; }
+static inline int is_hpet_enabled(void) { return 0; }
-#endif /* CONFIG_HPET_TIMER */
+#endif
#endif /* ASM_X86_HPET_H */
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index 0bedbdf5e90..6d65fbb6358 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -26,19 +26,19 @@
* Interrupt entry/exit code at both C and assembly level
*/
-extern void (*interrupt[NR_IRQS])(void);
+extern void (*const interrupt[NR_IRQS])(void);
#ifdef CONFIG_SMP
-fastcall void reschedule_interrupt(void);
-fastcall void invalidate_interrupt(void);
-fastcall void call_function_interrupt(void);
+void reschedule_interrupt(void);
+void invalidate_interrupt(void);
+void call_function_interrupt(void);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
-fastcall void apic_timer_interrupt(void);
-fastcall void error_interrupt(void);
-fastcall void spurious_interrupt(void);
-fastcall void thermal_interrupt(void);
+void apic_timer_interrupt(void);
+void error_interrupt(void);
+void spurious_interrupt(void);
+void thermal_interrupt(void);
#define platform_legacy_irq(irq) ((irq) < 16)
#endif
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index a470d59da67..312a58d6dac 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -135,11 +135,13 @@ extern void init_8259A(int aeoi);
extern void send_IPI_self(int vector);
extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
extern void disable_IO_APIC(void);
extern void print_IO_APIC(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
extern void send_IPI(int dest, int vector);
extern void setup_ioapic_dest(void);
+extern void native_init_IRQ(void);
extern unsigned long io_apic_irqs;
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index a8bbed34966..ba8105ca822 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -1,5 +1,360 @@
-#ifdef CONFIG_X86_32
-# include "i387_32.h"
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_I387_H
+#define _ASM_X86_I387_H
+
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/regset.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/uaccess.h>
+
+extern void fpu_init(void);
+extern unsigned int mxcsr_feature_mask;
+extern void mxcsr_feature_mask_init(void);
+extern void init_fpu(struct task_struct *child);
+extern asmlinkage void math_state_restore(void);
+
+extern user_regset_active_fn fpregs_active, xfpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
+
+#ifdef CONFIG_IA32_EMULATION
+struct _fpstate_ia32;
+extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
+extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
+#endif
+
+#ifdef CONFIG_X86_64
+
+/* Ignore delayed exceptions from user space */
+static inline void tolerant_fwait(void)
+{
+ asm volatile("1: fwait\n"
+ "2:\n"
+ " .section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 1b,2b\n"
+ " .previous\n");
+}
+
+static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
+{
+ int err;
+
+ asm volatile("1: rex64/fxrstor (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 1b,3b\n"
+ ".previous"
+ : [err] "=r" (err)
+#if 0 /* See comment in __save_init_fpu() below. */
+ : [fx] "r" (fx), "m" (*fx), "0" (0));
+#else
+ : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
+#endif
+ if (unlikely(err))
+ init_fpu(current);
+ return err;
+}
+
+#define X87_FSW_ES (1 << 7) /* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. The kernel data segment can be sometimes 0 and sometimes
+ new user value. Both should be ok.
+ Use the PDA as safe address because it should be already in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+ if (unlikely(fx->swd & X87_FSW_ES))
+ asm volatile("fnclex");
+ alternative_input(ASM_NOP8 ASM_NOP2,
+ " emms\n" /* clear stack tags */
+ " fildl %%gs:0", /* load to clear state */
+ X86_FEATURE_FXSAVE_LEAK);
+}
+
+static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
+{
+ int err;
+
+ asm volatile("1: rex64/fxsave (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 1b,3b\n"
+ ".previous"
+ : [err] "=r" (err), "=m" (*fx)
+#if 0 /* See comment in __fxsave_clear() below. */
+ : [fx] "r" (fx), "0" (0));
+#else
+ : [fx] "cdaSDb" (fx), "0" (0));
+#endif
+ if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct)))
+ err = -EFAULT;
+ /* No need to clear here because the caller clears USED_MATH */
+ return err;
+}
+
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+ /* Using "rex64; fxsave %0" is broken because, if the memory operand
+ uses any extended registers for addressing, a second REX prefix
+ will be generated (to the assembler, rex64 followed by semicolon
+ is a separate instruction), and hence the 64-bitness is lost. */
+#if 0
+ /* Using "fxsaveq %0" would be the ideal choice, but is only supported
+ starting with gas 2.16. */
+ __asm__ __volatile__("fxsaveq %0"
+ : "=m" (tsk->thread.i387.fxsave));
+#elif 0
+ /* Using, as a workaround, the properly prefixed form below isn't
+ accepted by any binutils version so far released, complaining that
+ the same type of prefix is used twice if an extended register is
+ needed for addressing (fix submitted to mainline 2005-11-21). */
+ __asm__ __volatile__("rex64/fxsave %0"
+ : "=m" (tsk->thread.i387.fxsave));
+#else
+ /* This, however, we can work around by forcing the compiler to select
+ an addressing mode that doesn't require extended registers. */
+ __asm__ __volatile__("rex64/fxsave %P2(%1)"
+ : "=m" (tsk->thread.i387.fxsave)
+ : "cdaSDb" (tsk),
+ "i" (offsetof(__typeof__(*tsk),
+ thread.i387.fxsave)));
+#endif
+ clear_fpu_state(&tsk->thread.i387.fxsave);
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387(struct _fpstate __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+ sizeof(tsk->thread.i387.fxsave));
+
+ if ((unsigned long)buf % 16)
+ printk("save_i387: bad fpstate %p\n", buf);
+
+ if (!used_math())
+ return 0;
+ clear_used_math(); /* trigger finit */
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
+ if (err) return err;
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ } else {
+ if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+ sizeof(struct i387_fxsave_struct)))
+ return -1;
+ }
+ return 1;
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+static inline int restore_i387(struct _fpstate __user *buf)
+{
+ set_used_math();
+ if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+ clts();
+ task_thread_info(current)->status |= TS_USEDFPU;
+ }
+ return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+#else /* CONFIG_X86_32 */
+
+static inline void tolerant_fwait(void)
+{
+ asm volatile("fnclex ; fwait");
+}
+
+static inline void restore_fpu(struct task_struct *tsk)
+{
+ /*
+ * The "nop" is needed to make the instructions the same
+ * length.
+ */
+ alternative_input(
+ "nop ; frstor %1",
+ "fxrstor %1",
+ X86_FEATURE_FXSR,
+ "m" ((tsk)->thread.i387.fxsave));
+}
+
+/* We need a safe address that is cheap to find and that is already
+ in L1 during context switch. The best choices are unfortunately
+ different for UP and SMP */
+#ifdef CONFIG_SMP
+#define safe_address (__per_cpu_offset[0])
#else
-# include "i387_64.h"
+#define safe_address (kstat_cpu(0).cpustat.user)
#endif
+
+/*
+ * These must be called with preempt disabled
+ */
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+ /* Use more nops than strictly needed in case the compiler
+ varies code */
+ alternative_input(
+ "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
+ "fxsave %[fx]\n"
+ "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
+ X86_FEATURE_FXSR,
+ [fx] "m" (tsk->thread.i387.fxsave),
+ [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. safe_address is a random variable that should be in L1 */
+ alternative_input(
+ GENERIC_NOP8 GENERIC_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (safe_address));
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387(struct _fpstate __user *buf);
+extern int restore_i387(struct _fpstate __user *buf);
+
+#endif /* CONFIG_X86_64 */
+
+static inline void __unlazy_fpu(struct task_struct *tsk)
+{
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ __save_init_fpu(tsk);
+ stts();
+ } else
+ tsk->fpu_counter = 0;
+}
+
+static inline void __clear_fpu(struct task_struct *tsk)
+{
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ tolerant_fwait();
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ }
+}
+
+static inline void kernel_fpu_begin(void)
+{
+ struct thread_info *me = current_thread_info();
+ preempt_disable();
+ if (me->status & TS_USEDFPU)
+ __save_init_fpu(me->task);
+ else
+ clts();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ stts();
+ preempt_enable();
+}
+
+#ifdef CONFIG_X86_64
+
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+ __save_init_fpu(tsk);
+ stts();
+}
+
+#define unlazy_fpu __unlazy_fpu
+#define clear_fpu __clear_fpu
+
+#else /* CONFIG_X86_32 */
+
+/*
+ * These disable preemption on their own and are safe
+ */
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ __save_init_fpu(tsk);
+ stts();
+ preempt_enable();
+}
+
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ __unlazy_fpu(tsk);
+ preempt_enable();
+}
+
+static inline void clear_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ __clear_fpu(tsk);
+ preempt_enable();
+}
+
+#endif /* CONFIG_X86_64 */
+
+/*
+ * i387 state interaction
+ */
+static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.i387.fxsave.cwd;
+ } else {
+ return (unsigned short)tsk->thread.i387.fsave.cwd;
+ }
+}
+
+static inline unsigned short get_fpu_swd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.i387.fxsave.swd;
+ } else {
+ return (unsigned short)tsk->thread.i387.fsave.swd;
+ }
+}
+
+static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
+{
+ if (cpu_has_xmm) {
+ return tsk->thread.i387.fxsave.mxcsr;
+ } else {
+ return MXCSR_DEFAULT;
+ }
+}
+
+#endif /* _ASM_X86_I387_H */
diff --git a/include/asm-x86/i387_32.h b/include/asm-x86/i387_32.h
deleted file mode 100644
index cdd1e248e3b..00000000000
--- a/include/asm-x86/i387_32.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * include/asm-i386/i387.h
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#ifndef __ASM_I386_I387_H
-#define __ASM_I386_I387_H
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-
-extern void mxcsr_feature_mask_init(void);
-extern void init_fpu(struct task_struct *);
-
-/*
- * FPU lazy state save handling...
- */
-
-/*
- * The "nop" is needed to make the instructions the same
- * length.
- */
-#define restore_fpu(tsk) \
- alternative_input( \
- "nop ; frstor %1", \
- "fxrstor %1", \
- X86_FEATURE_FXSR, \
- "m" ((tsk)->thread.i387.fxsave))
-
-extern void kernel_fpu_begin(void);
-#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
-
-/* We need a safe address that is cheap to find and that is already
- in L1 during context switch. The best choices are unfortunately
- different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
-/*
- * These must be called with preempt disabled
- */
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
- /* Use more nops than strictly needed in case the compiler
- varies code */
- alternative_input(
- "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
- "fxsave %[fx]\n"
- "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
- X86_FEATURE_FXSR,
- [fx] "m" (tsk->thread.i387.fxsave),
- [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. safe_address is a random variable that should be in L1 */
- alternative_input(
- GENERIC_NOP8 GENERIC_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (safe_address));
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
-#define __unlazy_fpu( tsk ) do { \
- if (task_thread_info(tsk)->status & TS_USEDFPU) { \
- __save_init_fpu(tsk); \
- stts(); \
- } else \
- tsk->fpu_counter = 0; \
-} while (0)
-
-#define __clear_fpu( tsk ) \
-do { \
- if (task_thread_info(tsk)->status & TS_USEDFPU) { \
- asm volatile("fnclex ; fwait"); \
- task_thread_info(tsk)->status &= ~TS_USEDFPU; \
- stts(); \
- } \
-} while (0)
-
-
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu( struct task_struct *tsk )
-{
- preempt_disable();
- __save_init_fpu(tsk);
- stts();
- preempt_enable();
-}
-
-#define unlazy_fpu( tsk ) do { \
- preempt_disable(); \
- __unlazy_fpu(tsk); \
- preempt_enable(); \
-} while (0)
-
-#define clear_fpu( tsk ) do { \
- preempt_disable(); \
- __clear_fpu( tsk ); \
- preempt_enable(); \
-} while (0)
-
-/*
- * FPU state interaction...
- */
-extern unsigned short get_fpu_cwd( struct task_struct *tsk );
-extern unsigned short get_fpu_swd( struct task_struct *tsk );
-extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
-extern asmlinkage void math_state_restore(void);
-
-/*
- * Signal frame handlers...
- */
-extern int save_i387( struct _fpstate __user *buf );
-extern int restore_i387( struct _fpstate __user *buf );
-
-/*
- * ptrace request handers...
- */
-extern int get_fpregs( struct user_i387_struct __user *buf,
- struct task_struct *tsk );
-extern int set_fpregs( struct task_struct *tsk,
- struct user_i387_struct __user *buf );
-
-extern int get_fpxregs( struct user_fxsr_struct __user *buf,
- struct task_struct *tsk );
-extern int set_fpxregs( struct task_struct *tsk,
- struct user_fxsr_struct __user *buf );
-
-/*
- * FPU state for core dumps...
- */
-extern int dump_fpu( struct pt_regs *regs,
- struct user_i387_struct *fpu );
-
-#endif /* __ASM_I386_I387_H */
diff --git a/include/asm-x86/i387_64.h b/include/asm-x86/i387_64.h
deleted file mode 100644
index 3a4ffba3d6b..00000000000
--- a/include/asm-x86/i387_64.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * include/asm-x86_64/i387.h
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef __ASM_X86_64_I387_H
-#define __ASM_X86_64_I387_H
-
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/thread_info.h>
-#include <asm/uaccess.h>
-
-extern void fpu_init(void);
-extern unsigned int mxcsr_feature_mask;
-extern void mxcsr_feature_mask_init(void);
-extern void init_fpu(struct task_struct *child);
-extern int save_i387(struct _fpstate __user *buf);
-extern asmlinkage void math_state_restore(void);
-
-/*
- * FPU lazy state save handling...
- */
-
-#define unlazy_fpu(tsk) do { \
- if (task_thread_info(tsk)->status & TS_USEDFPU) \
- save_init_fpu(tsk); \
- else \
- tsk->fpu_counter = 0; \
-} while (0)
-
-/* Ignore delayed exceptions from user space */
-static inline void tolerant_fwait(void)
-{
- asm volatile("1: fwait\n"
- "2:\n"
- " .section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,2b\n"
- " .previous\n");
-}
-
-#define clear_fpu(tsk) do { \
- if (task_thread_info(tsk)->status & TS_USEDFPU) { \
- tolerant_fwait(); \
- task_thread_info(tsk)->status &= ~TS_USEDFPU; \
- stts(); \
- } \
-} while (0)
-
-/*
- * ptrace request handers...
- */
-extern int get_fpregs(struct user_i387_struct __user *buf,
- struct task_struct *tsk);
-extern int set_fpregs(struct task_struct *tsk,
- struct user_i387_struct __user *buf);
-
-/*
- * i387 state interaction
- */
-#define get_fpu_mxcsr(t) ((t)->thread.i387.fxsave.mxcsr)
-#define get_fpu_cwd(t) ((t)->thread.i387.fxsave.cwd)
-#define get_fpu_fxsr_twd(t) ((t)->thread.i387.fxsave.twd)
-#define get_fpu_swd(t) ((t)->thread.i387.fxsave.swd)
-#define set_fpu_cwd(t,val) ((t)->thread.i387.fxsave.cwd = (val))
-#define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
-#define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
-
-#define X87_FSW_ES (1 << 7) /* Exception Summary */
-
-/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. The kernel data segment can be sometimes 0 and sometimes
- new user value. Both should be ok.
- Use the PDA as safe address because it should be already in L1. */
-static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
-{
- if (unlikely(fx->swd & X87_FSW_ES))
- asm volatile("fnclex");
- alternative_input(ASM_NOP8 ASM_NOP2,
- " emms\n" /* clear stack tags */
- " fildl %%gs:0", /* load to clear state */
- X86_FEATURE_FXSAVE_LEAK);
-}
-
-static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
-{
- int err;
-
- asm volatile("1: rex64/fxrstor (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,3b\n"
- ".previous"
- : [err] "=r" (err)
-#if 0 /* See comment in __fxsave_clear() below. */
- : [fx] "r" (fx), "m" (*fx), "0" (0));
-#else
- : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
-#endif
- if (unlikely(err))
- init_fpu(current);
- return err;
-}
-
-static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
-{
- int err;
-
- asm volatile("1: rex64/fxsave (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,3b\n"
- ".previous"
- : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
- : [fx] "r" (fx), "0" (0));
-#else
- : [fx] "cdaSDb" (fx), "0" (0));
-#endif
- if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct)))
- err = -EFAULT;
- /* No need to clear here because the caller clears USED_MATH */
- return err;
-}
-
-static inline void __fxsave_clear(struct task_struct *tsk)
-{
- /* Using "rex64; fxsave %0" is broken because, if the memory operand
- uses any extended registers for addressing, a second REX prefix
- will be generated (to the assembler, rex64 followed by semicolon
- is a separate instruction), and hence the 64-bitness is lost. */
-#if 0
- /* Using "fxsaveq %0" would be the ideal choice, but is only supported
- starting with gas 2.16. */
- __asm__ __volatile__("fxsaveq %0"
- : "=m" (tsk->thread.i387.fxsave));
-#elif 0
- /* Using, as a workaround, the properly prefixed form below isn't
- accepted by any binutils version so far released, complaining that
- the same type of prefix is used twice if an extended register is
- needed for addressing (fix submitted to mainline 2005-11-21). */
- __asm__ __volatile__("rex64/fxsave %0"
- : "=m" (tsk->thread.i387.fxsave));
-#else
- /* This, however, we can work around by forcing the compiler to select
- an addressing mode that doesn't require extended registers. */
- __asm__ __volatile__("rex64/fxsave %P2(%1)"
- : "=m" (tsk->thread.i387.fxsave)
- : "cdaSDb" (tsk),
- "i" (offsetof(__typeof__(*tsk),
- thread.i387.fxsave)));
-#endif
- clear_fpu_state(&tsk->thread.i387.fxsave);
-}
-
-static inline void kernel_fpu_begin(void)
-{
- struct thread_info *me = current_thread_info();
- preempt_disable();
- if (me->status & TS_USEDFPU) {
- __fxsave_clear(me->task);
- me->status &= ~TS_USEDFPU;
- return;
- }
- clts();
-}
-
-static inline void kernel_fpu_end(void)
-{
- stts();
- preempt_enable();
-}
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- __fxsave_clear(tsk);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
- set_used_math();
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
- return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-}
-
-#endif /* __ASM_X86_64_I387_H */
diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h
index 747548ec5d1..b51c0487fc4 100644
--- a/include/asm-x86/i8253.h
+++ b/include/asm-x86/i8253.h
@@ -12,4 +12,7 @@ extern struct clock_event_device *global_clock_event;
extern void setup_pit_timer(void);
+#define inb_pit inb_p
+#define outb_pit outb_p
+
#endif /* __ASM_I8253_H__ */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 29d8f9a6b3f..67c319e0efc 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -3,10 +3,25 @@
extern unsigned int cached_irq_mask;
-#define __byte(x,y) (((unsigned char *) &(y))[x])
+#define __byte(x,y) (((unsigned char *) &(y))[x])
#define cached_master_mask (__byte(0, cached_irq_mask))
#define cached_slave_mask (__byte(1, cached_irq_mask))
+/* i8259A PIC registers */
+#define PIC_MASTER_CMD 0x20
+#define PIC_MASTER_IMR 0x21
+#define PIC_MASTER_ISR PIC_MASTER_CMD
+#define PIC_MASTER_POLL PIC_MASTER_ISR
+#define PIC_MASTER_OCW3 PIC_MASTER_ISR
+#define PIC_SLAVE_CMD 0xa0
+#define PIC_SLAVE_IMR 0xa1
+
+/* i8259A PIC related value */
+#define PIC_CASCADE_IR 2
+#define MASTER_ICW4_DEFAULT 0x01
+#define SLAVE_ICW4_DEFAULT 0x01
+#define PIC_ICW4_AEOI 2
+
extern spinlock_t i8259A_lock;
extern void init_8259A(int auto_eoi);
@@ -14,4 +29,7 @@ extern void enable_8259A_irq(unsigned int irq);
extern void disable_8259A_irq(unsigned int irq);
extern unsigned int startup_8259A_irq(unsigned int irq);
+#define inb_pic inb_p
+#define outb_pic outb_p
+
#endif /* __ASM_I8259_H__ */
diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h
index 0190b7c4e31..aa9733206e2 100644
--- a/include/asm-x86/ia32.h
+++ b/include/asm-x86/ia32.h
@@ -159,12 +159,6 @@ struct ustat32 {
#define IA32_STACK_TOP IA32_PAGE_OFFSET
#ifdef __KERNEL__
-struct user_desc;
-struct siginfo_t;
-int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
-int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info);
-int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs);
-
struct linux_binprm;
extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_top, int exec_stack);
diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
index 5b52ce50733..61cea9e7c5c 100644
--- a/include/asm-x86/ia32_unistd.h
+++ b/include/asm-x86/ia32_unistd.h
@@ -5,7 +5,7 @@
* This file contains the system call numbers of the ia32 port,
* this is for the kernel only.
* Only add syscalls here where some part of the kernel needs to know
- * the number. This should be otherwise in sync with asm-i386/unistd.h. -AK
+ * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
*/
#define __NR_ia32_restart_syscall 0
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index 42130adf9c7..c2552d8bebf 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -1,6 +1,4 @@
/*
- * linux/include/asm-i386/ide.h
- *
* Copyright (C) 1994-1996 Linus Torvalds & authors
*/
diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h
index 6bd47dcf206..d240e5b30a4 100644
--- a/include/asm-x86/idle.h
+++ b/include/asm-x86/idle.h
@@ -6,7 +6,6 @@
struct notifier_block;
void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
void enter_idle(void);
void exit_idle(void);
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd1e6f..586d7aa54ce 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -100,8 +100,6 @@ static inline void * phys_to_virt(unsigned long address)
*/
#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
@@ -111,32 +109,39 @@ extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsign
* make bus memory CPU accessible via the readb/readw/readl/writeb/
* writew/writel functions and the other mmio helpers. The returned
* address is not guaranteed to be usable directly as a virtual
- * address.
+ * address.
*
* If the area you are trying to map is a PCI BAR you should have a
* look at pci_iomap().
*/
+extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size);
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
- return __ioremap(offset, size, 0);
+ return ioremap_nocache(offset, size);
}
-extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
/*
- * bt_ioremap() and bt_iounmap() are for temporary early boot-time
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
* mappings, before the real ioremap() is functional.
* A boot-time mapping is currently limited to at most 16 pages.
*/
-extern void *bt_ioremap(unsigned long offset, unsigned long size);
-extern void bt_iounmap(void *addr, unsigned long size);
+extern void early_ioremap_init(void);
+extern void early_ioremap_clear(void);
+extern void early_ioremap_reset(void);
+extern void *early_ioremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
/* Use early IO mappings for DMI because it's initialized early */
-#define dmi_ioremap bt_ioremap
-#define dmi_iounmap bt_iounmap
+#define dmi_ioremap early_ioremap
+#define dmi_iounmap early_iounmap
#define dmi_alloc alloc_bootmem
/*
@@ -250,10 +255,10 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
+
+extern int io_delay_type;
+extern void io_delay_init(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b079433..f64a59cc396 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,12 +35,24 @@
* - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void native_io_delay(void);
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+extern int io_delay_type;
+extern void io_delay_init(void);
+
+#if defined(CONFIG_PARAVIRT)
+#include <asm/paravirt.h>
#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+
+static inline void slow_down_io(void)
+{
+ native_io_delay();
+#ifdef REALLY_SLOW_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
+#endif
+}
#endif
/*
@@ -52,9 +64,15 @@ static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+#ifndef REALLY_SLOW_IO
+#define REALLY_SLOW_IO
+#define UNSET_REALLY_SLOW_IO
+#endif
+
#define __OUT(s,s1,x) \
__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
+ slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
@@ -63,8 +81,13 @@ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); return _v; } \
+__IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
+ slow_down_io(); return _v; }
+
+#ifdef UNSET_REALLY_SLOW_IO
+#undef REALLY_SLOW_IO
+#endif
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
@@ -127,13 +150,6 @@ static inline void * phys_to_virt(unsigned long address)
#include <asm-generic/iomap.h>
-extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, 0);
-}
-
extern void *early_ioremap(unsigned long addr, unsigned long size);
extern void early_iounmap(void *addr, unsigned long size);
@@ -142,8 +158,19 @@ extern void early_iounmap(void *addr, unsigned long size);
* it's useful if some control registers are in such an area and write combining
* or read caching is not desirable:
*/
-extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size);
+
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
+{
+ return ioremap_nocache(offset, size);
+}
+
extern void iounmap(volatile void __iomem *addr);
+
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
/*
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 88494966bee..0f5b3fef0b0 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -1,5 +1,159 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <asm/types.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+/*
+ * The structure of the IO-APIC:
+ */
+union IO_APIC_reg_00 {
+ u32 raw;
+ struct {
+ u32 __reserved_2 : 14,
+ LTS : 1,
+ delivery_type : 1,
+ __reserved_1 : 8,
+ ID : 8;
+ } __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_01 {
+ u32 raw;
+ struct {
+ u32 version : 8,
+ __reserved_2 : 7,
+ PRQ : 1,
+ entries : 8,
+ __reserved_1 : 8;
+ } __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_02 {
+ u32 raw;
+ struct {
+ u32 __reserved_2 : 24,
+ arbitration : 4,
+ __reserved_1 : 4;
+ } __attribute__ ((packed)) bits;
+};
+
+union IO_APIC_reg_03 {
+ u32 raw;
+ struct {
+ u32 boot_DT : 1,
+ __reserved_1 : 31;
+ } __attribute__ ((packed)) bits;
+};
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+ __u32 vector : 8,
+ delivery_mode : 3, /* 000: FIXED
+ * 001: lowest prio
+ * 111: ExtINT
+ */
+ dest_mode : 1, /* 0: physical, 1: logical */
+ delivery_status : 1,
+ polarity : 1,
+ irr : 1,
+ trigger : 1, /* 0: edge, 1: level */
+ mask : 1, /* 0: enabled, 1: disabled */
+ __reserved_2 : 15;
+
#ifdef CONFIG_X86_32
-# include "io_apic_32.h"
+ union {
+ struct {
+ __u32 __reserved_1 : 24,
+ physical_dest : 4,
+ __reserved_2 : 4;
+ } physical;
+
+ struct {
+ __u32 __reserved_1 : 24,
+ logical_dest : 8;
+ } logical;
+ } dest;
#else
-# include "io_apic_64.h"
+ __u32 __reserved_3 : 24,
+ dest : 8;
+#endif
+
+} __attribute__ ((packed));
+
+#ifdef CONFIG_X86_IO_APIC
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+/* Older SiS APIC requires we rewrite the index register */
+extern int sis_apic_bug;
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+static inline void disable_ioapic_setup(void)
+{
+ skip_ioapic_setup = 1;
+}
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs \
+ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
+
+#ifdef CONFIG_ACPI
+extern int io_apic_get_unique_id(int ioapic, int apic_id);
+extern int io_apic_get_version(int ioapic);
+extern int io_apic_get_redir_entries(int ioapic);
+extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
+ int edge_level, int active_high_low);
+extern int timer_uses_ioapic_pin_0;
+#endif /* CONFIG_ACPI */
+
+extern int (*ioapic_renumber_irq)(int ioapic, int irq);
+extern void ioapic_init_mappings(void);
+
+#else /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
#endif
diff --git a/include/asm-x86/io_apic_32.h b/include/asm-x86/io_apic_32.h
deleted file mode 100644
index 3f087883ea4..00000000000
--- a/include/asm-x86/io_apic_32.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <asm/types.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-/*
- * The structure of the IO-APIC:
- */
-union IO_APIC_reg_00 {
- u32 raw;
- struct {
- u32 __reserved_2 : 14,
- LTS : 1,
- delivery_type : 1,
- __reserved_1 : 8,
- ID : 8;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_01 {
- u32 raw;
- struct {
- u32 version : 8,
- __reserved_2 : 7,
- PRQ : 1,
- entries : 8,
- __reserved_1 : 8;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_02 {
- u32 raw;
- struct {
- u32 __reserved_2 : 24,
- arbitration : 4,
- __reserved_1 : 4;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_03 {
- u32 raw;
- struct {
- u32 boot_DT : 1,
- __reserved_1 : 31;
- } __attribute__ ((packed)) bits;
-};
-
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- union { struct { __u32
- __reserved_1 : 24,
- physical_dest : 4,
- __reserved_2 : 4;
- } physical;
-
- struct { __u32
- __reserved_1 : 24,
- logical_dest : 8;
- } logical;
- } dest;
-
-} __attribute__ ((packed));
-
-#ifdef CONFIG_X86_IO_APIC
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-/* Older SiS APIC requires we rewrite the index register */
-extern int sis_apic_bug;
-
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
-
-static inline void disable_ioapic_setup(void)
-{
- skip_ioapic_setup = 1;
-}
-
-static inline int ioapic_setup_disabled(void)
-{
- return skip_ioapic_setup;
-}
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-
-#ifdef CONFIG_ACPI
-extern int io_apic_get_unique_id (int ioapic, int apic_id);
-extern int io_apic_get_version (int ioapic);
-extern int io_apic_get_redir_entries (int ioapic);
-extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
-extern int timer_uses_ioapic_pin_0;
-#endif /* CONFIG_ACPI */
-
-extern int (*ioapic_renumber_irq)(int ioapic, int irq);
-
-#else /* !CONFIG_X86_IO_APIC */
-#define io_apic_assign_pci_irqs 0
-#endif
-
-#endif
diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
deleted file mode 100644
index e2c13675ee4..00000000000
--- a/include/asm-x86/io_apic_64.h
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <asm/types.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-#define APIC_MISMATCH_DEBUG
-
-/*
- * The structure of the IO-APIC:
- */
-union IO_APIC_reg_00 {
- u32 raw;
- struct {
- u32 __reserved_2 : 14,
- LTS : 1,
- delivery_type : 1,
- __reserved_1 : 8,
- ID : 8;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_01 {
- u32 raw;
- struct {
- u32 version : 8,
- __reserved_2 : 7,
- PRQ : 1,
- entries : 8,
- __reserved_1 : 8;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_02 {
- u32 raw;
- struct {
- u32 __reserved_2 : 24,
- arbitration : 4,
- __reserved_1 : 4;
- } __attribute__ ((packed)) bits;
-};
-
-union IO_APIC_reg_03 {
- u32 raw;
- struct {
- u32 boot_DT : 1,
- __reserved_1 : 31;
- } __attribute__ ((packed)) bits;
-};
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- __u32 __reserved_3 : 24,
- dest : 8;
-} __attribute__ ((packed));
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
-
-static inline void disable_ioapic_setup(void)
-{
- skip_ioapic_setup = 1;
-}
-
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-
-#ifdef CONFIG_ACPI
-extern int io_apic_get_version (int ioapic);
-extern int io_apic_get_redir_entries (int ioapic);
-extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int);
-#endif
-
-extern int sis_apic_bug; /* dummy */
-
-void enable_NMI_through_LVT0 (void * dummy);
-
-extern spinlock_t i8259A_lock;
-
-extern int timer_over_8254;
-
-#endif
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 1b695ff5268..92021c1ffa3 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -1,5 +1,245 @@
-#ifdef CONFIG_X86_32
-# include "irqflags_32.h"
+#ifndef _X86_IRQFLAGS_H_
+#define _X86_IRQFLAGS_H_
+
+#include <asm/processor-flags.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * Interrupt control:
+ */
+
+static inline unsigned long native_save_fl(void)
+{
+ unsigned long flags;
+
+ __asm__ __volatile__(
+ "# __raw_save_flags\n\t"
+ "pushf ; pop %0"
+ : "=g" (flags)
+ : /* no input */
+ : "memory"
+ );
+
+ return flags;
+}
+
+static inline void native_restore_fl(unsigned long flags)
+{
+ __asm__ __volatile__(
+ "push %0 ; popf"
+ : /* no output */
+ :"g" (flags)
+ :"memory", "cc"
+ );
+}
+
+static inline void native_irq_disable(void)
+{
+ asm volatile("cli": : :"memory");
+}
+
+static inline void native_irq_enable(void)
+{
+ asm volatile("sti": : :"memory");
+}
+
+static inline void native_safe_halt(void)
+{
+ asm volatile("sti; hlt": : :"memory");
+}
+
+static inline void native_halt(void)
+{
+ asm volatile("hlt": : :"memory");
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#ifndef __ASSEMBLY__
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+ return native_save_fl();
+}
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+ native_restore_fl(flags);
+}
+
+static inline void raw_local_irq_disable(void)
+{
+ native_irq_disable();
+}
+
+static inline void raw_local_irq_enable(void)
+{
+ native_irq_enable();
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+ native_safe_halt();
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+ native_halt();
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ raw_local_irq_disable();
+
+ return flags;
+}
+#else
+
+#define ENABLE_INTERRUPTS(x) sti
+#define DISABLE_INTERRUPTS(x) cli
+
+#ifdef CONFIG_X86_64
+#define INTERRUPT_RETURN iretq
+#define ENABLE_INTERRUPTS_SYSCALL_RET \
+ movq %gs:pda_oldrsp, %rsp; \
+ swapgs; \
+ sysretq;
+#else
+#define INTERRUPT_RETURN iret
+#define ENABLE_INTERRUPTS_SYSCALL_RET sti; sysexit
+#define GET_CR0_INTO_EAX movl %cr0, %eax
+#endif
+
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifndef __ASSEMBLY__
+#define raw_local_save_flags(flags) \
+ do { (flags) = __raw_local_save_flags(); } while (0)
+
+#define raw_local_irq_save(flags) \
+ do { (flags) = __raw_local_irq_save(); } while (0)
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & X86_EFLAGS_IF);
+}
+
+static inline int raw_irqs_disabled(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * makes the traced hardirq state match with the machine state
+ *
+ * should be a rarely used function, only in places where its
+ * otherwise impossible to know the irq state, like in traps.
+ */
+static inline void trace_hardirqs_fixup_flags(unsigned long flags)
+{
+ if (raw_irqs_disabled_flags(flags))
+ trace_hardirqs_off();
+ else
+ trace_hardirqs_on();
+}
+
+static inline void trace_hardirqs_fixup(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ trace_hardirqs_fixup_flags(flags);
+}
+
#else
-# include "irqflags_64.h"
+
+#ifdef CONFIG_X86_64
+/*
+ * Currently paravirt can't handle swapgs nicely when we
+ * don't have a stack we can rely on (such as a user space
+ * stack). So we either find a way around these or just fault
+ * and emulate if a guest tries to call swapgs directly.
+ *
+ * Either way, this is a good way to document that we don't
+ * have a reliable stack. x86_64 only.
+ */
+#define SWAPGS_UNSAFE_STACK swapgs
+#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
+#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
+#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
+#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
+ TRACE_IRQS_ON; \
+ sti; \
+ SAVE_REST; \
+ LOCKDEP_SYS_EXIT; \
+ RESTORE_REST; \
+ cli; \
+ TRACE_IRQS_OFF;
+
+#else
+#define ARCH_TRACE_IRQS_ON \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_on; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_off; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#define ARCH_LOCKDEP_SYS_EXIT \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call lockdep_sys_exit; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#define ARCH_LOCKDEP_SYS_EXIT_IRQ
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON
+# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
+# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
+# else
+# define LOCKDEP_SYS_EXIT
+# define LOCKDEP_SYS_EXIT_IRQ
+# endif
+
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
deleted file mode 100644
index 4c7720089cb..00000000000
--- a/include/asm-x86/irqflags_32.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * include/asm-i386/irqflags.h
- *
- * IRQ flags handling
- *
- * This file gets included from lowlevel asm headers too, to provide
- * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() functions from the lowlevel headers.
- */
-#ifndef _ASM_IRQFLAGS_H
-#define _ASM_IRQFLAGS_H
-#include <asm/processor-flags.h>
-
-#ifndef __ASSEMBLY__
-static inline unsigned long native_save_fl(void)
-{
- unsigned long f;
- asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
- return f;
-}
-
-static inline void native_restore_fl(unsigned long f)
-{
- asm volatile("pushl %0 ; popfl": /* no output */
- :"g" (f)
- :"memory", "cc");
-}
-
-static inline void native_irq_disable(void)
-{
- asm volatile("cli": : :"memory");
-}
-
-static inline void native_irq_enable(void)
-{
- asm volatile("sti": : :"memory");
-}
-
-static inline void native_safe_halt(void)
-{
- asm volatile("sti; hlt": : :"memory");
-}
-
-static inline void native_halt(void)
-{
- asm volatile("hlt": : :"memory");
-}
-#endif /* __ASSEMBLY__ */
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#ifndef __ASSEMBLY__
-
-static inline unsigned long __raw_local_save_flags(void)
-{
- return native_save_fl();
-}
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
- native_restore_fl(flags);
-}
-
-static inline void raw_local_irq_disable(void)
-{
- native_irq_disable();
-}
-
-static inline void raw_local_irq_enable(void)
-{
- native_irq_enable();
-}
-
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
- native_safe_halt();
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
- native_halt();
-}
-
-/*
- * For spinlocks, etc:
- */
-static inline unsigned long __raw_local_irq_save(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_disable();
-
- return flags;
-}
-
-#else
-#define DISABLE_INTERRUPTS(clobbers) cli
-#define ENABLE_INTERRUPTS(clobbers) sti
-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
-#define INTERRUPT_RETURN iret
-#define GET_CR0_INTO_EAX movl %cr0, %eax
-#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
-
-#ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF);
-}
-
-static inline int raw_irqs_disabled(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- return raw_irqs_disabled_flags(flags);
-}
-
-/*
- * makes the traced hardirq state match with the machine state
- *
- * should be a rarely used function, only in places where its
- * otherwise impossible to know the irq state, like in traps.
- */
-static inline void trace_hardirqs_fixup_flags(unsigned long flags)
-{
- if (raw_irqs_disabled_flags(flags))
- trace_hardirqs_off();
- else
- trace_hardirqs_on();
-}
-
-static inline void trace_hardirqs_fixup(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- trace_hardirqs_fixup_flags(flags);
-}
-#endif /* __ASSEMBLY__ */
-
-/*
- * Do the CPU's IRQ-state tracing from assembly code. We call a
- * C function, so save all the C-clobbered registers:
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-# define TRACE_IRQS_ON \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_on; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-# define TRACE_IRQS_OFF \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_off; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LOCKDEP_SYS_EXIT \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call lockdep_sys_exit; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-#else
-# define LOCKDEP_SYS_EXIT
-#endif
-
-#endif
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
deleted file mode 100644
index bb9163bb29d..00000000000
--- a/include/asm-x86/irqflags_64.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * include/asm-x86_64/irqflags.h
- *
- * IRQ flags handling
- *
- * This file gets included from lowlevel asm headers too, to provide
- * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() functions from the lowlevel headers.
- */
-#ifndef _ASM_IRQFLAGS_H
-#define _ASM_IRQFLAGS_H
-#include <asm/processor-flags.h>
-
-#ifndef __ASSEMBLY__
-/*
- * Interrupt control:
- */
-
-static inline unsigned long __raw_local_save_flags(void)
-{
- unsigned long flags;
-
- __asm__ __volatile__(
- "# __raw_save_flags\n\t"
- "pushfq ; popq %q0"
- : "=g" (flags)
- : /* no input */
- : "memory"
- );
-
- return flags;
-}
-
-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
- __asm__ __volatile__(
- "pushq %0 ; popfq"
- : /* no output */
- :"g" (flags)
- :"memory", "cc"
- );
-}
-
-#ifdef CONFIG_X86_VSMP
-
-/*
- * Interrupt control for the VSMP architecture:
- */
-
-static inline void raw_local_irq_disable(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-
-static inline void raw_local_irq_enable(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
-}
-
-#else /* CONFIG_X86_VSMP */
-
-static inline void raw_local_irq_disable(void)
-{
- __asm__ __volatile__("cli" : : : "memory");
-}
-
-static inline void raw_local_irq_enable(void)
-{
- __asm__ __volatile__("sti" : : : "memory");
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF);
-}
-
-#endif
-
-/*
- * For spinlocks, etc.:
- */
-
-static inline unsigned long __raw_local_irq_save(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_disable();
-
- return flags;
-}
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- return raw_irqs_disabled_flags(flags);
-}
-
-/*
- * makes the traced hardirq state match with the machine state
- *
- * should be a rarely used function, only in places where its
- * otherwise impossible to know the irq state, like in traps.
- */
-static inline void trace_hardirqs_fixup_flags(unsigned long flags)
-{
- if (raw_irqs_disabled_flags(flags))
- trace_hardirqs_off();
- else
- trace_hardirqs_on();
-}
-
-static inline void trace_hardirqs_fixup(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- trace_hardirqs_fixup_flags(flags);
-}
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
- __asm__ __volatile__("sti; hlt" : : : "memory");
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
- __asm__ __volatile__("hlt": : :"memory");
-}
-
-#else /* __ASSEMBLY__: */
-# ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON call trace_hardirqs_on_thunk
-# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk
-# else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-# endif
-# ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
-# define LOCKDEP_SYS_EXIT_IRQ \
- TRACE_IRQS_ON; \
- sti; \
- SAVE_REST; \
- LOCKDEP_SYS_EXIT; \
- RESTORE_REST; \
- cli; \
- TRACE_IRQS_OFF;
-# else
-# define LOCKDEP_SYS_EXIT
-# define LOCKDEP_SYS_EXIT_IRQ
-# endif
-#endif
-
-#endif
diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h
index 699dd6961ed..452e2b696ff 100644
--- a/include/asm-x86/k8.h
+++ b/include/asm-x86/k8.h
@@ -10,5 +10,6 @@ extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges;
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
+extern int k8_scan_nodes(unsigned long start, unsigned long end);
#endif
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index e2f9b62e535..dd442a1632c 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -22,12 +22,17 @@ enum die_val {
DIE_PAGE_FAULT,
};
-extern void printk_address(unsigned long address);
+extern void printk_address(unsigned long address, int reliable);
extern void die(const char *,struct pt_regs *,long);
-extern void __die(const char *,struct pt_regs *,long);
+extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_registers(struct pt_regs *regs);
+extern void __show_registers(struct pt_regs *, int all);
+extern void show_trace(struct task_struct *t, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp);
+extern void __show_regs(struct pt_regs *regs);
+extern void show_regs(struct pt_regs *regs);
extern void dump_pagetable(unsigned long);
extern unsigned long oops_begin(void);
-extern void oops_end(unsigned long);
+extern void oops_end(unsigned long, struct pt_regs *, int signr);
#endif
diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 718ddbfb951..c90d3c77afc 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -1,5 +1,170 @@
+#ifndef _KEXEC_H
+#define _KEXEC_H
+
#ifdef CONFIG_X86_32
-# include "kexec_32.h"
+# define PA_CONTROL_PAGE 0
+# define VA_CONTROL_PAGE 1
+# define PA_PGD 2
+# define VA_PGD 3
+# define PA_PTE_0 4
+# define VA_PTE_0 5
+# define PA_PTE_1 6
+# define VA_PTE_1 7
+# ifdef CONFIG_X86_PAE
+# define PA_PMD_0 8
+# define VA_PMD_0 9
+# define PA_PMD_1 10
+# define VA_PMD_1 11
+# define PAGES_NR 12
+# else
+# define PAGES_NR 8
+# endif
#else
-# include "kexec_64.h"
+# define PA_CONTROL_PAGE 0
+# define VA_CONTROL_PAGE 1
+# define PA_PGD 2
+# define VA_PGD 3
+# define PA_PUD_0 4
+# define VA_PUD_0 5
+# define PA_PMD_0 6
+# define VA_PMD_0 7
+# define PA_PTE_0 8
+# define VA_PTE_0 9
+# define PA_PUD_1 10
+# define VA_PUD_1 11
+# define PA_PMD_1 12
+# define VA_PMD_1 13
+# define PA_PTE_1 14
+# define VA_PTE_1 15
+# define PA_TABLE_PAGE 16
+# define PAGES_NR 17
#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ *
+ * So far x86_64 is limited to 40 physical address bits.
+ */
+#ifdef CONFIG_X86_32
+/* Maximum physical address we can use pages from */
+# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+# define KEXEC_CONTROL_CODE_SIZE 4096
+
+/* The native architecture */
+# define KEXEC_ARCH KEXEC_ARCH_386
+
+/* We can also handle crash dumps from 64 bit kernel. */
+# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
+#else
+/* Maximum physical address we can use pages from */
+# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+/* Maximum address we can reach in physical address mode */
+# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+/* Maximum address we can use for the control pages */
+# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+
+/* Allocate one page for the pdp and the second for the code */
+# define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL)
+
+/* The native architecture */
+# define KEXEC_ARCH KEXEC_ARCH_X86_64
+#endif
+
+/*
+ * CPU does not save ss and sp on stack if execution is already
+ * running in kernel mode at the time of NMI occurrence. This code
+ * fixes it.
+ */
+static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+#ifdef CONFIG_X86_32
+ newregs->sp = (unsigned long)&(oldregs->sp);
+ __asm__ __volatile__(
+ "xorl %%eax, %%eax\n\t"
+ "movw %%ss, %%ax\n\t"
+ :"=a"(newregs->ss));
+#endif
+}
+
+/*
+ * This function is responsible for capturing register states if coming
+ * via panic otherwise just fix up the ss and sp if coming via kernel
+ * mode exception.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+ if (oldregs) {
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ crash_fixup_ss_esp(newregs, oldregs);
+ } else {
+#ifdef CONFIG_X86_32
+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->bx));
+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->cx));
+ __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->dx));
+ __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->si));
+ __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->di));
+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->bp));
+ __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->ax));
+ __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->sp));
+ __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
+ __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
+ __asm__ __volatile__("movl %%ds, %%eax;" :"=a"(newregs->ds));
+ __asm__ __volatile__("movl %%es, %%eax;" :"=a"(newregs->es));
+ __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->flags));
+#else
+ __asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->bx));
+ __asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->cx));
+ __asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->dx));
+ __asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->si));
+ __asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->di));
+ __asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->bp));
+ __asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->ax));
+ __asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->sp));
+ __asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8));
+ __asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9));
+ __asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10));
+ __asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11));
+ __asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12));
+ __asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13));
+ __asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14));
+ __asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15));
+ __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
+ __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
+ __asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->flags));
+#endif
+ newregs->ip = (unsigned long)current_text_addr();
+ }
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage NORET_TYPE void
+relocate_kernel(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+#else
+NORET_TYPE void
+relocate_kernel(unsigned long indirection_page,
+ unsigned long page_list,
+ unsigned long start_address) ATTRIB_NORET;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _KEXEC_H */
diff --git a/include/asm-x86/kexec_32.h b/include/asm-x86/kexec_32.h
deleted file mode 100644
index 4b9dc9e6b70..00000000000
--- a/include/asm-x86/kexec_32.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef _I386_KEXEC_H
-#define _I386_KEXEC_H
-
-#define PA_CONTROL_PAGE 0
-#define VA_CONTROL_PAGE 1
-#define PA_PGD 2
-#define VA_PGD 3
-#define PA_PTE_0 4
-#define VA_PTE_0 5
-#define PA_PTE_1 6
-#define VA_PTE_1 7
-#ifdef CONFIG_X86_PAE
-#define PA_PMD_0 8
-#define VA_PMD_0 9
-#define PA_PMD_1 10
-#define VA_PMD_1 11
-#define PAGES_NR 12
-#else
-#define PAGES_NR 8
-#endif
-
-#ifndef __ASSEMBLY__
-
-#include <asm/ptrace.h>
-#include <asm/string.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
-/* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-
-#define KEXEC_CONTROL_CODE_SIZE 4096
-
-/* The native architecture */
-#define KEXEC_ARCH KEXEC_ARCH_386
-
-/* We can also handle crash dumps from 64 bit kernel. */
-#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
-
-/* CPU does not save ss and esp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
- memcpy(newregs, oldregs, sizeof(*newregs));
- newregs->esp = (unsigned long)&(oldregs->esp);
- __asm__ __volatile__(
- "xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->xss));
-}
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic otherwise just fix up the ss and esp if coming via kernel
- * mode exception.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
- if (oldregs)
- crash_fixup_ss_esp(newregs, oldregs);
- else {
- __asm__ __volatile__("movl %%ebx,%0" : "=m"(newregs->ebx));
- __asm__ __volatile__("movl %%ecx,%0" : "=m"(newregs->ecx));
- __asm__ __volatile__("movl %%edx,%0" : "=m"(newregs->edx));
- __asm__ __volatile__("movl %%esi,%0" : "=m"(newregs->esi));
- __asm__ __volatile__("movl %%edi,%0" : "=m"(newregs->edi));
- __asm__ __volatile__("movl %%ebp,%0" : "=m"(newregs->ebp));
- __asm__ __volatile__("movl %%eax,%0" : "=m"(newregs->eax));
- __asm__ __volatile__("movl %%esp,%0" : "=m"(newregs->esp));
- __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(newregs->xss));
- __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(newregs->xcs));
- __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(newregs->xds));
- __asm__ __volatile__("movw %%es, %%ax;" :"=a"(newregs->xes));
- __asm__ __volatile__("pushfl; popl %0" :"=m"(newregs->eflags));
-
- newregs->eip = (unsigned long)current_text_addr();
- }
-}
-asmlinkage NORET_TYPE void
-relocate_kernel(unsigned long indirection_page,
- unsigned long control_page,
- unsigned long start_address,
- unsigned int has_pae) ATTRIB_NORET;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _I386_KEXEC_H */
diff --git a/include/asm-x86/kexec_64.h b/include/asm-x86/kexec_64.h
deleted file mode 100644
index 738e581b67f..00000000000
--- a/include/asm-x86/kexec_64.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _X86_64_KEXEC_H
-#define _X86_64_KEXEC_H
-
-#define PA_CONTROL_PAGE 0
-#define VA_CONTROL_PAGE 1
-#define PA_PGD 2
-#define VA_PGD 3
-#define PA_PUD_0 4
-#define VA_PUD_0 5
-#define PA_PMD_0 6
-#define VA_PMD_0 7
-#define PA_PTE_0 8
-#define VA_PTE_0 9
-#define PA_PUD_1 10
-#define VA_PUD_1 11
-#define PA_PMD_1 12
-#define VA_PMD_1 13
-#define PA_PTE_1 14
-#define VA_PTE_1 15
-#define PA_TABLE_PAGE 16
-#define PAGES_NR 17
-
-#ifndef __ASSEMBLY__
-
-#include <linux/string.h>
-
-#include <asm/page.h>
-#include <asm/ptrace.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- *
- * So far x86_64 is limited to 40 physical address bits.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
-/* Maximum address we can use for the control pages */
-#define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL)
-
-/* Allocate one page for the pdp and the second for the code */
-#define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL)
-
-/* The native architecture */
-#define KEXEC_ARCH KEXEC_ARCH_X86_64
-
-/*
- * Saving the registers of the cpu on which panic occured in
- * crash_kexec to save a valid sp. The registers of other cpus
- * will be saved in machine_crash_shutdown while shooting down them.
- */
-
-static inline void crash_setup_regs(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
- if (oldregs)
- memcpy(newregs, oldregs, sizeof(*newregs));
- else {
- __asm__ __volatile__("movq %%rbx,%0" : "=m"(newregs->rbx));
- __asm__ __volatile__("movq %%rcx,%0" : "=m"(newregs->rcx));
- __asm__ __volatile__("movq %%rdx,%0" : "=m"(newregs->rdx));
- __asm__ __volatile__("movq %%rsi,%0" : "=m"(newregs->rsi));
- __asm__ __volatile__("movq %%rdi,%0" : "=m"(newregs->rdi));
- __asm__ __volatile__("movq %%rbp,%0" : "=m"(newregs->rbp));
- __asm__ __volatile__("movq %%rax,%0" : "=m"(newregs->rax));
- __asm__ __volatile__("movq %%rsp,%0" : "=m"(newregs->rsp));
- __asm__ __volatile__("movq %%r8,%0" : "=m"(newregs->r8));
- __asm__ __volatile__("movq %%r9,%0" : "=m"(newregs->r9));
- __asm__ __volatile__("movq %%r10,%0" : "=m"(newregs->r10));
- __asm__ __volatile__("movq %%r11,%0" : "=m"(newregs->r11));
- __asm__ __volatile__("movq %%r12,%0" : "=m"(newregs->r12));
- __asm__ __volatile__("movq %%r13,%0" : "=m"(newregs->r13));
- __asm__ __volatile__("movq %%r14,%0" : "=m"(newregs->r14));
- __asm__ __volatile__("movq %%r15,%0" : "=m"(newregs->r15));
- __asm__ __volatile__("movl %%ss, %%eax;" :"=a"(newregs->ss));
- __asm__ __volatile__("movl %%cs, %%eax;" :"=a"(newregs->cs));
- __asm__ __volatile__("pushfq; popq %0" :"=m"(newregs->eflags));
-
- newregs->rip = (unsigned long)current_text_addr();
- }
-}
-
-NORET_TYPE void
-relocate_kernel(unsigned long indirection_page,
- unsigned long page_list,
- unsigned long start_address) ATTRIB_NORET;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _X86_64_KEXEC_H */
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index b7bbd25ba2a..143476a3cb5 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -1,5 +1,98 @@
-#ifdef CONFIG_X86_32
-# include "kprobes_32.h"
-#else
-# include "kprobes_64.h"
-#endif
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * See arch/x86/kernel/kprobes.c for x86 kprobes history.
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u8 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0xcc
+#define RELATIVEJUMP_INSTRUCTION 0xe9
+#define MAX_INSN_SIZE 16
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+ (((unsigned long)current_thread_info()) + THREAD_SIZE \
+ - (unsigned long)(ADDR))) \
+ ? (MAX_STACK_SIZE) \
+ : (((unsigned long)current_thread_info()) + THREAD_SIZE \
+ - (unsigned long)(ADDR)))
+
+#define ARCH_SUPPORTS_KRETPROBES
+#define flush_insn_slot(p) do { } while (0)
+
+extern const int kretprobe_blacklist_size;
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+ /* copy of the original instruction */
+ kprobe_opcode_t *insn;
+ /*
+ * boostable = -1: This instruction type is not boostable.
+ * boostable = 0: This instruction type is boostable.
+ * boostable = 1: This instruction has been boosted: we have
+ * added a relative jump after the instruction copy in insn,
+ * so no single-step and fixup are needed (unless there's
+ * a post_handler or break_handler).
+ */
+ int boostable;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+ unsigned long old_flags;
+ unsigned long saved_flags;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_old_flags;
+ unsigned long kprobe_saved_flags;
+ unsigned long *jprobe_saved_sp;
+ struct pt_regs jprobe_saved_regs;
+ kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+ struct prev_kprobe prev_kprobe;
+};
+
+/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
+ * if necessary, before executing the original int3/1 (trap) handler.
+ */
+static inline void restore_interrupts(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+}
+
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+#endif /* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h
deleted file mode 100644
index 9fe8f3bddfd..00000000000
--- a/include/asm-x86/kprobes_32.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
-/*
- * Kernel Probes (KProbes)
- * include/asm-i386/kprobes.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- * Probes initial implementation ( includes suggestions from
- * Rusty Russell).
- */
-#include <linux/types.h>
-#include <linux/ptrace.h>
-
-#define __ARCH_WANT_KPROBES_INSN_SLOT
-
-struct kprobe;
-struct pt_regs;
-
-typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
-#define MAX_INSN_SIZE 16
-#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
- (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
- ? (MAX_STACK_SIZE) \
- : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
-
-#define ARCH_SUPPORTS_KRETPROBES
-#define flush_insn_slot(p) do { } while (0)
-
-extern const int kretprobe_blacklist_size;
-
-void arch_remove_kprobe(struct kprobe *p);
-void kretprobe_trampoline(void);
-
-/* Architecture specific copy of original instruction*/
-struct arch_specific_insn {
- /* copy of the original instruction */
- kprobe_opcode_t *insn;
- /*
- * If this flag is not 0, this kprobe can be boost when its
- * post_handler and break_handler is not set.
- */
- int boostable;
-};
-
-struct prev_kprobe {
- struct kprobe *kp;
- unsigned long status;
- unsigned long old_eflags;
- unsigned long saved_eflags;
-};
-
-/* per-cpu kprobe control block */
-struct kprobe_ctlblk {
- unsigned long kprobe_status;
- unsigned long kprobe_old_eflags;
- unsigned long kprobe_saved_eflags;
- unsigned long *jprobe_saved_esp;
- struct pt_regs jprobe_saved_regs;
- kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
- struct prev_kprobe prev_kprobe;
-};
-
-/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
- * if necessary, before executing the original int3/1 (trap) handler.
- */
-static inline void restore_interrupts(struct pt_regs *regs)
-{
- if (regs->eflags & IF_MASK)
- local_irq_enable();
-}
-
-extern int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
-extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-#endif /* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
deleted file mode 100644
index 743d76218fc..00000000000
--- a/include/asm-x86/kprobes_64.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
-/*
- * Kernel Probes (KProbes)
- * include/asm-x86_64/kprobes.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2004-Oct Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
- * kenistoj@us.ibm.com adopted from i386.
- */
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/percpu.h>
-
-#define __ARCH_WANT_KPROBES_INSN_SLOT
-
-struct pt_regs;
-struct kprobe;
-
-typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xcc
-#define MAX_INSN_SIZE 15
-#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
- (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
- ? (MAX_STACK_SIZE) \
- : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
-
-#define ARCH_SUPPORTS_KRETPROBES
-extern const int kretprobe_blacklist_size;
-
-void kretprobe_trampoline(void);
-extern void arch_remove_kprobe(struct kprobe *p);
-#define flush_insn_slot(p) do { } while (0)
-
-/* Architecture specific copy of original instruction*/
-struct arch_specific_insn {
- /* copy of the original instruction */
- kprobe_opcode_t *insn;
-};
-
-struct prev_kprobe {
- struct kprobe *kp;
- unsigned long status;
- unsigned long old_rflags;
- unsigned long saved_rflags;
-};
-
-/* per-cpu kprobe control block */
-struct kprobe_ctlblk {
- unsigned long kprobe_status;
- unsigned long kprobe_old_rflags;
- unsigned long kprobe_saved_rflags;
- unsigned long *jprobe_saved_rsp;
- struct pt_regs jprobe_saved_regs;
- kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
- struct prev_kprobe prev_kprobe;
-};
-
-/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
- * if necessary, before executing the original int3/1 (trap) handler.
- */
-static inline void restore_interrupts(struct pt_regs *regs)
-{
- if (regs->eflags & IF_MASK)
- local_irq_enable();
-}
-
-extern int post_kprobe_handler(struct pt_regs *regs);
-extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-extern int kprobe_handler(struct pt_regs *regs);
-
-extern int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
-#endif /* _ASM_KPROBES_H */
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index ccd33846081..1c8367a692f 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -44,14 +44,14 @@ struct lguest_ro_state
{
/* Host information we need to restore when we switch back. */
u32 host_cr3;
- struct Xgt_desc_struct host_idt_desc;
- struct Xgt_desc_struct host_gdt_desc;
+ struct desc_ptr host_idt_desc;
+ struct desc_ptr host_gdt_desc;
u32 host_sp;
/* Fields which are used when guest is running. */
- struct Xgt_desc_struct guest_idt_desc;
- struct Xgt_desc_struct guest_gdt_desc;
- struct i386_hw_tss guest_tss;
+ struct desc_ptr guest_idt_desc;
+ struct desc_ptr guest_gdt_desc;
+ struct x86_hw_tss guest_tss;
struct desc_struct guest_idt[IDT_ENTRIES];
struct desc_struct guest_gdt[GDT_ENTRIES];
};
@@ -78,8 +78,8 @@ static inline void lguest_set_ts(void)
}
/* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00})
-#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300})
+#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } })
+#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } })
#endif /* __ASSEMBLY__ */
diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 94b257fa870..31739c7d66a 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h
@@ -1,5 +1,25 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#ifdef CONFIG_X86_64
+#define __ALIGN .p2align 4,,15
+#define __ALIGN_STR ".p2align 4,,15"
+#endif
+
#ifdef CONFIG_X86_32
-# include "linkage_32.h"
-#else
-# include "linkage_64.h"
+#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
+#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))
+/*
+ * For 32-bit UML - mark functions implemented in assembly that use
+ * regparm input parameters:
+ */
+#define asmregparm __attribute__((regparm(3)))
+#endif
+
+#ifdef CONFIG_X86_ALIGNMENT_16
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#endif
+
#endif
+
diff --git a/include/asm-x86/linkage_32.h b/include/asm-x86/linkage_32.h
deleted file mode 100644
index f4a6ebac024..00000000000
--- a/include/asm-x86/linkage_32.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-#define FASTCALL(x) x __attribute__((regparm(3)))
-#define fastcall __attribute__((regparm(3)))
-
-#define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret))
-
-#ifdef CONFIG_X86_ALIGNMENT_16
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
-#endif
-
-#endif
diff --git a/include/asm-x86/linkage_64.h b/include/asm-x86/linkage_64.h
deleted file mode 100644
index b5f39d0189c..00000000000
--- a/include/asm-x86/linkage_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#define __ALIGN .p2align 4,,15
-
-#endif
diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index c7a1b1c66c9..f852c62b331 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h
@@ -1,5 +1,240 @@
-#ifdef CONFIG_X86_32
-# include "local_32.h"
-#else
-# include "local_64.h"
+#ifndef _ARCH_LOCAL_H
+#define _ARCH_LOCAL_H
+
+#include <linux/percpu.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/asm.h>
+
+typedef struct {
+ atomic_long_t a;
+} local_t;
+
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l, i) atomic_long_set(&(l)->a, (i))
+
+static inline void local_inc(local_t *l)
+{
+ __asm__ __volatile__(
+ _ASM_INC "%0"
+ :"+m" (l->a.counter));
+}
+
+static inline void local_dec(local_t *l)
+{
+ __asm__ __volatile__(
+ _ASM_DEC "%0"
+ :"+m" (l->a.counter));
+}
+
+static inline void local_add(long i, local_t *l)
+{
+ __asm__ __volatile__(
+ _ASM_ADD "%1,%0"
+ :"+m" (l->a.counter)
+ :"ir" (i));
+}
+
+static inline void local_sub(long i, local_t *l)
+{
+ __asm__ __volatile__(
+ _ASM_SUB "%1,%0"
+ :"+m" (l->a.counter)
+ :"ir" (i));
+}
+
+/**
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer to type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int local_sub_and_test(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ _ASM_SUB "%2,%0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ :"ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * local_dec_and_test - decrement and test
+ * @l: pointer to type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int local_dec_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ _ASM_DEC "%0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * local_inc_and_test - increment and test
+ * @l: pointer to type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int local_inc_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ _ASM_INC "%0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * local_add_negative - add and test if negative
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int local_add_negative(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ _ASM_ADD "%2,%0; sets %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ :"ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * local_add_return - add and return
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static inline long local_add_return(long i, local_t *l)
+{
+ long __i;
+#ifdef CONFIG_M386
+ unsigned long flags;
+ if (unlikely(boot_cpu_data.x86 <= 3))
+ goto no_xadd;
#endif
+ /* Modern 486+ processor */
+ __i = i;
+ __asm__ __volatile__(
+ _ASM_XADD "%0, %1;"
+ :"+r" (i), "+m" (l->a.counter)
+ : : "memory");
+ return i + __i;
+
+#ifdef CONFIG_M386
+no_xadd: /* Legacy 386 processor */
+ local_irq_save(flags);
+ __i = local_read(l);
+ local_set(l, i + __i);
+ local_irq_restore(flags);
+ return i + __i;
+#endif
+}
+
+static inline long local_sub_return(long i, local_t *l)
+{
+ return local_add_return(-i, l);
+}
+
+#define local_inc_return(l) (local_add_return(1, l))
+#define local_dec_return(l) (local_sub_return(1, l))
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+/* Always has a lock prefix */
+#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ for (;;) { \
+ if (unlikely(c == (u))) \
+ break; \
+ old = local_cmpxchg((l), c, c + (a)); \
+ if (likely(old == c)) \
+ break; \
+ c = old; \
+ } \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+/* On x86_32, these are no better than the atomic variants.
+ * On x86-64 these are better than the atomic variants on SMP kernels
+ * because they dont use a lock prefix.
+ */
+#define __local_inc(l) local_inc(l)
+#define __local_dec(l) local_dec(l)
+#define __local_add(i, l) local_add((i), (l))
+#define __local_sub(i, l) local_sub((i), (l))
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations. Note they take
+ * a variable, not an address.
+ *
+ * X86_64: This could be done better if we moved the per cpu data directly
+ * after GS.
+ */
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+ still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(l) \
+ ({ local_t res__; \
+ preempt_disable(); \
+ res__ = (l); \
+ preempt_enable(); \
+ res__; })
+#define cpu_local_wrap(l) \
+ ({ preempt_disable(); \
+ l; \
+ preempt_enable(); }) \
+
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
+
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
+
+#endif /* _ARCH_LOCAL_H */
diff --git a/include/asm-x86/local_32.h b/include/asm-x86/local_32.h
deleted file mode 100644
index 6e85975b9ed..00000000000
--- a/include/asm-x86/local_32.h
+++ /dev/null
@@ -1,233 +0,0 @@
-#ifndef _ARCH_I386_LOCAL_H
-#define _ARCH_I386_LOCAL_H
-
-#include <linux/percpu.h>
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-typedef struct
-{
- atomic_long_t a;
-} local_t;
-
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
-
-static __inline__ void local_inc(local_t *l)
-{
- __asm__ __volatile__(
- "incl %0"
- :"+m" (l->a.counter));
-}
-
-static __inline__ void local_dec(local_t *l)
-{
- __asm__ __volatile__(
- "decl %0"
- :"+m" (l->a.counter));
-}
-
-static __inline__ void local_add(long i, local_t *l)
-{
- __asm__ __volatile__(
- "addl %1,%0"
- :"+m" (l->a.counter)
- :"ir" (i));
-}
-
-static __inline__ void local_sub(long i, local_t *l)
-{
- __asm__ __volatile__(
- "subl %1,%0"
- :"+m" (l->a.counter)
- :"ir" (i));
-}
-
-/**
- * local_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @l: pointer of type local_t
- *
- * Atomically subtracts @i from @l and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int local_sub_and_test(long i, local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "subl %2,%0; sete %1"
- :"+m" (l->a.counter), "=qm" (c)
- :"ir" (i) : "memory");
- return c;
-}
-
-/**
- * local_dec_and_test - decrement and test
- * @l: pointer of type local_t
- *
- * Atomically decrements @l by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __inline__ int local_dec_and_test(local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "decl %0; sete %1"
- :"+m" (l->a.counter), "=qm" (c)
- : : "memory");
- return c != 0;
-}
-
-/**
- * local_inc_and_test - increment and test
- * @l: pointer of type local_t
- *
- * Atomically increments @l by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int local_inc_and_test(local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "incl %0; sete %1"
- :"+m" (l->a.counter), "=qm" (c)
- : : "memory");
- return c != 0;
-}
-
-/**
- * local_add_negative - add and test if negative
- * @l: pointer of type local_t
- * @i: integer value to add
- *
- * Atomically adds @i to @l and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __inline__ int local_add_negative(long i, local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "addl %2,%0; sets %1"
- :"+m" (l->a.counter), "=qm" (c)
- :"ir" (i) : "memory");
- return c;
-}
-
-/**
- * local_add_return - add and return
- * @l: pointer of type local_t
- * @i: integer value to add
- *
- * Atomically adds @i to @l and returns @i + @l
- */
-static __inline__ long local_add_return(long i, local_t *l)
-{
- long __i;
-#ifdef CONFIG_M386
- unsigned long flags;
- if(unlikely(boot_cpu_data.x86 <= 3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
- __i = i;
- __asm__ __volatile__(
- "xaddl %0, %1;"
- :"+r" (i), "+m" (l->a.counter)
- : : "memory");
- return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- local_irq_save(flags);
- __i = local_read(l);
- local_set(l, i + __i);
- local_irq_restore(flags);
- return i + __i;
-#endif
-}
-
-static __inline__ long local_sub_return(long i, local_t *l)
-{
- return local_add_return(-i,l);
-}
-
-#define local_inc_return(l) (local_add_return(1,l))
-#define local_dec_return(l) (local_sub_return(1,l))
-
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
-
-/**
- * local_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-#define local_add_unless(l, a, u) \
-({ \
- long c, old; \
- c = local_read(l); \
- for (;;) { \
- if (unlikely(c == (u))) \
- break; \
- old = local_cmpxchg((l), c, c + (a)); \
- if (likely(old == c)) \
- break; \
- c = old; \
- } \
- c != (u); \
-})
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-/* On x86, these are no better than the atomic variants. */
-#define __local_inc(l) local_inc(l)
-#define __local_dec(l) local_dec(l)
-#define __local_add(i,l) local_add((i),(l))
-#define __local_sub(i,l) local_sub((i),(l))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- */
-
-/* Need to disable preemption for the cpu local counters otherwise we could
- still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(l) \
- ({ local_t res__; \
- preempt_disable(); \
- res__ = (l); \
- preempt_enable(); \
- res__; })
-#define cpu_local_wrap(l) \
- ({ preempt_disable(); \
- l; \
- preempt_enable(); }) \
-
-#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
-#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
-#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
-#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
-#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
-#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
-
-#define __cpu_local_inc(l) cpu_local_inc(l)
-#define __cpu_local_dec(l) cpu_local_dec(l)
-#define __cpu_local_add(i, l) cpu_local_add((i), (l))
-#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
-
-#endif /* _ARCH_I386_LOCAL_H */
diff --git a/include/asm-x86/local_64.h b/include/asm-x86/local_64.h
deleted file mode 100644
index e87492bb069..00000000000
--- a/include/asm-x86/local_64.h
+++ /dev/null
@@ -1,222 +0,0 @@
-#ifndef _ARCH_X8664_LOCAL_H
-#define _ARCH_X8664_LOCAL_H
-
-#include <linux/percpu.h>
-#include <asm/atomic.h>
-
-typedef struct
-{
- atomic_long_t a;
-} local_t;
-
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
-
-static inline void local_inc(local_t *l)
-{
- __asm__ __volatile__(
- "incq %0"
- :"=m" (l->a.counter)
- :"m" (l->a.counter));
-}
-
-static inline void local_dec(local_t *l)
-{
- __asm__ __volatile__(
- "decq %0"
- :"=m" (l->a.counter)
- :"m" (l->a.counter));
-}
-
-static inline void local_add(long i, local_t *l)
-{
- __asm__ __volatile__(
- "addq %1,%0"
- :"=m" (l->a.counter)
- :"ir" (i), "m" (l->a.counter));
-}
-
-static inline void local_sub(long i, local_t *l)
-{
- __asm__ __volatile__(
- "subq %1,%0"
- :"=m" (l->a.counter)
- :"ir" (i), "m" (l->a.counter));
-}
-
-/**
- * local_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @l: pointer to type local_t
- *
- * Atomically subtracts @i from @l and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int local_sub_and_test(long i, local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "subq %2,%0; sete %1"
- :"=m" (l->a.counter), "=qm" (c)
- :"ir" (i), "m" (l->a.counter) : "memory");
- return c;
-}
-
-/**
- * local_dec_and_test - decrement and test
- * @l: pointer to type local_t
- *
- * Atomically decrements @l by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __inline__ int local_dec_and_test(local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "decq %0; sete %1"
- :"=m" (l->a.counter), "=qm" (c)
- :"m" (l->a.counter) : "memory");
- return c != 0;
-}
-
-/**
- * local_inc_and_test - increment and test
- * @l: pointer to type local_t
- *
- * Atomically increments @l by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int local_inc_and_test(local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "incq %0; sete %1"
- :"=m" (l->a.counter), "=qm" (c)
- :"m" (l->a.counter) : "memory");
- return c != 0;
-}
-
-/**
- * local_add_negative - add and test if negative
- * @i: integer value to add
- * @l: pointer to type local_t
- *
- * Atomically adds @i to @l and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __inline__ int local_add_negative(long i, local_t *l)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- "addq %2,%0; sets %1"
- :"=m" (l->a.counter), "=qm" (c)
- :"ir" (i), "m" (l->a.counter) : "memory");
- return c;
-}
-
-/**
- * local_add_return - add and return
- * @i: integer value to add
- * @l: pointer to type local_t
- *
- * Atomically adds @i to @l and returns @i + @l
- */
-static __inline__ long local_add_return(long i, local_t *l)
-{
- long __i = i;
- __asm__ __volatile__(
- "xaddq %0, %1;"
- :"+r" (i), "+m" (l->a.counter)
- : : "memory");
- return i + __i;
-}
-
-static __inline__ long local_sub_return(long i, local_t *l)
-{
- return local_add_return(-i,l);
-}
-
-#define local_inc_return(l) (local_add_return(1,l))
-#define local_dec_return(l) (local_sub_return(1,l))
-
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
-
-/**
- * atomic_up_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-#define local_add_unless(l, a, u) \
-({ \
- long c, old; \
- c = local_read(l); \
- for (;;) { \
- if (unlikely(c == (u))) \
- break; \
- old = local_cmpxchg((l), c, c + (a)); \
- if (likely(old == c)) \
- break; \
- c = old; \
- } \
- c != (u); \
-})
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-/* On x86-64 these are better than the atomic variants on SMP kernels
- because they dont use a lock prefix. */
-#define __local_inc(l) local_inc(l)
-#define __local_dec(l) local_dec(l)
-#define __local_add(i,l) local_add((i),(l))
-#define __local_sub(i,l) local_sub((i),(l))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- *
- * This could be done better if we moved the per cpu data directly
- * after GS.
- */
-
-/* Need to disable preemption for the cpu local counters otherwise we could
- still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(l) \
- ({ local_t res__; \
- preempt_disable(); \
- res__ = (l); \
- preempt_enable(); \
- res__; })
-#define cpu_local_wrap(l) \
- ({ preempt_disable(); \
- l; \
- preempt_enable(); }) \
-
-#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
-#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
-#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
-#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
-#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
-#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
-
-#define __cpu_local_inc(l) cpu_local_inc(l)
-#define __cpu_local_dec(l) cpu_local_dec(l)
-#define __cpu_local_add(i, l) cpu_local_add((i), (l))
-#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
-
-#endif /* _ARCH_X8664_LOCAL_H */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index ebd319f838a..6df235e8ea9 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -110,13 +110,13 @@ static inline int cpu_to_logical_apicid(int cpu)
}
static inline int mpc_apic_id(struct mpc_config_processor *m,
- struct mpc_config_translation *translation_record)
+ struct mpc_config_translation *translation_record)
{
- printk("Processor #%d %ld:%ld APIC version %d\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver);
+ printk("Processor #%d %u:%u APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
return m->mpc_apicid;
}
diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h
index 1f730b8bd1f..989f34c37d3 100644
--- a/include/asm-x86/mach-default/apm.h
+++ b/include/asm-x86/mach-default/apm.h
@@ -1,6 +1,4 @@
/*
- * include/asm-i386/mach-default/apm.h
- *
* Machine specific APM BIOS functions for generic.
* Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp>
*/
diff --git a/include/asm-x86/mach-default/io_ports.h b/include/asm-x86/mach-default/io_ports.h
deleted file mode 100644
index 48540ba9716..00000000000
--- a/include/asm-x86/mach-default/io_ports.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * arch/i386/mach-generic/io_ports.h
- *
- * Machine specific IO port address definition for generic.
- * Written by Osamu Tomita <tomita@cinet.co.jp>
- */
-#ifndef _MACH_IO_PORTS_H
-#define _MACH_IO_PORTS_H
-
-/* i8259A PIC registers */
-#define PIC_MASTER_CMD 0x20
-#define PIC_MASTER_IMR 0x21
-#define PIC_MASTER_ISR PIC_MASTER_CMD
-#define PIC_MASTER_POLL PIC_MASTER_ISR
-#define PIC_MASTER_OCW3 PIC_MASTER_ISR
-#define PIC_SLAVE_CMD 0xa0
-#define PIC_SLAVE_IMR 0xa1
-
-/* i8259A PIC related value */
-#define PIC_CASCADE_IR 2
-#define MASTER_ICW4_DEFAULT 0x01
-#define SLAVE_ICW4_DEFAULT 0x01
-#define PIC_ICW4_AEOI 2
-
-#endif /* !_MACH_IO_PORTS_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 6db1c3babe9..e3c2c1012c1 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -89,15 +89,15 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
return physid_mask_of_physid(phys_apicid);
}
-static inline int mpc_apic_id(struct mpc_config_processor *m,
- struct mpc_config_translation *translation_record)
-{
- printk("Processor #%d %ld:%ld APIC version %d\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver);
- return (m->mpc_apicid);
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+ struct mpc_config_translation *translation_record)
+{
+ printk("Processor #%d %u:%u APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return m->mpc_apicid;
}
static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/mach-default/mach_time.h b/include/asm-x86/mach-default/mach_time.h
deleted file mode 100644
index 31eb5de6f3d..00000000000
--- a/include/asm-x86/mach-default/mach_time.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * include/asm-i386/mach-default/mach_time.h
- *
- * Machine specific set RTC function for generic.
- * Split out from time.c by Osamu Tomita <tomita@cinet.co.jp>
- */
-#ifndef _MACH_TIME_H
-#define _MACH_TIME_H
-
-#include <linux/mc146818rtc.h>
-
-/* for check timing call set_rtc_mmss() 500ms */
-/* used in arch/i386/time.c::do_timer_interrupt() */
-#define USEC_AFTER 500000
-#define USEC_BEFORE 500000
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be
- * called 500 ms after the second nowtime has started, because when
- * nowtime is written into the registers of the CMOS clock, it will
- * jump to the next second precisely 500 ms later. Check the Motorola
- * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- * sets the minutes. Usually you'll only notice that after reboot!
- */
-static inline int mach_set_rtc_mmss(unsigned long nowtime)
-{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char save_control, save_freq_select;
-
- save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
- CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
-
- /*
- * since we're only adjusting minutes and seconds,
- * don't interfere with hour overflow. This avoids
- * messing with unknown time zones but requires your
- * RTC not to be off by more than 15 minutes
- */
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
- real_minutes += 30; /* correct for half hour time zone */
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) < 30) {
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
- }
- CMOS_WRITE(real_seconds,RTC_SECONDS);
- CMOS_WRITE(real_minutes,RTC_MINUTES);
- } else {
- printk(KERN_WARNING
- "set_rtc_mmss: can't update from %d to %d\n",
- cmos_minutes, real_minutes);
- retval = -1;
- }
-
- /* The following flags have to be released exactly in this order,
- * otherwise the DS12887 (popular MC146818A clone with integrated
- * battery and quartz) will not reset the oscillator and will not
- * update precisely 500 ms later. You won't find this mentioned in
- * the Dallas Semiconductor data sheets, but who believes data
- * sheets anyway ... -- Markus Kuhn
- */
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
- return retval;
-}
-
-static inline unsigned long mach_get_cmos_time(void)
-{
- unsigned int year, mon, day, hour, min, sec;
-
- do {
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
- } while (sec != CMOS_READ(RTC_SECONDS));
-
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
- }
-
- year += 1900;
- if (year < 1970)
- year += 100;
-
- return mktime(year, mon, day, hour, min, sec);
-}
-
-#endif /* !_MACH_TIME_H */
diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h
index 807992fd417..4b76e536cd9 100644
--- a/include/asm-x86/mach-default/mach_timer.h
+++ b/include/asm-x86/mach-default/mach_timer.h
@@ -1,6 +1,4 @@
/*
- * include/asm-i386/mach-default/mach_timer.h
- *
* Machine specific calibrate_tsc() for generic.
* Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp>
*/
diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h
index 625438b8a6e..2fe7705c048 100644
--- a/include/asm-x86/mach-default/mach_traps.h
+++ b/include/asm-x86/mach-default/mach_traps.h
@@ -1,6 +1,4 @@
/*
- * include/asm-i386/mach-default/mach_traps.h
- *
* Machine specific NMI handling for generic.
* Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp>
*/
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index caec64be516..d23011fdf45 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -131,11 +131,11 @@ static inline int cpu_to_logical_apicid(int cpu)
static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused)
{
- printk("Processor #%d %ld:%ld APIC version %d\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver);
+ printk("Processor #%d %u:%u APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
return (m->mpc_apicid);
}
diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h
new file mode 100644
index 00000000000..5305dcb96df
--- /dev/null
+++ b/include/asm-x86/mach-generic/gpio.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_MACH_GENERIC_GPIO_H
+#define __ASM_MACH_GENERIC_GPIO_H
+
+int gpio_request(unsigned gpio, const char *label);
+void gpio_free(unsigned gpio);
+int gpio_direction_input(unsigned gpio);
+int gpio_direction_output(unsigned gpio, int value);
+int gpio_get_value(unsigned gpio);
+void gpio_set_value(unsigned gpio, int value);
+int gpio_to_irq(unsigned gpio);
+int irq_to_gpio(unsigned irq);
+
+#include <asm-generic/gpio.h> /* cansleep wrappers */
+
+#endif /* __ASM_MACH_GENERIC_GPIO_H */
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index 5e5e7dd2692..17e183bd39c 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -101,11 +101,11 @@ static inline int mpc_apic_id(struct mpc_config_processor *m,
int quad = translation_record->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
- printk("Processor #%d %ld:%ld APIC version %d (quad %d, apic %d)\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver, quad, logical_apicid);
+ printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver, quad, logical_apicid);
return logical_apicid;
}
diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h
new file mode 100644
index 00000000000..db31b929b99
--- /dev/null
+++ b/include/asm-x86/mach-rdc321x/gpio.h
@@ -0,0 +1,56 @@
+#ifndef _RDC321X_GPIO_H
+#define _RDC321X_GPIO_H
+
+extern int rdc_gpio_get_value(unsigned gpio);
+extern void rdc_gpio_set_value(unsigned gpio, int value);
+extern int rdc_gpio_direction_input(unsigned gpio);
+extern int rdc_gpio_direction_output(unsigned gpio, int value);
+
+
+/* Wrappers for the arch-neutral GPIO API */
+
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+ /* Not yet implemented */
+ return 0;
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+ /* Not yet implemented */
+}
+
+static inline int gpio_direction_input(unsigned gpio)
+{
+ return rdc_gpio_direction_input(gpio);
+}
+
+static inline int gpio_direction_output(unsigned gpio, int value)
+{
+ return rdc_gpio_direction_output(gpio, value);
+}
+
+static inline int gpio_get_value(unsigned gpio)
+{
+ return rdc_gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned gpio, int value)
+{
+ rdc_gpio_set_value(gpio, value);
+}
+
+static inline int gpio_to_irq(unsigned gpio)
+{
+ return gpio;
+}
+
+static inline int irq_to_gpio(unsigned irq)
+{
+ return irq;
+}
+
+/* For cansleep */
+#include <asm-generic/gpio.h>
+
+#endif /* _RDC321X_GPIO_H_ */
diff --git a/include/asm-x86/mach-rdc321x/rdc321x_defs.h b/include/asm-x86/mach-rdc321x/rdc321x_defs.h
new file mode 100644
index 00000000000..838ba8f64fd
--- /dev/null
+++ b/include/asm-x86/mach-rdc321x/rdc321x_defs.h
@@ -0,0 +1,6 @@
+#define PFX "rdc321x: "
+
+/* General purpose configuration and data registers */
+#define RDC3210_CFGREG_ADDR 0x0CF8
+#define RDC3210_CFGREG_DATA 0x0CFC
+#define RDC_MAX_GPIO 0x3A
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index 732f776aab8..062c97f6100 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h
@@ -126,15 +126,15 @@ static inline physid_mask_t apicid_to_cpu_present(int apicid)
return physid_mask_of_physid(0);
}
-static inline int mpc_apic_id(struct mpc_config_processor *m,
- struct mpc_config_translation *translation_record)
-{
- printk("Processor #%d %ld:%ld APIC version %d\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
- m->mpc_apicver);
- return (m->mpc_apicid);
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+ struct mpc_config_translation *translation_record)
+{
+ printk("Processor #%d %u:%u APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return m->mpc_apicid;
}
static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h
index a4b0aa3320e..9bf4ae93ab1 100644
--- a/include/asm-x86/math_emu.h
+++ b/include/asm-x86/math_emu.h
@@ -1,11 +1,6 @@
#ifndef _I386_MATH_EMU_H
#define _I386_MATH_EMU_H
-#include <asm/sigcontext.h>
-
-int restore_i387_soft(void *s387, struct _fpstate __user *buf);
-int save_i387_soft(void *s387, struct _fpstate __user *buf);
-
/* This structure matches the layout of the data saved to the stack
following a device-not-present interrupt, part of it saved
automatically by the 80386/80486.
diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index 5c2bb66caf1..cdd9f965835 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h
@@ -1,5 +1,100 @@
-#ifdef CONFIG_X86_32
-# include "mc146818rtc_32.h"
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <linux/mc146818rtc.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
+#endif
+
+#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG)
+/*
+ * This lock provides nmi access to the CMOS/RTC registers. It has some
+ * special properties. It is owned by a CPU and stores the index register
+ * currently being accessed (if owned). The idea here is that it works
+ * like a normal lock (normally). However, in an NMI, the NMI code will
+ * first check to see if its CPU owns the lock, meaning that the NMI
+ * interrupted during the read/write of the device. If it does, it goes ahead
+ * and performs the access and then restores the index register. If it does
+ * not, it locks normally.
+ *
+ * Note that since we are working with NMIs, we need this lock even in
+ * a non-SMP machine just to mark that the lock is owned.
+ *
+ * This only works with compare-and-swap. There is no other way to
+ * atomically claim the lock and set the owner.
+ */
+#include <linux/smp.h>
+extern volatile unsigned long cmos_lock;
+
+/*
+ * All of these below must be called with interrupts off, preempt
+ * disabled, etc.
+ */
+
+static inline void lock_cmos(unsigned char reg)
+{
+ unsigned long new;
+ new = ((smp_processor_id()+1) << 8) | reg;
+ for (;;) {
+ if (cmos_lock) {
+ cpu_relax();
+ continue;
+ }
+ if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0)
+ return;
+ }
+}
+
+static inline void unlock_cmos(void)
+{
+ cmos_lock = 0;
+}
+static inline int do_i_have_lock_cmos(void)
+{
+ return (cmos_lock >> 8) == (smp_processor_id()+1);
+}
+static inline unsigned char current_lock_cmos_reg(void)
+{
+ return cmos_lock & 0xff;
+}
+#define lock_cmos_prefix(reg) \
+ do { \
+ unsigned long cmos_flags; \
+ local_irq_save(cmos_flags); \
+ lock_cmos(reg)
+#define lock_cmos_suffix(reg) \
+ unlock_cmos(); \
+ local_irq_restore(cmos_flags); \
+ } while (0)
#else
-# include "mc146818rtc_64.h"
+#define lock_cmos_prefix(reg) do {} while (0)
+#define lock_cmos_suffix(reg) do {} while (0)
+#define lock_cmos(reg)
+#define unlock_cmos()
+#define do_i_have_lock_cmos() 0
+#define current_lock_cmos_reg() 0
#endif
+
+/*
+ * The yet supported machines all access the RTC index register via
+ * an ISA port access but the way to access the date register differs ...
+ */
+#define CMOS_READ(addr) rtc_cmos_read(addr)
+#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr)
+unsigned char rtc_cmos_read(unsigned char addr);
+void rtc_cmos_write(unsigned char val, unsigned char addr);
+
+extern int mach_set_rtc_mmss(unsigned long nowtime);
+extern unsigned long mach_get_cmos_time(void);
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/mc146818rtc_32.h b/include/asm-x86/mc146818rtc_32.h
deleted file mode 100644
index 1613b42eaf5..00000000000
--- a/include/asm-x86/mc146818rtc_32.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
-
-#ifndef RTC_PORT
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
-#endif
-
-#ifdef __HAVE_ARCH_CMPXCHG
-/*
- * This lock provides nmi access to the CMOS/RTC registers. It has some
- * special properties. It is owned by a CPU and stores the index register
- * currently being accessed (if owned). The idea here is that it works
- * like a normal lock (normally). However, in an NMI, the NMI code will
- * first check to see if its CPU owns the lock, meaning that the NMI
- * interrupted during the read/write of the device. If it does, it goes ahead
- * and performs the access and then restores the index register. If it does
- * not, it locks normally.
- *
- * Note that since we are working with NMIs, we need this lock even in
- * a non-SMP machine just to mark that the lock is owned.
- *
- * This only works with compare-and-swap. There is no other way to
- * atomically claim the lock and set the owner.
- */
-#include <linux/smp.h>
-extern volatile unsigned long cmos_lock;
-
-/*
- * All of these below must be called with interrupts off, preempt
- * disabled, etc.
- */
-
-static inline void lock_cmos(unsigned char reg)
-{
- unsigned long new;
- new = ((smp_processor_id()+1) << 8) | reg;
- for (;;) {
- if (cmos_lock) {
- cpu_relax();
- continue;
- }
- if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0)
- return;
- }
-}
-
-static inline void unlock_cmos(void)
-{
- cmos_lock = 0;
-}
-static inline int do_i_have_lock_cmos(void)
-{
- return (cmos_lock >> 8) == (smp_processor_id()+1);
-}
-static inline unsigned char current_lock_cmos_reg(void)
-{
- return cmos_lock & 0xff;
-}
-#define lock_cmos_prefix(reg) \
- do { \
- unsigned long cmos_flags; \
- local_irq_save(cmos_flags); \
- lock_cmos(reg)
-#define lock_cmos_suffix(reg) \
- unlock_cmos(); \
- local_irq_restore(cmos_flags); \
- } while (0)
-#else
-#define lock_cmos_prefix(reg) do {} while (0)
-#define lock_cmos_suffix(reg) do {} while (0)
-#define lock_cmos(reg)
-#define unlock_cmos()
-#define do_i_have_lock_cmos() 0
-#define current_lock_cmos_reg() 0
-#endif
-
-/*
- * The yet supported machines all access the RTC index register via
- * an ISA port access but the way to access the date register differs ...
- */
-#define CMOS_READ(addr) rtc_cmos_read(addr)
-#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr)
-unsigned char rtc_cmos_read(unsigned char addr);
-void rtc_cmos_write(unsigned char val, unsigned char addr);
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/mc146818rtc_64.h b/include/asm-x86/mc146818rtc_64.h
deleted file mode 100644
index d6e3009430c..00000000000
--- a/include/asm-x86/mc146818rtc_64.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-
-#ifndef RTC_PORT
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
-#endif
-
-/*
- * The yet supported machines all access the RTC index register via
- * an ISA port access but the way to access the date register differs ...
- */
-#define CMOS_READ(addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-outb_p((val),RTC_PORT(1)); \
-})
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index df304fd89c2..94f1fd79e22 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h
@@ -13,7 +13,7 @@
#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */
-#define MCG_STATUS_EIPV (1UL<<1) /* eip points to correct instruction */
+#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */
#define MCI_STATUS_VAL (1UL<<63) /* valid error */
@@ -30,7 +30,7 @@ struct mce {
__u64 misc;
__u64 addr;
__u64 mcgstatus;
- __u64 rip;
+ __u64 ip;
__u64 tsc; /* cpu time stamp counter */
__u64 res1; /* for future extension */
__u64 res2; /* dito. */
@@ -85,14 +85,7 @@ struct mce_log {
#ifdef __KERNEL__
#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do {} while(0)
-#endif
-
extern int mce_disabled;
-
#else /* CONFIG_X86_32 */
#include <asm/atomic.h>
@@ -121,6 +114,13 @@ extern int mce_notify_user(void);
#endif /* !CONFIG_X86_32 */
+
+
+#ifdef CONFIG_X86_MCE
+extern void mcheck_init(struct cpuinfo_x86 *c);
+#else
+#define mcheck_init(c) do { } while (0)
+#endif
extern void stop_mce(void);
extern void restart_mce(void);
diff --git a/include/asm-x86/mmsegment.h b/include/asm-x86/mmsegment.h
deleted file mode 100644
index d3f80c99633..00000000000
--- a/include/asm-x86/mmsegment.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_MMSEGMENT_H
-#define _ASM_MMSEGMENT_H 1
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
-#endif
diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 3f922c8e1c8..efa962c3889 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -20,4 +20,12 @@ typedef struct {
void *vdso;
} mm_context_t;
+#ifdef CONFIG_SMP
+void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
+
#endif /* _ASM_X86_MMU_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 7eb0b0b1fb3..8198d1cca1f 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
#endif
}
-void leave_mm(unsigned long cpu);
-
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 0cce83a7837..ad6dc821ef9 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -7,7 +7,9 @@
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#ifndef CONFIG_PARAVIRT
#include <asm-generic/mm_hooks.h>
+#endif
/*
* possibly do the LDT unload here?
@@ -23,11 +25,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
#endif
}
-static inline void load_cr3(pgd_t *pgd)
-{
- asm volatile("movq %0,%%cr3" :: "r" (__pa(pgd)) : "memory");
-}
-
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -43,20 +40,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
load_cr3(next->pgd);
if (unlikely(next->context.ldt != prev->context.ldt))
- load_LDT_nolock(&next->context, cpu);
+ load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
write_pda(mmu_state, TLBSTATE_OK);
if (read_pda(active_mm) != next)
- out_of_line_bug();
+ BUG();
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
- load_LDT_nolock(&next->context, cpu);
+ load_LDT_nolock(&next->context);
}
}
#endif
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 118e9812778..5d6f4ce6e6d 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -87,9 +87,6 @@ static inline int pfn_to_nid(unsigned long pfn)
__pgdat->node_start_pfn + __pgdat->node_spanned_pages; \
})
-/* XXX: FIXME -- wli */
-#define kern_addr_valid(kaddr) (0)
-
#ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */
#define pfn_valid(pfn) ((pfn) < num_physpages)
#else
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 19a89377b12..ebaf9663aa8 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -15,9 +15,9 @@
struct memnode {
int shift;
unsigned int mapsize;
- u8 *map;
- u8 embedded_map[64-16];
-} ____cacheline_aligned; /* total size = 64 bytes */
+ s16 *map;
+ s16 embedded_map[64-8];
+} ____cacheline_aligned; /* total size = 128 bytes */
extern struct memnode memnode;
#define memnode_shift memnode.shift
#define memnodemap memnode.map
@@ -41,11 +41,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_spanned_pages)
-#ifdef CONFIG_DISCONTIGMEM
-#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
-
-extern int pfn_valid(unsigned long pfn);
-#endif
+extern int early_pfn_to_nid(unsigned long pfn);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE (64*1024*1024)
diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h
index 2b2f18d8a53..bfedb247871 100644
--- a/include/asm-x86/module.h
+++ b/include/asm-x86/module.h
@@ -1,5 +1,82 @@
+#ifndef _ASM_MODULE_H
+#define _ASM_MODULE_H
+
+/* x86_32/64 are simple */
+struct mod_arch_specific {};
+
#ifdef CONFIG_X86_32
-# include "module_32.h"
+# define Elf_Shdr Elf32_Shdr
+# define Elf_Sym Elf32_Sym
+# define Elf_Ehdr Elf32_Ehdr
#else
-# include "module_64.h"
+# define Elf_Shdr Elf64_Shdr
+# define Elf_Sym Elf64_Sym
+# define Elf_Ehdr Elf64_Ehdr
#endif
+
+#ifdef CONFIG_X86_64
+/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M386
+#define MODULE_PROC_FAMILY "386 "
+#elif defined CONFIG_M486
+#define MODULE_PROC_FAMILY "486 "
+#elif defined CONFIG_M586
+#define MODULE_PROC_FAMILY "586 "
+#elif defined CONFIG_M586TSC
+#define MODULE_PROC_FAMILY "586TSC "
+#elif defined CONFIG_M586MMX
+#define MODULE_PROC_FAMILY "586MMX "
+#elif defined CONFIG_MCORE2
+#define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_M686
+#define MODULE_PROC_FAMILY "686 "
+#elif defined CONFIG_MPENTIUMII
+#define MODULE_PROC_FAMILY "PENTIUMII "
+#elif defined CONFIG_MPENTIUMIII
+#define MODULE_PROC_FAMILY "PENTIUMIII "
+#elif defined CONFIG_MPENTIUMM
+#define MODULE_PROC_FAMILY "PENTIUMM "
+#elif defined CONFIG_MPENTIUM4
+#define MODULE_PROC_FAMILY "PENTIUM4 "
+#elif defined CONFIG_MK6
+#define MODULE_PROC_FAMILY "K6 "
+#elif defined CONFIG_MK7
+#define MODULE_PROC_FAMILY "K7 "
+#elif defined CONFIG_MK8
+#define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_X86_ELAN
+#define MODULE_PROC_FAMILY "ELAN "
+#elif defined CONFIG_MCRUSOE
+#define MODULE_PROC_FAMILY "CRUSOE "
+#elif defined CONFIG_MEFFICEON
+#define MODULE_PROC_FAMILY "EFFICEON "
+#elif defined CONFIG_MWINCHIPC6
+#define MODULE_PROC_FAMILY "WINCHIPC6 "
+#elif defined CONFIG_MWINCHIP2
+#define MODULE_PROC_FAMILY "WINCHIP2 "
+#elif defined CONFIG_MWINCHIP3D
+#define MODULE_PROC_FAMILY "WINCHIP3D "
+#elif defined CONFIG_MCYRIXIII
+#define MODULE_PROC_FAMILY "CYRIXIII "
+#elif defined CONFIG_MVIAC3_2
+#define MODULE_PROC_FAMILY "VIAC3-2 "
+#elif defined CONFIG_MVIAC7
+#define MODULE_PROC_FAMILY "VIAC7 "
+#elif defined CONFIG_MGEODEGX1
+#define MODULE_PROC_FAMILY "GEODEGX1 "
+#elif defined CONFIG_MGEODE_LX
+#define MODULE_PROC_FAMILY "GEODE "
+#else
+#error unknown processor family
+#endif
+
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_4KSTACKS
+# define MODULE_STACKSIZE "4KSTACKS "
+# else
+# define MODULE_STACKSIZE ""
+# endif
+# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+#endif
+
+#endif /* _ASM_MODULE_H */
diff --git a/include/asm-x86/module_32.h b/include/asm-x86/module_32.h
deleted file mode 100644
index 7e5fda6c397..00000000000
--- a/include/asm-x86/module_32.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef _ASM_I386_MODULE_H
-#define _ASM_I386_MODULE_H
-
-/* x86 is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#ifdef CONFIG_M386
-#define MODULE_PROC_FAMILY "386 "
-#elif defined CONFIG_M486
-#define MODULE_PROC_FAMILY "486 "
-#elif defined CONFIG_M586
-#define MODULE_PROC_FAMILY "586 "
-#elif defined CONFIG_M586TSC
-#define MODULE_PROC_FAMILY "586TSC "
-#elif defined CONFIG_M586MMX
-#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
-#elif defined CONFIG_M686
-#define MODULE_PROC_FAMILY "686 "
-#elif defined CONFIG_MPENTIUMII
-#define MODULE_PROC_FAMILY "PENTIUMII "
-#elif defined CONFIG_MPENTIUMIII
-#define MODULE_PROC_FAMILY "PENTIUMIII "
-#elif defined CONFIG_MPENTIUMM
-#define MODULE_PROC_FAMILY "PENTIUMM "
-#elif defined CONFIG_MPENTIUM4
-#define MODULE_PROC_FAMILY "PENTIUM4 "
-#elif defined CONFIG_MK6
-#define MODULE_PROC_FAMILY "K6 "
-#elif defined CONFIG_MK7
-#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_X86_ELAN
-#define MODULE_PROC_FAMILY "ELAN "
-#elif defined CONFIG_MCRUSOE
-#define MODULE_PROC_FAMILY "CRUSOE "
-#elif defined CONFIG_MEFFICEON
-#define MODULE_PROC_FAMILY "EFFICEON "
-#elif defined CONFIG_MWINCHIPC6
-#define MODULE_PROC_FAMILY "WINCHIPC6 "
-#elif defined CONFIG_MWINCHIP2
-#define MODULE_PROC_FAMILY "WINCHIP2 "
-#elif defined CONFIG_MWINCHIP3D
-#define MODULE_PROC_FAMILY "WINCHIP3D "
-#elif defined CONFIG_MCYRIXIII
-#define MODULE_PROC_FAMILY "CYRIXIII "
-#elif defined CONFIG_MVIAC3_2
-#define MODULE_PROC_FAMILY "VIAC3-2 "
-#elif defined CONFIG_MVIAC7
-#define MODULE_PROC_FAMILY "VIAC7 "
-#elif defined CONFIG_MGEODEGX1
-#define MODULE_PROC_FAMILY "GEODEGX1 "
-#elif defined CONFIG_MGEODE_LX
-#define MODULE_PROC_FAMILY "GEODE "
-#else
-#error unknown processor family
-#endif
-
-#ifdef CONFIG_4KSTACKS
-#define MODULE_STACKSIZE "4KSTACKS "
-#else
-#define MODULE_STACKSIZE ""
-#endif
-
-#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
-
-#endif /* _ASM_I386_MODULE_H */
diff --git a/include/asm-x86/module_64.h b/include/asm-x86/module_64.h
deleted file mode 100644
index 67f8f69fa7b..00000000000
--- a/include/asm-x86/module_64.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_X8664_MODULE_H
-#define _ASM_X8664_MODULE_H
-
-struct mod_arch_specific {};
-
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-
-#endif
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 8f268e8fd2e..781ad74ab9e 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -1,5 +1,117 @@
+#ifndef _AM_X86_MPSPEC_H
+#define _AM_X86_MPSPEC_H
+
+#include <asm/mpspec_def.h>
+
#ifdef CONFIG_X86_32
-# include "mpspec_32.h"
+#include <mach_mpspec.h>
+
+extern int mp_bus_id_to_type[MAX_MP_BUSSES];
+extern int mp_bus_id_to_node[MAX_MP_BUSSES];
+extern int mp_bus_id_to_local[MAX_MP_BUSSES];
+extern int quad_local_to_mp_bus_id[NR_CPUS/4][4];
+
+extern unsigned int def_to_bigsmp;
+extern int apic_version[MAX_APICS];
+extern u8 apicid_2_node[];
+extern int pic_mode;
+
+#define MAX_APICID 256
+
#else
-# include "mpspec_64.h"
+
+#define MAX_MP_BUSSES 256
+/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
+
+extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+#endif
+
+extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern int smp_found_config;
+extern int nr_ioapics;
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern unsigned long mp_lapic_addr;
+
+extern void find_smp_config(void);
+extern void get_smp_config(void);
+
+#ifdef CONFIG_ACPI
+extern void mp_register_lapic(u8 id, u8 enabled);
+extern void mp_register_lapic_address(u64 address);
+extern void mp_register_ioapic(u8 id, u32 address, u32 gsi_base);
+extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ u32 gsi);
+extern void mp_config_acpi_legacy_irqs(void);
+extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+#endif /* CONFIG_ACPI */
+
+#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
+
+struct physid_mask
+{
+ unsigned long mask[PHYSID_ARRAY_SIZE];
+};
+
+typedef struct physid_mask physid_mask_t;
+
+#define physid_set(physid, map) set_bit(physid, (map).mask)
+#define physid_clear(physid, map) clear_bit(physid, (map).mask)
+#define physid_isset(physid, map) test_bit(physid, (map).mask)
+#define physid_test_and_set(physid, map) \
+ test_and_set_bit(physid, (map).mask)
+
+#define physids_and(dst, src1, src2) \
+ bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+
+#define physids_or(dst, src1, src2) \
+ bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+
+#define physids_clear(map) \
+ bitmap_zero((map).mask, MAX_APICS)
+
+#define physids_complement(dst, src) \
+ bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+
+#define physids_empty(map) \
+ bitmap_empty((map).mask, MAX_APICS)
+
+#define physids_equal(map1, map2) \
+ bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+
+#define physids_weight(map) \
+ bitmap_weight((map).mask, MAX_APICS)
+
+#define physids_shift_right(d, s, n) \
+ bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+
+#define physids_shift_left(d, s, n) \
+ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+
+#define physids_coerce(map) ((map).mask[0])
+
+#define physids_promote(physids) \
+ ({ \
+ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
+ __physid_mask.mask[0] = physids; \
+ __physid_mask; \
+ })
+
+#define physid_mask_of_physid(physid) \
+ ({ \
+ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
+ physid_set(physid, __physid_mask); \
+ __physid_mask; \
+ })
+
+#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
+#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
+
+extern physid_mask_t phys_cpu_present_map;
+
#endif
diff --git a/include/asm-x86/mpspec_32.h b/include/asm-x86/mpspec_32.h
deleted file mode 100644
index f21349399d1..00000000000
--- a/include/asm-x86/mpspec_32.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-#include <linux/cpumask.h>
-#include <asm/mpspec_def.h>
-#include <mach_mpspec.h>
-
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
-extern int mp_bus_id_to_node [MAX_MP_BUSSES];
-extern int mp_bus_id_to_local [MAX_MP_BUSSES];
-extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
-
-extern unsigned int def_to_bigsmp;
-extern unsigned int boot_cpu_physical_apicid;
-extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-extern int nr_ioapics;
-extern int apic_version [MAX_APICS];
-extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
-extern int mpc_default_type;
-extern unsigned long mp_lapic_addr;
-extern int pic_mode;
-
-#ifdef CONFIG_ACPI
-extern void mp_register_lapic (u8 id, u8 enabled);
-extern void mp_register_lapic_address (u64 address);
-extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
-extern void mp_config_acpi_legacy_irqs (void);
-extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
-#endif /* CONFIG_ACPI */
-
-#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
-
-struct physid_mask
-{
- unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map) set_bit(physid, (map).mask)
-#define physid_clear(physid, map) clear_bit(physid, (map).mask)
-#define physid_isset(physid, map) test_bit(physid, (map).mask)
-#define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask)
-
-#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS)
-#define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS)
-#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS)
-#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
-#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS)
-#define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
-#define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
-#define physids_coerce(map) ((map).mask[0])
-
-#define physids_promote(physids) \
- ({ \
- physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
- __physid_mask.mask[0] = physids; \
- __physid_mask; \
- })
-
-#define physid_mask_of_physid(physid) \
- ({ \
- physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
- physid_set(physid, __physid_mask); \
- __physid_mask; \
- })
-
-#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
-#endif
-
diff --git a/include/asm-x86/mpspec_64.h b/include/asm-x86/mpspec_64.h
deleted file mode 100644
index 017fddb61dc..00000000000
--- a/include/asm-x86/mpspec_64.h
+++ /dev/null
@@ -1,233 +0,0 @@
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-/*
- * Structure definitions for SMP machines following the
- * Intel Multiprocessing Specification 1.1 and 1.4.
- */
-
-/*
- * This tag identifies where the SMP configuration
- * information is.
- */
-
-#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
-
-/*
- * A maximum of 255 APICs with the current APIC ID architecture.
- */
-#define MAX_APICS 255
-
-struct intel_mp_floating
-{
- char mpf_signature[4]; /* "_MP_" */
- unsigned int mpf_physptr; /* Configuration table address */
- unsigned char mpf_length; /* Our length (paragraphs) */
- unsigned char mpf_specification;/* Specification version */
- unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
- unsigned char mpf_feature3; /* Unused (0) */
- unsigned char mpf_feature4; /* Unused (0) */
- unsigned char mpf_feature5; /* Unused (0) */
-};
-
-struct mp_config_table
-{
- char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
- unsigned short mpc_length; /* Size of table */
- char mpc_spec; /* 0x01 */
- char mpc_checksum;
- char mpc_oem[8];
- char mpc_productid[12];
- unsigned int mpc_oemptr; /* 0 if not present */
- unsigned short mpc_oemsize; /* 0 if not present */
- unsigned short mpc_oemcount;
- unsigned int mpc_lapic; /* APIC address */
- unsigned int reserved;
-};
-
-/* Followed by entries */
-
-#define MP_PROCESSOR 0
-#define MP_BUS 1
-#define MP_IOAPIC 2
-#define MP_INTSRC 3
-#define MP_LINTSRC 4
-
-struct mpc_config_processor
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid; /* Local APIC number */
- unsigned char mpc_apicver; /* Its versions */
- unsigned char mpc_cpuflag;
-#define CPU_ENABLED 1 /* Processor is available */
-#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
- unsigned int mpc_cpufeature;
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK 0xF0
-#define CPU_FAMILY_MASK 0xF00
- unsigned int mpc_featureflag; /* CPUID feature value */
- unsigned int mpc_reserved[2];
-};
-
-struct mpc_config_bus
-{
- unsigned char mpc_type;
- unsigned char mpc_busid;
- unsigned char mpc_bustype[6];
-};
-
-/* List of Bus Type string values, Intel MP Spec. */
-#define BUSTYPE_EISA "EISA"
-#define BUSTYPE_ISA "ISA"
-#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
-#define BUSTYPE_MCA "MCA"
-#define BUSTYPE_VL "VL" /* Local bus */
-#define BUSTYPE_PCI "PCI"
-#define BUSTYPE_PCMCIA "PCMCIA"
-#define BUSTYPE_CBUS "CBUS"
-#define BUSTYPE_CBUSII "CBUSII"
-#define BUSTYPE_FUTURE "FUTURE"
-#define BUSTYPE_MBI "MBI"
-#define BUSTYPE_MBII "MBII"
-#define BUSTYPE_MPI "MPI"
-#define BUSTYPE_MPSA "MPSA"
-#define BUSTYPE_NUBUS "NUBUS"
-#define BUSTYPE_TC "TC"
-#define BUSTYPE_VME "VME"
-#define BUSTYPE_XPRESS "XPRESS"
-
-struct mpc_config_ioapic
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid;
- unsigned char mpc_apicver;
- unsigned char mpc_flags;
-#define MPC_APIC_USABLE 0x01
- unsigned int mpc_apicaddr;
-};
-
-struct mpc_config_intsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbus;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_dstapic;
- unsigned char mpc_dstirq;
-};
-
-enum mp_irq_source_types {
- mp_INT = 0,
- mp_NMI = 1,
- mp_SMI = 2,
- mp_ExtINT = 3
-};
-
-#define MP_IRQDIR_DEFAULT 0
-#define MP_IRQDIR_HIGH 1
-#define MP_IRQDIR_LOW 3
-
-
-struct mpc_config_lintsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbusid;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_destapic;
-#define MP_APIC_ALL 0xFF
- unsigned char mpc_destapiclint;
-};
-
-/*
- * Default configurations
- *
- * 1 2 CPU ISA 82489DX
- * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
- * 3 2 CPU EISA 82489DX
- * 4 2 CPU MCA 82489DX
- * 5 2 CPU ISA+PCI
- * 6 2 CPU EISA+PCI
- * 7 2 CPU MCA+PCI
- */
-
-#define MAX_MP_BUSSES 256
-/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
-#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
-extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
-
-extern unsigned int boot_cpu_physical_apicid;
-extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-extern int nr_ioapics;
-extern unsigned char apic_version [MAX_APICS];
-extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
-extern int mpc_default_type;
-extern unsigned long mp_lapic_addr;
-
-#ifdef CONFIG_ACPI
-extern void mp_register_lapic (u8 id, u8 enabled);
-extern void mp_register_lapic_address (u64 address);
-
-extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
-extern void mp_config_acpi_legacy_irqs (void);
-extern int mp_register_gsi (u32 gsi, int triggering, int polarity);
-#endif
-
-extern int using_apic_timer;
-
-#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
-
-struct physid_mask
-{
- unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map) set_bit(physid, (map).mask)
-#define physid_clear(physid, map) clear_bit(physid, (map).mask)
-#define physid_isset(physid, map) test_bit(physid, (map).mask)
-#define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask)
-
-#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
-#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS)
-#define physids_complement(dst, src) bitmap_complement((dst).mask, (src).mask, MAX_APICS)
-#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS)
-#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
-#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS)
-#define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
-#define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
-#define physids_coerce(map) ((map).mask[0])
-
-#define physids_promote(physids) \
- ({ \
- physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
- __physid_mask.mask[0] = physids; \
- __physid_mask; \
- })
-
-#define physid_mask_of_physid(physid) \
- ({ \
- physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
- physid_set(physid, __physid_mask); \
- __physid_mask; \
- })
-
-#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
-#endif
-
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index 13bafb16e7a..3504617fe64 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -8,52 +8,68 @@
/*
* This tag identifies where the SMP configuration
- * information is.
+ * information is.
*/
-
+
#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
-#define MAX_MPC_ENTRY 1024
-#define MAX_APICS 256
+#ifdef CONFIG_X86_32
+# define MAX_MPC_ENTRY 1024
+# define MAX_APICS 256
+#else
+/*
+ * A maximum of 255 APICs with the current APIC ID architecture.
+ */
+# define MAX_APICS 255
+#endif
struct intel_mp_floating
{
- char mpf_signature[4]; /* "_MP_" */
- unsigned long mpf_physptr; /* Configuration table address */
+ char mpf_signature[4]; /* "_MP_" */
+ unsigned int mpf_physptr; /* Configuration table address */
unsigned char mpf_length; /* Our length (paragraphs) */
unsigned char mpf_specification;/* Specification version */
unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
+ unsigned char mpf_feature1; /* Standard or configuration ? */
unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
unsigned char mpf_feature3; /* Unused (0) */
unsigned char mpf_feature4; /* Unused (0) */
unsigned char mpf_feature5; /* Unused (0) */
};
+#define MPC_SIGNATURE "PCMP"
+
struct mp_config_table
{
char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
unsigned short mpc_length; /* Size of table */
char mpc_spec; /* 0x01 */
char mpc_checksum;
char mpc_oem[8];
char mpc_productid[12];
- unsigned long mpc_oemptr; /* 0 if not present */
+ unsigned int mpc_oemptr; /* 0 if not present */
unsigned short mpc_oemsize; /* 0 if not present */
unsigned short mpc_oemcount;
- unsigned long mpc_lapic; /* APIC address */
- unsigned long reserved;
+ unsigned int mpc_lapic; /* APIC address */
+ unsigned int reserved;
};
/* Followed by entries */
-#define MP_PROCESSOR 0
-#define MP_BUS 1
-#define MP_IOAPIC 2
-#define MP_INTSRC 3
-#define MP_LINTSRC 4
-#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
+#define MP_PROCESSOR 0
+#define MP_BUS 1
+#define MP_IOAPIC 2
+#define MP_INTSRC 3
+#define MP_LINTSRC 4
+/* Used by IBM NUMA-Q to describe node locality */
+#define MP_TRANSLATION 192
+
+#define CPU_ENABLED 1 /* Processor is available */
+#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
+
+#define CPU_STEPPING_MASK 0x000F
+#define CPU_MODEL_MASK 0x00F0
+#define CPU_FAMILY_MASK 0x0F00
struct mpc_config_processor
{
@@ -61,14 +77,9 @@ struct mpc_config_processor
unsigned char mpc_apicid; /* Local APIC number */
unsigned char mpc_apicver; /* Its versions */
unsigned char mpc_cpuflag;
-#define CPU_ENABLED 1 /* Processor is available */
-#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
- unsigned long mpc_cpufeature;
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK 0xF0
-#define CPU_FAMILY_MASK 0xF00
- unsigned long mpc_featureflag; /* CPUID feature value */
- unsigned long mpc_reserved[2];
+ unsigned int mpc_cpufeature;
+ unsigned int mpc_featureflag; /* CPUID feature value */
+ unsigned int mpc_reserved[2];
};
struct mpc_config_bus
@@ -98,14 +109,15 @@ struct mpc_config_bus
#define BUSTYPE_VME "VME"
#define BUSTYPE_XPRESS "XPRESS"
+#define MPC_APIC_USABLE 0x01
+
struct mpc_config_ioapic
{
unsigned char mpc_type;
unsigned char mpc_apicid;
unsigned char mpc_apicver;
unsigned char mpc_flags;
-#define MPC_APIC_USABLE 0x01
- unsigned long mpc_apicaddr;
+ unsigned int mpc_apicaddr;
};
struct mpc_config_intsrc
@@ -130,6 +142,7 @@ enum mp_irq_source_types {
#define MP_IRQDIR_HIGH 1
#define MP_IRQDIR_LOW 3
+#define MP_APIC_ALL 0xFF
struct mpc_config_lintsrc
{
@@ -138,15 +151,15 @@ struct mpc_config_lintsrc
unsigned short mpc_irqflag;
unsigned char mpc_srcbusid;
unsigned char mpc_srcbusirq;
- unsigned char mpc_destapic;
-#define MP_APIC_ALL 0xFF
+ unsigned char mpc_destapic;
unsigned char mpc_destapiclint;
};
+#define MPC_OEM_SIGNATURE "_OEM"
+
struct mp_config_oemtable
{
char oem_signature[4];
-#define MPC_OEM_SIGNATURE "_OEM"
unsigned short oem_length; /* Size of table */
char oem_rev; /* 0x01 */
char oem_checksum;
@@ -155,13 +168,13 @@ struct mp_config_oemtable
struct mpc_config_translation
{
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
};
/*
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index a4944732be0..fae118a2527 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -63,6 +63,13 @@
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
+/* DEBUGCTLMSR bits (others vary by model): */
+#define _DEBUGCTLMSR_LBR 0 /* last branch recording */
+#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */
+
+#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR)
+#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF)
+
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -88,6 +95,14 @@
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE (1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 80b027081b3..204a8a30fec 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -7,77 +7,109 @@
# include <linux/types.h>
#endif
-#ifdef __i386__
-
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+#include <asm/asm.h>
#include <asm/errno.h>
+static inline unsigned long long native_read_tscp(unsigned int *aux)
+{
+ unsigned long low, high;
+ asm volatile (".byte 0x0f,0x01,0xf9"
+ : "=a" (low), "=d" (high), "=c" (*aux));
+ return low | ((u64)high >> 32);
+}
+
+/*
+ * i386 calling convention returns 64-bit value in edx:eax, while
+ * x86_64 returns at rax. Also, the "A" constraint does not really
+ * mean rdx:rax in x86_64, so we need specialized behaviour for each
+ * architecture
+ */
+#ifdef CONFIG_X86_64
+#define DECLARE_ARGS(val, low, high) unsigned low, high
+#define EAX_EDX_VAL(val, low, high) (low | ((u64)(high) << 32))
+#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
+#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
+#else
+#define DECLARE_ARGS(val, low, high) unsigned long long val
+#define EAX_EDX_VAL(val, low, high) (val)
+#define EAX_EDX_ARGS(val, low, high) "A" (val)
+#define EAX_EDX_RET(val, low, high) "=A" (val)
+#endif
+
static inline unsigned long long native_read_msr(unsigned int msr)
{
- unsigned long long val;
+ DECLARE_ARGS(val, low, high);
- asm volatile("rdmsr" : "=A" (val) : "c" (msr));
- return val;
+ asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ return EAX_EDX_VAL(val, low, high);
}
static inline unsigned long long native_read_msr_safe(unsigned int msr,
int *err)
{
- unsigned long long val;
+ DECLARE_ARGS(val, low, high);
- asm volatile("2: rdmsr ; xorl %0,%0\n"
+ asm volatile("2: rdmsr ; xor %0,%0\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
- "3: movl %3,%0 ; jmp 1b\n\t"
+ "3: mov %3,%0 ; jmp 1b\n\t"
".previous\n\t"
".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
+ _ASM_ALIGN "\n\t"
+ _ASM_PTR " 2b,3b\n\t"
".previous"
- : "=r" (*err), "=A" (val)
+ : "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), "i" (-EFAULT));
-
- return val;
+ return EAX_EDX_VAL(val, low, high);
}
-static inline void native_write_msr(unsigned int msr, unsigned long long val)
+static inline void native_write_msr(unsigned int msr,
+ unsigned low, unsigned high)
{
- asm volatile("wrmsr" : : "c" (msr), "A"(val));
+ asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high));
}
static inline int native_write_msr_safe(unsigned int msr,
- unsigned long long val)
+ unsigned low, unsigned high)
{
int err;
- asm volatile("2: wrmsr ; xorl %0,%0\n"
+ asm volatile("2: wrmsr ; xor %0,%0\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
- "3: movl %4,%0 ; jmp 1b\n\t"
+ "3: mov %4,%0 ; jmp 1b\n\t"
".previous\n\t"
".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
+ _ASM_ALIGN "\n\t"
+ _ASM_PTR " 2b,3b\n\t"
".previous"
: "=a" (err)
- : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
+ : "c" (msr), "0" (low), "d" (high),
"i" (-EFAULT));
return err;
}
-static inline unsigned long long native_read_tsc(void)
+extern unsigned long long native_read_tsc(void);
+
+static __always_inline unsigned long long __native_read_tsc(void)
{
- unsigned long long val;
- asm volatile("rdtsc" : "=A" (val));
- return val;
+ DECLARE_ARGS(val, low, high);
+
+ rdtsc_barrier();
+ asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+ rdtsc_barrier();
+
+ return EAX_EDX_VAL(val, low, high);
}
-static inline unsigned long long native_read_pmc(void)
+static inline unsigned long long native_read_pmc(int counter)
{
- unsigned long long val;
- asm volatile("rdpmc" : "=A" (val));
- return val;
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
+ return EAX_EDX_VAL(val, low, high);
}
#ifdef CONFIG_PARAVIRT
@@ -97,20 +129,21 @@ static inline unsigned long long native_read_pmc(void)
(val2) = (u32)(__val >> 32); \
} while(0)
-static inline void wrmsr(u32 __msr, u32 __low, u32 __high)
+static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
{
- native_write_msr(__msr, ((u64)__high << 32) | __low);
+ native_write_msr(msr, low, high);
}
#define rdmsrl(msr,val) \
((val) = native_read_msr(msr))
-#define wrmsrl(msr,val) native_write_msr(msr, val)
+#define wrmsrl(msr, val) \
+ native_write_msr(msr, (u32)((u64)(val)), (u32)((u64)(val) >> 32))
/* wrmsr with exception handling */
-static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
+static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
{
- return native_write_msr_safe(__msr, ((u64)__high << 32) | __low);
+ return native_write_msr_safe(msr, low, high);
}
/* rdmsr with exception handling */
@@ -129,204 +162,31 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
#define rdtscll(val) \
((val) = native_read_tsc())
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
#define rdpmc(counter,low,high) \
do { \
- u64 _l = native_read_pmc(); \
+ u64 _l = native_read_pmc(counter); \
(low) = (u32)_l; \
(high) = (u32)(_l >> 32); \
} while(0)
-#endif /* !CONFIG_PARAVIRT */
-
-#ifdef CONFIG_SMP
-void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-#else /* CONFIG_SMP */
-static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- rdmsr(msr_no, *l, *h);
-}
-static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- wrmsr(msr_no, l, h);
-}
-static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- return rdmsr_safe(msr_no, l, h);
-}
-static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- return wrmsr_safe(msr_no, l, h);
-}
-#endif /* CONFIG_SMP */
-#endif /* ! __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
-#else /* __i386__ */
-
-#ifndef __ASSEMBLY__
-#include <linux/errno.h>
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
-#define rdmsr(msr,val1,val2) \
- __asm__ __volatile__("rdmsr" \
- : "=a" (val1), "=d" (val2) \
- : "c" (msr))
-
-
-#define rdmsrl(msr,val) do { unsigned long a__,b__; \
- __asm__ __volatile__("rdmsr" \
- : "=a" (a__), "=d" (b__) \
- : "c" (msr)); \
- val = a__ | (b__<<32); \
-} while(0)
-
-#define wrmsr(msr,val1,val2) \
- __asm__ __volatile__("wrmsr" \
- : /* no outputs */ \
- : "c" (msr), "a" (val1), "d" (val2))
-
-#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32)
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+#define rdtscp(low, high, aux) \
+ do { \
+ unsigned long long _val = native_read_tscp(&(aux)); \
+ (low) = (u32)_val; \
+ (high) = (u32)(_val >> 32); \
+ } while (0)
-#define rdtscl(low) \
- __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
+#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
-#define rdtscp(low,high,aux) \
- __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
+#endif /* !CONFIG_PARAVIRT */
-#define rdtscll(val) do { \
- unsigned int __a,__d; \
- __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
- (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
-} while(0)
-#define rdtscpll(val, aux) do { \
- unsigned long __a, __d; \
- __asm__ __volatile__ (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
- (val) = (__d << 32) | __a; \
-} while (0)
+#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-
-static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op));
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
- int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op), "c" (count));
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax;
-
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (op)
- : "bx", "cx", "dx");
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx;
-
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx)
- : "0" (op)
- : "cx", "dx" );
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ecx;
-
- __asm__("cpuid"
- : "=a" (eax), "=c" (ecx)
- : "0" (op)
- : "bx", "dx" );
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, edx;
-
- __asm__("cpuid"
- : "=a" (eax), "=d" (edx)
- : "0" (op)
- : "bx", "cx");
- return edx;
-}
-
-#ifdef __KERNEL__
-
-/* wrmsr with exception handling */
-#define wrmsr_safe(msr,a,b) ({ int ret__; \
- asm volatile("2: wrmsr ; xorl %0,%0\n" \
- "1:\n\t" \
- ".section .fixup,\"ax\"\n\t" \
- "3: movl %4,%0 ; jmp 1b\n\t" \
- ".previous\n\t" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n\t" \
- " .quad 2b,3b\n\t" \
- ".previous" \
- : "=a" (ret__) \
- : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \
- ret__; })
-
-#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
-
-#define rdmsr_safe(msr,a,b) \
- ({ int ret__; \
- asm volatile ("1: rdmsr\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %4,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b)) \
- :"c"(msr), "i"(-EIO), "0"(0)); \
- ret__; })
-
#ifdef CONFIG_SMP
void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -350,9 +210,8 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
return wrmsr_safe(msr_no, l, h);
}
#endif /* CONFIG_SMP */
-#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
-#endif /* !__i386__ */
#endif
diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index e8320e4e6ca..319d065800b 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h
@@ -89,24 +89,25 @@ struct mtrr_gentry
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
extern int mtrr_add (unsigned long base, unsigned long size,
- unsigned int type, char increment);
+ unsigned int type, bool increment);
extern int mtrr_add_page (unsigned long base, unsigned long size,
- unsigned int type, char increment);
+ unsigned int type, bool increment);
extern int mtrr_del (int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
+extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
# else
#define mtrr_save_fixed_ranges(arg) do {} while (0)
#define mtrr_save_state() do {} while (0)
static __inline__ int mtrr_add (unsigned long base, unsigned long size,
- unsigned int type, char increment)
+ unsigned int type, bool increment)
{
return -ENODEV;
}
static __inline__ int mtrr_add_page (unsigned long base, unsigned long size,
- unsigned int type, char increment)
+ unsigned int type, bool increment)
{
return -ENODEV;
}
@@ -120,7 +121,10 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base,
{
return -ENODEV;
}
-
+static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
+{
+ return 0;
+}
static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;}
#define mtrr_ap_init() do {} while (0)
diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h
index 7a17d9e58ad..bbeefb96ddf 100644
--- a/include/asm-x86/mutex_32.h
+++ b/include/asm-x86/mutex_32.h
@@ -26,7 +26,7 @@ do { \
unsigned int dummy; \
\
typecheck(atomic_t *, count); \
- typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
+ typecheck_fn(void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
LOCK_PREFIX " decl (%%eax) \n" \
@@ -51,8 +51,7 @@ do { \
* or anything the slow path function returns
*/
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count,
- int fastcall (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
{
if (unlikely(atomic_dec_return(count) < 0))
return fail_fn(count);
@@ -78,7 +77,7 @@ do { \
unsigned int dummy; \
\
typecheck(atomic_t *, count); \
- typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
+ typecheck_fn(void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
LOCK_PREFIX " incl (%%eax) \n" \
diff --git a/include/asm-x86/nmi_32.h b/include/asm-x86/nmi_32.h
index 70a958a8e38..7206c7e8a38 100644
--- a/include/asm-x86/nmi_32.h
+++ b/include/asm-x86/nmi_32.h
@@ -1,6 +1,3 @@
-/*
- * linux/include/asm-i386/nmi.h
- */
#ifndef ASM_NMI_H
#define ASM_NMI_H
diff --git a/include/asm-x86/nmi_64.h b/include/asm-x86/nmi_64.h
index 65b6acf3bb5..2eeb74e5f3f 100644
--- a/include/asm-x86/nmi_64.h
+++ b/include/asm-x86/nmi_64.h
@@ -1,6 +1,3 @@
-/*
- * linux/include/asm-i386/nmi.h
- */
#ifndef ASM_NMI_H
#define ASM_NMI_H
@@ -41,7 +38,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
#define get_nmi_reason() inb(0x61)
-extern int panic_on_timeout;
extern int unknown_nmi_panic;
extern int nmi_watchdog_enabled;
@@ -60,7 +56,6 @@ extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
extern void nmi_watchdog_default(void);
-extern int setup_nmi_watchdog(char *);
extern atomic_t nmi_active;
extern unsigned int nmi_watchdog;
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
new file mode 100644
index 00000000000..fec025c7f58
--- /dev/null
+++ b/include/asm-x86/nops.h
@@ -0,0 +1,90 @@
+#ifndef _ASM_NOPS_H
+#define _ASM_NOPS_H 1
+
+/* Define nops for use with alternative() */
+
+/* generic versions from gas */
+#define GENERIC_NOP1 ".byte 0x90\n"
+#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
+#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
+
+/* Opteron 64bit nops */
+#define K8_NOP1 GENERIC_NOP1
+#define K8_NOP2 ".byte 0x66,0x90\n"
+#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
+#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n"
+#define K8_NOP5 K8_NOP3 K8_NOP2
+#define K8_NOP6 K8_NOP3 K8_NOP3
+#define K8_NOP7 K8_NOP4 K8_NOP3
+#define K8_NOP8 K8_NOP4 K8_NOP4
+
+/* K7 nops */
+/* uses eax dependencies (arbitary choice) */
+#define K7_NOP1 GENERIC_NOP1
+#define K7_NOP2 ".byte 0x8b,0xc0\n"
+#define K7_NOP3 ".byte 0x8d,0x04,0x20\n"
+#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n"
+#define K7_NOP5 K7_NOP4 ASM_NOP1
+#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n"
+#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8 K7_NOP7 ASM_NOP1
+
+/* P6 nops */
+/* uses eax dependencies (Intel-recommended choice) */
+#define P6_NOP1 GENERIC_NOP1
+#define P6_NOP2 ".byte 0x66,0x90\n"
+#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
+#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n"
+#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n"
+#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+
+#if defined(CONFIG_MK8)
+#define ASM_NOP1 K8_NOP1
+#define ASM_NOP2 K8_NOP2
+#define ASM_NOP3 K8_NOP3
+#define ASM_NOP4 K8_NOP4
+#define ASM_NOP5 K8_NOP5
+#define ASM_NOP6 K8_NOP6
+#define ASM_NOP7 K8_NOP7
+#define ASM_NOP8 K8_NOP8
+#elif defined(CONFIG_MK7)
+#define ASM_NOP1 K7_NOP1
+#define ASM_NOP2 K7_NOP2
+#define ASM_NOP3 K7_NOP3
+#define ASM_NOP4 K7_NOP4
+#define ASM_NOP5 K7_NOP5
+#define ASM_NOP6 K7_NOP6
+#define ASM_NOP7 K7_NOP7
+#define ASM_NOP8 K7_NOP8
+#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
+ defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
+ defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#define ASM_NOP1 P6_NOP1
+#define ASM_NOP2 P6_NOP2
+#define ASM_NOP3 P6_NOP3
+#define ASM_NOP4 P6_NOP4
+#define ASM_NOP5 P6_NOP5
+#define ASM_NOP6 P6_NOP6
+#define ASM_NOP7 P6_NOP7
+#define ASM_NOP8 P6_NOP8
+#else
+#define ASM_NOP1 GENERIC_NOP1
+#define ASM_NOP2 GENERIC_NOP2
+#define ASM_NOP3 GENERIC_NOP3
+#define ASM_NOP4 GENERIC_NOP4
+#define ASM_NOP5 GENERIC_NOP5
+#define ASM_NOP6 GENERIC_NOP6
+#define ASM_NOP7 GENERIC_NOP7
+#define ASM_NOP8 GENERIC_NOP8
+#endif
+
+#define ASM_NOP_MAX 8
+
+#endif
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 96fcb157db1..03d0f7a9bf0 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -1,3 +1,15 @@
+#ifndef _ASM_X86_32_NUMA_H
+#define _ASM_X86_32_NUMA_H 1
-int pxm_to_nid(int pxm);
+extern int pxm_to_nid(int pxm);
+#ifdef CONFIG_NUMA
+extern void __init remap_numa_kva(void);
+extern void set_highmem_pages_init(int);
+#else
+static inline void remap_numa_kva(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_32_NUMA_H */
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 0cc5c97a7fc..15fe07cde58 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -20,13 +20,19 @@ extern void numa_set_node(int cpu, int node);
extern void srat_reserve_add_area(int nodeid);
extern int hotadd_percent;
-extern unsigned char apicid_to_node[MAX_LOCAL_APIC];
+extern s16 apicid_to_node[MAX_LOCAL_APIC];
+
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern unsigned long numa_free_all_bootmem(void);
+extern void setup_node_bootmem(int nodeid, unsigned long start,
+ unsigned long end);
+
#ifdef CONFIG_NUMA
extern void __init init_cpu_to_node(void);
static inline void clear_node_cpumask(int cpu)
{
- clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+ clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
}
#else
@@ -34,6 +40,4 @@ static inline void clear_node_cpumask(int cpu)
#define clear_node_cpumask(cpu) do {} while (0)
#endif
-#define NUMA_NO_NODE 0xff
-
#endif
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index a757eb26141..c8b30efeed8 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -1,13 +1,183 @@
+#ifndef _ASM_X86_PAGE_H
+#define _ASM_X86_PAGE_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
#ifdef __KERNEL__
-# ifdef CONFIG_X86_32
-# include "page_32.h"
-# else
-# include "page_64.h"
-# endif
+
+#define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK)
+#define PTE_MASK (_AT(long, PHYSICAL_PAGE_MASK))
+
+#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
+#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
+#ifdef CONFIG_X86_64
+#include <asm/page_64.h>
+#define max_pfn_mapped end_pfn_map
#else
-# ifdef __i386__
-# include "page_32.h"
-# else
-# include "page_64.h"
-# endif
+#include <asm/page_32.h>
+#define max_pfn_mapped max_low_pfn
+#endif /* CONFIG_X86_64 */
+
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+
+#ifndef __ASSEMBLY__
+
+extern int page_is_ram(unsigned long pagenr);
+
+struct page;
+
+static void inline clear_user_page(void *page, unsigned long vaddr,
+ struct page *pg)
+{
+ clear_page(page);
+}
+
+static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
+ struct page *topage)
+{
+ copy_page(to, from);
+}
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+typedef struct { pgdval_t pgd; } pgd_t;
+typedef struct { pgprotval_t pgprot; } pgprot_t;
+
+static inline pgd_t native_make_pgd(pgdval_t val)
+{
+ return (pgd_t) { val };
+}
+
+static inline pgdval_t native_pgd_val(pgd_t pgd)
+{
+ return pgd.pgd;
+}
+
+#if PAGETABLE_LEVELS >= 3
+#if PAGETABLE_LEVELS == 4
+typedef struct { pudval_t pud; } pud_t;
+
+static inline pud_t native_make_pud(pmdval_t val)
+{
+ return (pud_t) { val };
+}
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+ return pud.pud;
+}
+#else /* PAGETABLE_LEVELS == 3 */
+#include <asm-generic/pgtable-nopud.h>
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+ return native_pgd_val(pud.pgd);
+}
+#endif /* PAGETABLE_LEVELS == 4 */
+
+typedef struct { pmdval_t pmd; } pmd_t;
+
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+ return (pmd_t) { val };
+}
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+ return pmd.pmd;
+}
+#else /* PAGETABLE_LEVELS == 2 */
+#include <asm-generic/pgtable-nopmd.h>
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+ return native_pgd_val(pmd.pud.pgd);
+}
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static inline pte_t native_make_pte(pteval_t val)
+{
+ return (pte_t) { .pte = val };
+}
+
+static inline pteval_t native_pte_val(pte_t pte)
+{
+ return pte.pte;
+}
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else /* !CONFIG_PARAVIRT */
+
+#define pgd_val(x) native_pgd_val(x)
+#define __pgd(x) native_make_pgd(x)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_val(x) native_pud_val(x)
+#define __pud(x) native_make_pud(x)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pmd_val(x) native_pmd_val(x)
+#define __pmd(x) native_make_pmd(x)
#endif
+
+#define pte_val(x) native_pte_val(x)
+#define __pte(x) native_make_pte(x)
+
+#endif /* CONFIG_PARAVIRT */
+
+#define __pa(x) __phys_addr((unsigned long)(x))
+/* __pa_symbol should be used for C visible symbols.
+ This seems to be the official gcc blessed way to do such arithmetic. */
+#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x)))
+
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+
+#define __boot_va(x) __va(x)
+#define __boot_pa(x) __pa(x)
+
+#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#endif /* __ASSEMBLY__ */
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/page.h>
+
+#define __HAVE_ARCH_GATE_AREA 1
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_X86_PAGE_H */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 80ecc66b6d8..a6fd10f230d 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -1,206 +1,107 @@
-#ifndef _I386_PAGE_H
-#define _I386_PAGE_H
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#ifdef CONFIG_X86_USE_3DNOW
-
-#include <asm/mmx.h>
-
-#define clear_page(page) mmx_clear_page((void *)(page))
-#define copy_page(to,from) mmx_copy_page(to,from)
-
-#else
+#ifndef _ASM_X86_PAGE_32_H
+#define _ASM_X86_PAGE_32_H
/*
- * On older X86 processors it's not a win to use MMX here it seems.
- * Maybe the K6-III ?
- */
-
-#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE)
-#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE)
-
-#endif
-
-#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
-/*
- * These are used to make use of C type-checking..
+ * This handles the memory map.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB.
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
*/
-extern int nx_enabled;
+#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#ifdef CONFIG_X86_PAE
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-typedef struct { unsigned long long pmd; } pmd_t;
-typedef struct { unsigned long long pgd; } pgd_t;
-typedef struct { unsigned long long pgprot; } pgprot_t;
+#define __PHYSICAL_MASK_SHIFT 36
+#define __VIRTUAL_MASK_SHIFT 32
+#define PAGETABLE_LEVELS 3
-static inline unsigned long long native_pgd_val(pgd_t pgd)
-{
- return pgd.pgd;
-}
-
-static inline unsigned long long native_pmd_val(pmd_t pmd)
-{
- return pmd.pmd;
-}
-
-static inline unsigned long long native_pte_val(pte_t pte)
-{
- return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
-}
-
-static inline pgd_t native_make_pgd(unsigned long long val)
-{
- return (pgd_t) { val };
-}
-
-static inline pmd_t native_make_pmd(unsigned long long val)
-{
- return (pmd_t) { val };
-}
-
-static inline pte_t native_make_pte(unsigned long long val)
-{
- return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
-}
-
-#ifndef CONFIG_PARAVIRT
-#define pmd_val(x) native_pmd_val(x)
-#define __pmd(x) native_make_pmd(x)
-#endif
-
-#define HPAGE_SHIFT 21
-#include <asm-generic/pgtable-nopud.h>
+#ifndef __ASSEMBLY__
+typedef u64 pteval_t;
+typedef u64 pmdval_t;
+typedef u64 pudval_t;
+typedef u64 pgdval_t;
+typedef u64 pgprotval_t;
+typedef u64 phys_addr_t;
+
+typedef union {
+ struct {
+ unsigned long pte_low, pte_high;
+ };
+ pteval_t pte;
+} pte_t;
+#endif /* __ASSEMBLY__
+ */
#else /* !CONFIG_X86_PAE */
-typedef struct { unsigned long pte_low; } pte_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define boot_pte_t pte_t /* or would you rather have a typedef */
-
-static inline unsigned long native_pgd_val(pgd_t pgd)
-{
- return pgd.pgd;
-}
+#define __PHYSICAL_MASK_SHIFT 32
+#define __VIRTUAL_MASK_SHIFT 32
+#define PAGETABLE_LEVELS 2
-static inline unsigned long native_pte_val(pte_t pte)
-{
- return pte.pte_low;
-}
-
-static inline pgd_t native_make_pgd(unsigned long val)
-{
- return (pgd_t) { val };
-}
+#ifndef __ASSEMBLY__
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+typedef unsigned long pudval_t;
+typedef unsigned long pgdval_t;
+typedef unsigned long pgprotval_t;
+typedef unsigned long phys_addr_t;
-static inline pte_t native_make_pte(unsigned long val)
-{
- return (pte_t) { .pte_low = val };
-}
+typedef union { pteval_t pte, pte_low; } pte_t;
+typedef pte_t boot_pte_t;
-#define HPAGE_SHIFT 22
-#include <asm-generic/pgtable-nopmd.h>
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_X86_PAE */
-#define PTE_MASK PAGE_MASK
-
#ifdef CONFIG_HUGETLB_PAGE
-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
-#define HPAGE_MASK (~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#ifndef CONFIG_PARAVIRT
-#define pgd_val(x) native_pgd_val(x)
-#define __pgd(x) native_make_pgd(x)
-#define pte_val(x) native_pte_val(x)
-#define __pte(x) native_make_pte(x)
-#endif
-
-#endif /* !__ASSEMBLY__ */
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-/*
- * This handles the memory map.. We could make this a config
- * option, but too many people screw it up, and too few need
- * it.
- *
- * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
- * a virtual address space of one gigabyte, which limits the
- * amount of physical memory you can use to about 950MB.
- *
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
- */
-
#ifndef __ASSEMBLY__
+#define __phys_addr(x) ((x)-PAGE_OFFSET)
+#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn) ((pfn) < max_mapnr)
+#endif /* CONFIG_FLATMEM */
-struct vm_area_struct;
+extern int nx_enabled;
/*
* This much address space is reserved for vmalloc() and iomap()
* as well as fixmap mappings.
*/
extern unsigned int __VMALLOC_RESERVE;
-
extern int sysctl_legacy_va_layout;
-extern int page_is_ram(unsigned long pagenr);
-
-#endif /* __ASSEMBLY__ */
-
-#ifdef __ASSEMBLY__
-#define __PAGE_OFFSET CONFIG_PAGE_OFFSET
-#else
-#define __PAGE_OFFSET ((unsigned long)CONFIG_PAGE_OFFSET)
-#endif
-
-
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-/* __pa_symbol should be used for C visible symbols.
- This seems to be the official gcc blessed way to do such arithmetic. */
-#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x),0))
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#ifdef CONFIG_X86_USE_3DNOW
+#include <asm/mmx.h>
+
+static inline void clear_page(void *page)
+{
+ mmx_clear_page(page);
+}
-#define VM_DATA_DEFAULT_FLAGS \
- (VM_READ | VM_WRITE | \
- ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+static inline void copy_page(void *to, void *from)
+{
+ mmx_copy_page(to, from);
+}
+#else /* !CONFIG_X86_USE_3DNOW */
+#include <linux/string.h>
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+static inline void clear_page(void *page)
+{
+ memset(page, 0, PAGE_SIZE);
+}
-#define __HAVE_ARCH_GATE_AREA 1
-#endif /* __KERNEL__ */
+static inline void copy_page(void *to, void *from)
+{
+ memcpy(to, from, PAGE_SIZE);
+}
+#endif /* CONFIG_X86_3DNOW */
+#endif /* !__ASSEMBLY__ */
-#endif /* _I386_PAGE_H */
+#endif /* _ASM_X86_PAGE_32_H */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c3b52bcb171..c1ac42d8707 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -1,15 +1,9 @@
#ifndef _X86_64_PAGE_H
#define _X86_64_PAGE_H
-#include <linux/const.h>
+#define PAGETABLE_LEVELS 4
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
-
-#define THREAD_ORDER 1
+#define THREAD_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE-1))
@@ -29,54 +23,7 @@
#define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
-
-#define HPAGE_SHIFT PMD_SHIFT
-#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
-#define HPAGE_MASK (~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-extern unsigned long end_pfn;
-
-void clear_page(void *);
-void copy_page(void *, void *);
-
-#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pud; } pud_t;
-typedef struct { unsigned long pgd; } pgd_t;
-#define PTE_MASK PHYSICAL_PAGE_MASK
-
-typedef struct { unsigned long pgprot; } pgprot_t;
-
-extern unsigned long phys_base;
-
-#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((x).pmd)
-#define pud_val(x) ((x).pud)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
-
-#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
-#define __pud(x) ((pud_t) { (x) } )
-#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#endif /* !__ASSEMBLY__ */
+#define __PAGE_OFFSET _AC(0xffff810000000000, UL)
#define __PHYSICAL_START CONFIG_PHYSICAL_START
#define __KERNEL_ALIGN 0x200000
@@ -92,53 +39,44 @@ extern unsigned long phys_base;
#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-#define __PAGE_OFFSET _AC(0xffff810000000000, UL)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
/* See Documentation/x86_64/mm.txt for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 46
-#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
#define __VIRTUAL_MASK_SHIFT 48
-#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
#define KERNEL_TEXT_SIZE (40*1024*1024)
#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
-#define PAGE_OFFSET __PAGE_OFFSET
#ifndef __ASSEMBLY__
+void clear_page(void *page);
+void copy_page(void *to, void *from);
-#include <asm/bug.h>
+extern unsigned long end_pfn;
+extern unsigned long end_pfn_map;
+extern unsigned long phys_base;
extern unsigned long __phys_addr(unsigned long);
+#define __phys_reloc_hide(x) (x)
-#endif /* __ASSEMBLY__ */
-
-#define __pa(x) __phys_addr((unsigned long)(x))
-#define __pa_symbol(x) __phys_addr((unsigned long)(x))
-
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define __boot_va(x) __va(x)
-#define __boot_pa(x) __pa(x)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < end_pfn)
-#endif
-
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+typedef unsigned long pudval_t;
+typedef unsigned long pgdval_t;
+typedef unsigned long pgprotval_t;
+typedef unsigned long phys_addr_t;
-#define VM_DATA_DEFAULT_FLAGS \
- (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+typedef struct { pteval_t pte; } pte_t;
-#define __HAVE_ARCH_GATE_AREA 1
#define vmemmap ((struct page *)VMEMMAP_START)
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#endif /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn) ((pfn) < end_pfn)
+#endif
-#endif /* __KERNEL__ */
#endif /* _X86_64_PAGE_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index f59d370c5df..d6236eb4646 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -5,22 +5,37 @@
#ifdef CONFIG_PARAVIRT
#include <asm/page.h>
+#include <asm/asm.h>
/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0x0
-#define CLBR_EAX 0x1
-#define CLBR_ECX 0x2
-#define CLBR_EDX 0x4
-#define CLBR_ANY 0x7
+#define CLBR_NONE 0
+#define CLBR_EAX (1 << 0)
+#define CLBR_ECX (1 << 1)
+#define CLBR_EDX (1 << 2)
+
+#ifdef CONFIG_X86_64
+#define CLBR_RSI (1 << 3)
+#define CLBR_RDI (1 << 4)
+#define CLBR_R8 (1 << 5)
+#define CLBR_R9 (1 << 6)
+#define CLBR_R10 (1 << 7)
+#define CLBR_R11 (1 << 8)
+#define CLBR_ANY ((1 << 9) - 1)
+#include <asm/desc_defs.h>
+#else
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY ((1 << 3) - 1)
+#endif /* X86_64 */
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/kmap_types.h>
+#include <asm/desc_defs.h>
struct page;
struct thread_struct;
-struct Xgt_desc_struct;
+struct desc_ptr;
struct tss_struct;
struct mm_struct;
struct desc_struct;
@@ -86,22 +101,27 @@ struct pv_cpu_ops {
unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);
+#ifdef CONFIG_X86_64
+ unsigned long (*read_cr8)(void);
+ void (*write_cr8)(unsigned long);
+#endif
+
/* Segment descriptor handling */
void (*load_tr_desc)(void);
- void (*load_gdt)(const struct Xgt_desc_struct *);
- void (*load_idt)(const struct Xgt_desc_struct *);
- void (*store_gdt)(struct Xgt_desc_struct *);
- void (*store_idt)(struct Xgt_desc_struct *);
+ void (*load_gdt)(const struct desc_ptr *);
+ void (*load_idt)(const struct desc_ptr *);
+ void (*store_gdt)(struct desc_ptr *);
+ void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
- void (*write_ldt_entry)(struct desc_struct *,
- int entrynum, u32 low, u32 high);
+ void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
+ const void *desc);
void (*write_gdt_entry)(struct desc_struct *,
- int entrynum, u32 low, u32 high);
- void (*write_idt_entry)(struct desc_struct *,
- int entrynum, u32 low, u32 high);
- void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
+ int entrynum, const void *desc, int size);
+ void (*write_idt_entry)(gate_desc *,
+ int entrynum, const gate_desc *gate);
+ void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
void (*set_iopl_mask)(unsigned mask);
@@ -115,15 +135,18 @@ struct pv_cpu_ops {
/* MSR, PMC and TSR operations.
err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
u64 (*read_msr)(unsigned int msr, int *err);
- int (*write_msr)(unsigned int msr, u64 val);
+ int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
u64 (*read_tsc)(void);
- u64 (*read_pmc)(void);
+ u64 (*read_pmc)(int counter);
+ unsigned long long (*read_tscp)(unsigned int *aux);
/* These two are jmp to, not actually called. */
- void (*irq_enable_sysexit)(void);
+ void (*irq_enable_syscall_ret)(void);
void (*iret)(void);
+ void (*swapgs)(void);
+
struct pv_lazy_ops lazy_mode;
};
@@ -150,9 +173,9 @@ struct pv_apic_ops {
* Direct APIC operations, principally for VMI. Ideally
* these shouldn't be in this interface.
*/
- void (*apic_write)(unsigned long reg, unsigned long v);
- void (*apic_write_atomic)(unsigned long reg, unsigned long v);
- unsigned long (*apic_read)(unsigned long reg);
+ void (*apic_write)(unsigned long reg, u32 v);
+ void (*apic_write_atomic)(unsigned long reg, u32 v);
+ u32 (*apic_read)(unsigned long reg);
void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void);
@@ -198,7 +221,7 @@ struct pv_mmu_ops {
/* Hooks for allocating/releasing pagetable pages */
void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
- void (*alloc_pd)(u32 pfn);
+ void (*alloc_pd)(struct mm_struct *mm, u32 pfn);
void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
void (*release_pt)(u32 pfn);
void (*release_pd)(u32 pfn);
@@ -212,28 +235,34 @@ struct pv_mmu_ops {
void (*pte_update_defer)(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
+ pteval_t (*pte_val)(pte_t);
+ pte_t (*make_pte)(pteval_t pte);
+
+ pgdval_t (*pgd_val)(pgd_t);
+ pgd_t (*make_pgd)(pgdval_t pgd);
+
+#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
- void (*set_pud)(pud_t *pudp, pud_t pudval);
void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void (*pmd_clear)(pmd_t *pmdp);
- unsigned long long (*pte_val)(pte_t);
- unsigned long long (*pmd_val)(pmd_t);
- unsigned long long (*pgd_val)(pgd_t);
+#endif /* CONFIG_X86_PAE */
- pte_t (*make_pte)(unsigned long long pte);
- pmd_t (*make_pmd)(unsigned long long pmd);
- pgd_t (*make_pgd)(unsigned long long pgd);
-#else
- unsigned long (*pte_val)(pte_t);
- unsigned long (*pgd_val)(pgd_t);
+ void (*set_pud)(pud_t *pudp, pud_t pudval);
- pte_t (*make_pte)(unsigned long pte);
- pgd_t (*make_pgd)(unsigned long pgd);
-#endif
+ pmdval_t (*pmd_val)(pmd_t);
+ pmd_t (*make_pmd)(pmdval_t pmd);
+
+#if PAGETABLE_LEVELS == 4
+ pudval_t (*pud_val)(pud_t);
+ pud_t (*make_pud)(pudval_t pud);
+
+ void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
+#endif /* PAGETABLE_LEVELS == 4 */
+#endif /* PAGETABLE_LEVELS >= 3 */
#ifdef CONFIG_HIGHPTE
void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
@@ -279,7 +308,8 @@ extern struct pv_mmu_ops pv_mmu_ops;
#define _paravirt_alt(insn_string, type, clobber) \
"771:\n\t" insn_string "\n" "772:\n" \
".pushsection .parainstructions,\"a\"\n" \
- " .long 771b\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 771b\n" \
" .byte " type "\n" \
" .byte 772b-771b\n" \
" .short " clobber "\n" \
@@ -289,6 +319,11 @@ extern struct pv_mmu_ops pv_mmu_ops;
#define paravirt_alt(insn_string) \
_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
+/* Simple instruction patching code. */
+#define DEF_NATIVE(ops, name, code) \
+ extern const char start_##ops##_##name[], end_##ops##_##name[]; \
+ asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+
unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
@@ -303,6 +338,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
const char *start, const char *end);
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len);
+
int paravirt_disable_iospace(void);
/*
@@ -319,7 +357,7 @@ int paravirt_disable_iospace(void);
* runtime.
*
* Normally, a call to a pv_op function is a simple indirect call:
- * (paravirt_ops.operations)(args...).
+ * (pv_op_struct.operations)(args...).
*
* Unfortunately, this is a relatively slow operation for modern CPUs,
* because it cannot necessarily determine what the destination
@@ -329,11 +367,17 @@ int paravirt_disable_iospace(void);
* calls are essentially free, because the call and return addresses
* are completely predictable.)
*
- * These macros rely on the standard gcc "regparm(3)" calling
+ * For i386, these macros rely on the standard gcc "regparm(3)" calling
* convention, in which the first three arguments are placed in %eax,
* %edx, %ecx (in that order), and the remaining arguments are placed
* on the stack. All caller-save registers (eax,edx,ecx) are expected
* to be modified (either clobbered or used for return values).
+ * X86_64, on the other hand, already specifies a register-based calling
+ * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
+ * special handling for dealing with 4 arguments, unlike i386.
+ * However, x86_64 also have to clobber all caller saved registers, which
+ * unfortunately, are quite a bit (r8 - r11)
*
* The call instruction itself is marked by placing its start address
* and size into the .parainstructions section, so that
@@ -356,10 +400,12 @@ int paravirt_disable_iospace(void);
* the return type. The macro then uses sizeof() on that type to
* determine whether its a 32 or 64 bit value, and places the return
* in the right register(s) (just %eax for 32-bit, and %edx:%eax for
- * 64-bit).
+ * 64-bit). For x86_64 machines, it just returns at %rax regardless of
+ * the return value size.
*
* 64-bit arguments are passed as a pair of adjacent 32-bit arguments
- * in low,high order.
+ * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
+ * in low,high order
*
* Small structures are passed and returned in registers. The macro
* calling convention can't directly deal with this, so the wrapper
@@ -369,46 +415,67 @@ int paravirt_disable_iospace(void);
* means that all uses must be wrapped in inline functions. This also
* makes sure the incoming and outgoing types are always correct.
*/
+#ifdef CONFIG_X86_32
+#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
+#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
+ "=c" (__ecx)
+#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
+#define EXTRA_CLOBBERS
+#define VEXTRA_CLOBBERS
+#else
+#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
+ "=S" (__esi), "=d" (__edx), \
+ "=c" (__ecx)
+
+#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+
+#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
+#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
+#endif
+
#define __PVOP_CALL(rettype, op, pre, post, ...) \
({ \
rettype __ret; \
- unsigned long __eax, __edx, __ecx; \
+ PVOP_CALL_ARGS; \
+ /* This is 32-bit specific, but is okay in 64-bit */ \
+ /* since this condition will never hold */ \
if (sizeof(rettype) > sizeof(unsigned long)) { \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : "=a" (__eax), "=d" (__edx), \
- "=c" (__ecx) \
+ : PVOP_CALL_CLOBBERS \
: paravirt_type(op), \
paravirt_clobber(CLBR_ANY), \
##__VA_ARGS__ \
- : "memory", "cc"); \
+ : "memory", "cc" EXTRA_CLOBBERS); \
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : "=a" (__eax), "=d" (__edx), \
- "=c" (__ecx) \
+ : PVOP_CALL_CLOBBERS \
: paravirt_type(op), \
paravirt_clobber(CLBR_ANY), \
##__VA_ARGS__ \
- : "memory", "cc"); \
+ : "memory", "cc" EXTRA_CLOBBERS); \
__ret = (rettype)__eax; \
} \
__ret; \
})
#define __PVOP_VCALL(op, pre, post, ...) \
({ \
- unsigned long __eax, __edx, __ecx; \
+ PVOP_VCALL_ARGS; \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
+ : PVOP_VCALL_CLOBBERS \
: paravirt_type(op), \
paravirt_clobber(CLBR_ANY), \
##__VA_ARGS__ \
- : "memory", "cc"); \
+ : "memory", "cc" VEXTRA_CLOBBERS); \
})
#define PVOP_CALL0(rettype, op) \
@@ -417,22 +484,26 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, "", "")
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
+ __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))
+ __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
+ __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
+ "1" ((unsigned long)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
+ __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
+ "1" ((unsigned long)(arg2)))
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), \
- "1"((u32)(arg2)), "2"((u32)(arg3)))
+ __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
+ "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \
- "2"((u32)(arg3)))
+ __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
+ "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+/* This is the only difference in x86_64. We can make it much simpler */
+#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
@@ -443,16 +514,26 @@ int paravirt_disable_iospace(void);
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
"0" ((u32)(arg1)), "1" ((u32)(arg2)), \
"2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+#else
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
+ __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
+ "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
+ "3"((unsigned long)(arg4)))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
+ __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
+ "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
+ "3"((unsigned long)(arg4)))
+#endif
static inline int paravirt_enabled(void)
{
return pv_info.paravirt_enabled;
}
-static inline void load_esp0(struct tss_struct *tss,
+static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
- PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
+ PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}
#define ARCH_SETUP pv_init_ops.arch_setup();
@@ -540,6 +621,18 @@ static inline void write_cr4(unsigned long x)
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
+#ifdef CONFIG_X86_64
+static inline unsigned long read_cr8(void)
+{
+ return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
+}
+
+static inline void write_cr8(unsigned long x)
+{
+ PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
+}
+#endif
+
static inline void raw_safe_halt(void)
{
PVOP_VCALL0(pv_irq_ops.safe_halt);
@@ -613,8 +706,6 @@ static inline unsigned long long paravirt_sched_clock(void)
}
#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
static inline unsigned long long paravirt_read_pmc(int counter)
{
return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
@@ -626,15 +717,36 @@ static inline unsigned long long paravirt_read_pmc(int counter)
high = _l >> 32; \
} while(0)
+static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
+{
+ return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
+}
+
+#define rdtscp(low, high, aux) \
+do { \
+ int __aux; \
+ unsigned long __val = paravirt_rdtscp(&__aux); \
+ (low) = (u32)__val; \
+ (high) = (u32)(__val >> 32); \
+ (aux) = __aux; \
+} while (0)
+
+#define rdtscpll(val, aux) \
+do { \
+ unsigned long __aux; \
+ val = paravirt_rdtscp(&__aux); \
+ (aux) = __aux; \
+} while (0)
+
static inline void load_TR_desc(void)
{
PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
-static inline void load_gdt(const struct Xgt_desc_struct *dtr)
+static inline void load_gdt(const struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
-static inline void load_idt(const struct Xgt_desc_struct *dtr)
+static inline void load_idt(const struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
@@ -642,11 +754,11 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
-static inline void store_gdt(struct Xgt_desc_struct *dtr)
+static inline void store_gdt(struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
-static inline void store_idt(struct Xgt_desc_struct *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
@@ -659,17 +771,22 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}
-static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
+
+static inline void write_ldt_entry(struct desc_struct *dt, int entry,
+ const void *desc)
{
- PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
+ PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
}
-static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
+
+static inline void write_gdt_entry(struct desc_struct *dt, int entry,
+ void *desc, int type)
{
- PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
+ PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
}
-static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
+
+static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
- PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
+ PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
@@ -690,17 +807,17 @@ static inline void slow_down_io(void) {
/*
* Basic functions accessing APICs.
*/
-static inline void apic_write(unsigned long reg, unsigned long v)
+static inline void apic_write(unsigned long reg, u32 v)
{
PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}
-static inline void apic_write_atomic(unsigned long reg, unsigned long v)
+static inline void apic_write_atomic(unsigned long reg, u32 v)
{
PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
}
-static inline unsigned long apic_read(unsigned long reg)
+static inline u32 apic_read(unsigned long reg)
{
return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}
@@ -786,9 +903,9 @@ static inline void paravirt_release_pt(unsigned pfn)
PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
}
-static inline void paravirt_alloc_pd(unsigned pfn)
+static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn)
{
- PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
+ PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn);
}
static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
@@ -822,128 +939,236 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}
-#ifdef CONFIG_X86_PAE
-static inline pte_t __pte(unsigned long long val)
+static inline pte_t __pte(pteval_t val)
{
- unsigned long long ret = PVOP_CALL2(unsigned long long,
- pv_mmu_ops.make_pte,
- val, val >> 32);
- return (pte_t) { ret, ret >> 32 };
+ pteval_t ret;
+
+ if (sizeof(pteval_t) > sizeof(long))
+ ret = PVOP_CALL2(pteval_t,
+ pv_mmu_ops.make_pte,
+ val, (u64)val >> 32);
+ else
+ ret = PVOP_CALL1(pteval_t,
+ pv_mmu_ops.make_pte,
+ val);
+
+ return (pte_t) { .pte = ret };
}
-static inline pmd_t __pmd(unsigned long long val)
+static inline pteval_t pte_val(pte_t pte)
{
- return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
- val, val >> 32) };
+ pteval_t ret;
+
+ if (sizeof(pteval_t) > sizeof(long))
+ ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte, (u64)pte.pte >> 32);
+ else
+ ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte);
+
+ return ret;
}
-static inline pgd_t __pgd(unsigned long long val)
+static inline pgd_t __pgd(pgdval_t val)
{
- return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
- val, val >> 32) };
+ pgdval_t ret;
+
+ if (sizeof(pgdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
+ val, (u64)val >> 32);
+ else
+ ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
+ val);
+
+ return (pgd_t) { ret };
}
-static inline unsigned long long pte_val(pte_t x)
+static inline pgdval_t pgd_val(pgd_t pgd)
{
- return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
- x.pte_low, x.pte_high);
+ pgdval_t ret;
+
+ if (sizeof(pgdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd, (u64)pgd.pgd >> 32);
+ else
+ ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd);
+
+ return ret;
}
-static inline unsigned long long pmd_val(pmd_t x)
+static inline void set_pte(pte_t *ptep, pte_t pte)
{
- return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
- x.pmd, x.pmd >> 32);
+ if (sizeof(pteval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
+ pte.pte, (u64)pte.pte >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
+ pte.pte);
}
-static inline unsigned long long pgd_val(pgd_t x)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
- x.pgd, x.pgd >> 32);
+ if (sizeof(pteval_t) > sizeof(long))
+ /* 5 arg words */
+ pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
+ else
+ PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}
-static inline void set_pte(pte_t *ptep, pte_t pteval)
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
+ pmdval_t val = native_pmd_val(pmd);
+
+ if (sizeof(pmdval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
+#if PAGETABLE_LEVELS >= 3
+static inline pmd_t __pmd(pmdval_t val)
{
- /* 5 arg words */
- pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
+ pmdval_t ret;
+
+ if (sizeof(pmdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
+ val, (u64)val >> 32);
+ else
+ ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
+ val);
+
+ return (pmd_t) { ret };
}
-static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
+static inline pmdval_t pmd_val(pmd_t pmd)
{
- PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
- pteval.pte_low, pteval.pte_high);
+ pmdval_t ret;
+
+ if (sizeof(pmdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd, (u64)pmd.pmd >> 32);
+ else
+ ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd);
+
+ return ret;
}
-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void set_pud(pud_t *pudp, pud_t pud)
{
- /* 5 arg words */
- pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
+ pudval_t val = native_pud_val(pud);
+
+ if (sizeof(pudval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
+ val, (u64)val >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
+ val);
+}
+#if PAGETABLE_LEVELS == 4
+static inline pud_t __pud(pudval_t val)
+{
+ pudval_t ret;
+
+ if (sizeof(pudval_t) > sizeof(long))
+ ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
+ val, (u64)val >> 32);
+ else
+ ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
+ val);
+
+ return (pud_t) { ret };
}
-static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
+static inline pudval_t pud_val(pud_t pud)
{
- PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
- pmdval.pmd, pmdval.pmd >> 32);
+ pudval_t ret;
+
+ if (sizeof(pudval_t) > sizeof(long))
+ ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud, (u64)pud.pud >> 32);
+ else
+ ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud);
+
+ return ret;
}
-static inline void set_pud(pud_t *pudp, pud_t pudval)
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
- pudval.pgd.pgd, pudval.pgd.pgd >> 32);
+ pgdval_t val = native_pgd_val(pgd);
+
+ if (sizeof(pgdval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
+ val, (u64)val >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
+ val);
}
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline void pgd_clear(pgd_t *pgdp)
{
- PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
+ set_pgd(pgdp, __pgd(0));
}
-static inline void pmd_clear(pmd_t *pmdp)
+static inline void pud_clear(pud_t *pudp)
{
- PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
+ set_pud(pudp, __pud(0));
}
-#else /* !CONFIG_X86_PAE */
+#endif /* PAGETABLE_LEVELS == 4 */
-static inline pte_t __pte(unsigned long val)
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+#ifdef CONFIG_X86_PAE
+/* Special-case pte-setting operations for PAE, which can't update a
+ 64-bit pte atomically */
+static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
- return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
+ PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
+ pte.pte, pte.pte >> 32);
}
-static inline pgd_t __pgd(unsigned long val)
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
+ /* 5 arg words */
+ pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}
-static inline unsigned long pte_val(pte_t x)
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
+ PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}
-static inline unsigned long pgd_val(pgd_t x)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
+}
+#else /* !CONFIG_X86_PAE */
+static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
- return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
+ set_pte(ptep, pte);
}
-static inline void set_pte(pte_t *ptep, pte_t pteval)
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
+ set_pte(ptep, pte);
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
+ set_pte_at(mm, addr, ptep, __pte(0));
}
-static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
+static inline void pmd_clear(pmd_t *pmdp)
{
- PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
+ set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
@@ -1014,52 +1239,68 @@ struct paravirt_patch_site {
extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
+#ifdef CONFIG_X86_32
+#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
+#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+#define PV_FLAGS_ARG "0"
+#define PV_EXTRA_CLOBBERS
+#define PV_VEXTRA_CLOBBERS
+#else
+/* We save some registers, but all of them, that's too much. We clobber all
+ * caller saved registers but the argument parameter */
+#define PV_SAVE_REGS "pushq %%rdi;"
+#define PV_RESTORE_REGS "popq %%rdi;"
+#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
+#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+#define PV_FLAGS_ARG "D"
+#endif
+
static inline unsigned long __raw_local_save_flags(void)
{
unsigned long f;
- asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+ asm volatile(paravirt_alt(PV_SAVE_REGS
PARAVIRT_CALL
- "popl %%edx; popl %%ecx")
+ PV_RESTORE_REGS)
: "=a"(f)
: paravirt_type(pv_irq_ops.save_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
+ : "memory", "cc" PV_VEXTRA_CLOBBERS);
return f;
}
static inline void raw_local_irq_restore(unsigned long f)
{
- asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+ asm volatile(paravirt_alt(PV_SAVE_REGS
PARAVIRT_CALL
- "popl %%edx; popl %%ecx")
+ PV_RESTORE_REGS)
: "=a"(f)
- : "0"(f),
+ : PV_FLAGS_ARG(f),
paravirt_type(pv_irq_ops.restore_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
+ : "memory", "cc" PV_EXTRA_CLOBBERS);
}
static inline void raw_local_irq_disable(void)
{
- asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+ asm volatile(paravirt_alt(PV_SAVE_REGS
PARAVIRT_CALL
- "popl %%edx; popl %%ecx")
+ PV_RESTORE_REGS)
:
: paravirt_type(pv_irq_ops.irq_disable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}
static inline void raw_local_irq_enable(void)
{
- asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
+ asm volatile(paravirt_alt(PV_SAVE_REGS
PARAVIRT_CALL
- "popl %%edx; popl %%ecx")
+ PV_RESTORE_REGS)
:
: paravirt_type(pv_irq_ops.irq_enable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}
static inline unsigned long __raw_local_irq_save(void)
@@ -1071,27 +1312,6 @@ static inline unsigned long __raw_local_irq_save(void)
return f;
}
-#define CLI_STRING \
- _paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *%[paravirt_cli_opptr];" \
- "popl %%edx; popl %%ecx", \
- "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
-
-#define STI_STRING \
- _paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *%[paravirt_sti_opptr];" \
- "popl %%edx; popl %%ecx", \
- "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
-
-#define CLI_STI_CLOBBERS , "%eax"
-#define CLI_STI_INPUT_ARGS \
- , \
- [paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \
- [paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable), \
- [paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \
- [paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable), \
- paravirt_clobber(CLBR_EAX)
-
/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
@@ -1109,43 +1329,72 @@ static inline unsigned long __raw_local_irq_save(void)
#else /* __ASSEMBLY__ */
-#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
-
-#define PARA_SITE(ptype, clobbers, ops) \
+#define _PVSITE(ptype, clobbers, ops, word, algn) \
771:; \
ops; \
772:; \
.pushsection .parainstructions,"a"; \
- .long 771b; \
+ .align algn; \
+ word 771b; \
.byte ptype; \
.byte 772b-771b; \
.short clobbers; \
.popsection
+
+#ifdef CONFIG_X86_64
+#define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
+#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#else
+#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
+#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#endif
+
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp *%cs:pv_cpu_ops+PV_CPU_iret)
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
- pushl %eax; pushl %ecx; pushl %edx; \
+ PV_SAVE_REGS; \
call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \
- popl %edx; popl %ecx; popl %eax) \
+ PV_RESTORE_REGS;) \
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
- pushl %eax; pushl %ecx; pushl %edx; \
+ PV_SAVE_REGS; \
call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \
- popl %edx; popl %ecx; popl %eax)
+ PV_RESTORE_REGS;)
+
+#define ENABLE_INTERRUPTS_SYSCALL_RET \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+ CLBR_NONE, \
+ jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
- jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
+#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
call *pv_cpu_ops+PV_CPU_read_cr0; \
pop %edx; pop %ecx
+#else
+#define SWAPGS \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
+ PV_SAVE_REGS; \
+ call *pv_cpu_ops+PV_CPU_swapgs; \
+ PV_RESTORE_REGS \
+ )
+
+#define GET_CR2_INTO_RCX \
+ call *pv_mmu_ops+PV_MMU_read_cr2; \
+ movq %rax, %rcx; \
+ xorq %rax, %rax;
+
+#endif
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index e8836196634..c61190cb9e1 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -66,6 +66,7 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
#ifdef CONFIG_PCI
+extern void early_quirks(void);
static inline void pci_dma_burst_advice(struct pci_dev *pdev,
enum pci_dma_burst_strategy *strat,
unsigned long *strategy_parameter)
@@ -73,9 +74,10 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
*strat = PCI_DMA_BURST_INFINITY;
*strategy_parameter = ~0UL;
}
+#else
+static inline void early_quirks(void) { }
#endif
-
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_32
@@ -90,6 +92,19 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
/* generic pci stuff */
#include <asm-generic/pci.h>
+#ifdef CONFIG_NUMA
+/* Returns the node based on pci bus */
+static inline int __pcibus_to_node(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->node;
+}
+
+static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
+{
+ return node_to_cpumask(__pcibus_to_node(bus));
+}
+#endif
#endif
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index ef54226a932..37469031453 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -26,7 +26,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int l
extern void pci_iommu_alloc(void);
-extern int iommu_setup(char *opt);
/* The PCI address space does equal the physical memory
* address space. The networking and block device layers use
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 35962bbe5e7..c0305bff0f1 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -7,22 +7,22 @@
#include <linux/cache.h>
#include <asm/page.h>
-/* Per processor datastructure. %gs points to it while the kernel runs */
+/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
struct task_struct *pcurrent; /* 0 Current process */
unsigned long data_offset; /* 8 Per cpu data offset from linker
address */
- unsigned long kernelstack; /* 16 top of kernel stack for current */
- unsigned long oldrsp; /* 24 user rsp for system call */
- int irqcount; /* 32 Irq nesting counter. Starts with -1 */
- int cpunumber; /* 36 Logical CPU number */
+ unsigned long kernelstack; /* 16 top of kernel stack for current */
+ unsigned long oldrsp; /* 24 user rsp for system call */
+ int irqcount; /* 32 Irq nesting counter. Starts -1 */
+ unsigned int cpunumber; /* 36 Logical CPU number */
#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
/* gcc-ABI: this canary MUST be at
offset 40!!! */
#endif
char *irqstackptr;
- int nodenumber; /* number of current node */
+ unsigned int nodenumber; /* number of current node */
unsigned int __softirq_pending;
unsigned int __nmi_count; /* number of NMI on this CPUs */
short mmu_state;
@@ -40,13 +40,14 @@ struct x8664_pda {
extern struct x8664_pda *_cpu_pda[];
extern struct x8664_pda boot_cpu_pda[];
+extern void pda_init(int);
#define cpu_pda(i) (_cpu_pda[i])
-/*
+/*
* There is no fast way to get the base address of the PDA, all the accesses
* have to mention %fs/%gs. So it needs to be done this Torvaldian way.
- */
+ */
extern void __bad_pda_field(void) __attribute__((noreturn));
/*
@@ -57,70 +58,70 @@ extern struct x8664_pda _proxy_pda;
#define pda_offset(field) offsetof(struct x8664_pda, field)
-#define pda_to_op(op,field,val) do { \
+#define pda_to_op(op, field, val) do { \
typedef typeof(_proxy_pda.field) T__; \
if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
switch (sizeof(_proxy_pda.field)) { \
case 2: \
- asm(op "w %1,%%gs:%c2" : \
+ asm(op "w %1,%%gs:%c2" : \
"+m" (_proxy_pda.field) : \
"ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
+ "i"(pda_offset(field))); \
+ break; \
case 4: \
- asm(op "l %1,%%gs:%c2" : \
+ asm(op "l %1,%%gs:%c2" : \
"+m" (_proxy_pda.field) : \
"ri" ((T__)val), \
- "i" (pda_offset(field))); \
+ "i" (pda_offset(field))); \
break; \
case 8: \
- asm(op "q %1,%%gs:%c2": \
+ asm(op "q %1,%%gs:%c2": \
"+m" (_proxy_pda.field) : \
"ri" ((T__)val), \
- "i"(pda_offset(field))); \
+ "i"(pda_offset(field))); \
break; \
- default: \
+ default: \
__bad_pda_field(); \
- } \
- } while (0)
+ } \
+ } while (0)
#define pda_from_op(op,field) ({ \
typeof(_proxy_pda.field) ret__; \
switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %%gs:%c1,%0" : \
+ case 2: \
+ asm(op "w %%gs:%c1,%0" : \
"=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
+ "i" (pda_offset(field)), \
+ "m" (_proxy_pda.field)); \
break; \
case 4: \
asm(op "l %%gs:%c1,%0": \
"=r" (ret__): \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
+ "i" (pda_offset(field)), \
+ "m" (_proxy_pda.field)); \
break; \
- case 8: \
+ case 8: \
asm(op "q %%gs:%c1,%0": \
"=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
+ "i" (pda_offset(field)), \
+ "m" (_proxy_pda.field)); \
break; \
- default: \
+ default: \
__bad_pda_field(); \
} \
ret__; })
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
-#define or_pda(field,val) pda_to_op("or",field,val)
+#define read_pda(field) pda_from_op("mov", field)
+#define write_pda(field, val) pda_to_op("mov", field, val)
+#define add_pda(field, val) pda_to_op("add", field, val)
+#define sub_pda(field, val) pda_to_op("sub", field, val)
+#define or_pda(field, val) pda_to_op("or", field, val)
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit,field) ({ \
+#define test_and_clear_bit_pda(bit, field) ({ \
int old__; \
asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
- : "=r" (old__), "+m" (_proxy_pda.field) \
+ : "=r" (old__), "+m" (_proxy_pda.field) \
: "dIr" (bit), "i" (pda_offset(field)) : "memory"); \
old__; \
})
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index a1aaad274cc..0dec00f27eb 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -1,5 +1,142 @@
-#ifdef CONFIG_X86_32
-# include "percpu_32.h"
-#else
-# include "percpu_64.h"
+#ifndef _ASM_X86_PERCPU_H_
+#define _ASM_X86_PERCPU_H_
+
+#ifdef CONFIG_X86_64
+#include <linux/compiler.h>
+
+/* Same as asm-generic/percpu.h, except that we store the per cpu offset
+ in the PDA. Longer term the PDA and every per cpu variable
+ should be just put into a single section and referenced directly
+ from %gs */
+
+#ifdef CONFIG_SMP
+#include <asm/pda.h>
+
+#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+#define __my_cpu_offset read_pda(data_offset)
+
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
#endif
+#include <asm-generic/percpu.h>
+
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#else /* CONFIG_X86_64 */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ * var - variable name
+ * reg - 32bit register
+ *
+ * The resulting address is stored in the "reg" argument.
+ *
+ * Example:
+ * PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+#define PER_CPU(var, reg) \
+ movl %fs:per_cpu__##this_cpu_off, reg; \
+ lea per_cpu__##var(reg), reg
+#define PER_CPU_VAR(var) %fs:per_cpu__##var
+#else /* ! SMP */
+#define PER_CPU(var, reg) \
+ movl $per_cpu__##var, reg
+#define PER_CPU_VAR(var) per_cpu__##var
+#endif /* SMP */
+
+#else /* ...!ASSEMBLY */
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ * var - variable name
+ * cpu - 32bit register containing the current CPU number
+ *
+ * The resulting address is stored in the "cpu" argument.
+ *
+ * Example:
+ * PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+
+#define __my_cpu_offset x86_read_percpu(this_cpu_off)
+
+/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
+#define __percpu_seg "%%fs:"
+
+#else /* !SMP */
+
+#define __percpu_seg ""
+
+#endif /* SMP */
+
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+/* For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though). */
+extern void __bad_percpu_size(void);
+
+#define percpu_to_op(op,var,val) \
+ do { \
+ typedef typeof(var) T__; \
+ if (0) { T__ tmp__; tmp__ = (val); } \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ case 2: \
+ asm(op "w %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ case 4: \
+ asm(op "l %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ } while (0)
+
+#define percpu_from_op(op,var) \
+ ({ \
+ typeof(var) ret__; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ ret__; })
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
+#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
+#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
+#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#endif /* !__ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+#endif /* _ASM_X86_PERCPU_H_ */
diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h
deleted file mode 100644
index a7ebd436f3c..00000000000
--- a/include/asm-x86/percpu_32.h
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef __ARCH_I386_PERCPU__
-#define __ARCH_I386_PERCPU__
-
-#ifdef __ASSEMBLY__
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-#define PER_CPU(var, reg) \
- movl %fs:per_cpu__##this_cpu_off, reg; \
- lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var) %fs:per_cpu__##var
-#else /* ! SMP */
-#define PER_CPU(var, reg) \
- movl $per_cpu__##var, reg
-#define PER_CPU_VAR(var) per_cpu__##var
-#endif /* SMP */
-
-#else /* ...!ASSEMBLY */
-
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-/* Same as generic implementation except for optimized local access. */
-#define __GENERIC_PER_CPU
-
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({ \
- extern int simple_indentifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({ \
- extern int simple_indentifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size) \
-do { \
- unsigned int __i; \
- for_each_possible_cpu(__i) \
- memcpy((pcpudst)+__per_cpu_offset[__i], \
- (src), (size)); \
-} while (0)
-
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
-#else /* !SMP */
-#include <asm-generic/percpu.h>
-#define __percpu_seg ""
-#endif /* SMP */
-
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op,var,val) \
- do { \
- typedef typeof(var) T__; \
- if (0) { T__ tmp__; tmp__ = (val); } \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b %1,"__percpu_seg"%0" \
- : "+m" (var) \
- :"ri" ((T__)val)); \
- break; \
- case 2: \
- asm(op "w %1,"__percpu_seg"%0" \
- : "+m" (var) \
- :"ri" ((T__)val)); \
- break; \
- case 4: \
- asm(op "l %1,"__percpu_seg"%0" \
- : "+m" (var) \
- :"ri" ((T__)val)); \
- break; \
- default: __bad_percpu_size(); \
- } \
- } while (0)
-
-#define percpu_from_op(op,var) \
- ({ \
- typeof(var) ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
- case 2: \
- asm(op "w "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
- case 4: \
- asm(op "l "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
- ret__; })
-
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ARCH_I386_PERCPU__ */
diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h
deleted file mode 100644
index 5abd4827010..00000000000
--- a/include/asm-x86/percpu_64.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _ASM_X8664_PERCPU_H_
-#define _ASM_X8664_PERCPU_H_
-#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
- in the PDA. Longer term the PDA and every per cpu variable
- should be just put into a single section and referenced directly
- from %gs */
-
-#ifdef CONFIG_SMP
-
-#include <asm/pda.h>
-
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset() read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.shared_aligned"))) \
- __typeof__(type) per_cpu__##name \
- ____cacheline_internodealigned_in_smp
-
-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({ \
- extern int simple_identifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
-#define __get_cpu_var(var) (*({ \
- extern int simple_identifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-#define __raw_get_cpu_var(var) (*({ \
- extern int simple_identifier_##var(void); \
- RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size) \
-do { \
- unsigned int __i; \
- for_each_possible_cpu(__i) \
- memcpy((pcpudst)+__per_cpu_offset(__i), \
- (src), (size)); \
-} while (0)
-
-extern void setup_per_cpu_areas(void);
-
-#else /* ! SMP */
-
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-
-#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var) per_cpu__##var
-#define __raw_get_cpu_var(var) per_cpu__##var
-
-#endif /* SMP */
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
-#endif /* _ASM_X8664_PERCPU_H_ */
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index f2fc33ceb9f..10c2b452e64 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -3,31 +3,33 @@
#include <linux/threads.h>
#include <linux/mm.h> /* for struct page */
+#include <asm/tlb.h>
+#include <asm-generic/tlb.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define paravirt_alloc_pt(mm, pfn) do { } while (0)
-#define paravirt_alloc_pd(pfn) do { } while (0)
-#define paravirt_alloc_pd(pfn) do { } while (0)
+#define paravirt_alloc_pd(mm, pfn) do { } while (0)
#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
#define paravirt_release_pt(pfn) do { } while (0)
#define paravirt_release_pd(pfn) do { } while (0)
#endif
-#define pmd_populate_kernel(mm, pmd, pte) \
-do { \
- paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \
- set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
-} while (0)
+static inline void pmd_populate_kernel(struct mm_struct *mm,
+ pmd_t *pmd, pte_t *pte)
+{
+ paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
+ set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+{
+ unsigned long pfn = page_to_pfn(pte);
-#define pmd_populate(mm, pmd, pte) \
-do { \
- paravirt_alloc_pt(mm, page_to_pfn(pte)); \
- set_pmd(pmd, __pmd(_PAGE_TABLE + \
- ((unsigned long long)page_to_pfn(pte) << \
- (unsigned long long) PAGE_SHIFT))); \
-} while (0)
+ paravirt_alloc_pt(mm, pfn);
+ set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+}
/*
* Allocate and free page tables.
@@ -49,20 +51,55 @@ static inline void pte_free(struct page *pte)
}
-#define __pte_free_tlb(tlb,pte) \
-do { \
- paravirt_release_pt(page_to_pfn(pte)); \
- tlb_remove_page((tlb),(pte)); \
-} while (0)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+ paravirt_release_pt(page_to_pfn(pte));
+ tlb_remove_page(tlb, pte);
+}
#ifdef CONFIG_X86_PAE
/*
* In the PAE case we free the pmds as part of the pgd.
*/
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x) do { } while (0)
-#define __pmd_free_tlb(tlb,x) do { } while (0)
-#define pud_populate(mm, pmd, pte) BUG()
-#endif
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(pmd_t *pmd)
+{
+ BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+ free_page((unsigned long)pmd);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+ /* This is called just after the pmd has been detached from
+ the pgd, which requires a full tlb flush to be recognized
+ by the CPU. Rather than incurring multiple tlb flushes
+ while the address space is being pulled down, make the tlb
+ gathering machinery do a full flush when we're done. */
+ tlb->fullmm = 1;
+
+ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+ tlb_remove_page(tlb, virt_to_page(pmd));
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+ paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+ /* Note: almost everything apart from _PAGE_PRESENT is
+ reserved at the pmd (PDPT) level. */
+ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+ /*
+ * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+ * the TLB via cr3 if the top-level pgd is changed...
+ */
+ if (mm == current->active_mm)
+ write_cr3(read_cr3());
+}
+#endif /* CONFIG_X86_PAE */
#endif /* _I386_PGALLOC_H */
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 84b03cf56a7..701404fab30 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -15,30 +15,31 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
{
*ptep = pte;
}
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep , pte_t pte)
-{
- native_set_pte(ptep, pte);
-}
+
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
}
-#ifndef CONFIG_PARAVIRT
-#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval)
-#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval)
-#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval)
-#endif
-#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
+static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+ native_set_pte(ptep, pte);
+}
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ native_set_pte(ptep, pte);
+}
+
+static inline void native_pmd_clear(pmd_t *pmdp)
+{
+ native_set_pmd(pmdp, __pmd(0));
+}
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
{
- *xp = __pte(0);
+ *xp = native_make_pte(0);
}
#ifdef CONFIG_SMP
@@ -53,16 +54,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_none(x) (!(x).pte_low)
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-
-/*
- * All present pages are kernel-executable:
- */
-static inline int pte_exec_kernel(pte_t pte)
-{
- return 1;
-}
/*
* Bits 0, 6 and 7 are taken, split up the 29 bits of offset
@@ -74,13 +65,13 @@ static inline int pte_exec_kernel(pte_t pte)
((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 ))
#define pgoff_to_pte(off) \
- ((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
+ ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 1) & 0x1f)
#define __swp_offset(x) ((x).val >> 8)
#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 948a3341411..a195c3e757b 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -15,16 +15,18 @@
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
-#define pud_none(pud) 0
-#define pud_bad(pud) 0
-#define pud_present(pud) 1
-/*
- * All present pages with !NX bit are kernel-executable:
- */
-static inline int pte_exec_kernel(pte_t pte)
+static inline int pud_none(pud_t pud)
+{
+ return pud_val(pud) == 0;
+}
+static inline int pud_bad(pud_t pud)
+{
+ return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+static inline int pud_present(pud_t pud)
{
- return !(pte_val(pte) & _PAGE_NX);
+ return pud_val(pud) & _PAGE_PRESENT;
}
/* Rules for using set_pte: the pte being assigned *must* be
@@ -39,11 +41,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
smp_wmb();
ptep->pte_low = pte.pte_low;
}
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep , pte_t pte)
-{
- native_set_pte(ptep, pte);
-}
/*
* Since this is only called on user PTEs, and the page fault handler
@@ -71,7 +68,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
}
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ set_64bit((unsigned long long *)(pudp),native_pud_val(pud));
}
/*
@@ -94,24 +91,29 @@ static inline void native_pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}
-#ifndef CONFIG_PARAVIRT
-#define set_pte(ptep, pte) native_set_pte(ptep, pte)
-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pte_present(mm, addr, ptep, pte) native_set_pte_present(mm, addr, ptep, pte)
-#define set_pte_atomic(ptep, pte) native_set_pte_atomic(ptep, pte)
-#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
-#define set_pud(pudp, pud) native_set_pud(pudp, pud)
-#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
-#define pmd_clear(pmd) native_pmd_clear(pmd)
-#endif
-
-/*
- * Pentium-II erratum A13: in PAE mode we explicitly have to flush
- * the TLB via cr3 if the top-level pgd is changed...
- * We do not let the generic code free and clear pgd entries due to
- * this erratum.
- */
-static inline void pud_clear (pud_t * pud) { }
+static inline void pud_clear(pud_t *pudp)
+{
+ set_pud(pudp, __pud(0));
+
+ /*
+ * In principle we need to do a cr3 reload here to make sure
+ * the processor recognizes the changed pgd. In practice, all
+ * the places where pud_clear() gets called are followed by
+ * full tlb flushes anyway, so we can defer the cost here.
+ *
+ * Specifically:
+ *
+ * mm/memory.c:free_pmd_range() - immediately after the
+ * pud_clear() it does a pmd_free_tlb(). We change the
+ * mmu_gather structure to do a full tlb flush (which has the
+ * effect of reloading cr3) when the pagetable free is
+ * complete.
+ *
+ * arch/x86/mm/hugetlbpage.c:huge_pmd_unshare() - the call to
+ * this is followed by a flush_tlb_range, which on x86 does a
+ * full tlb flush.
+ */
+}
#define pud_page(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))
@@ -155,21 +157,7 @@ static inline int pte_none(pte_t pte)
static inline unsigned long pte_pfn(pte_t pte)
{
- return pte_val(pte) >> PAGE_SHIFT;
-}
-
-extern unsigned long long __supported_pte_mask;
-
-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
-{
- return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
- pgprot_val(pgprot)) & __supported_pte_mask);
-}
-
-static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
-{
- return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
- pgprot_val(pgprot)) & __supported_pte_mask);
+ return (pte_val(pte) & ~_PAGE_NX) >> PAGE_SHIFT;
}
/*
@@ -177,7 +165,7 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
* put the 32 bits of offset into the high part.
*/
#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
+#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
#define PTE_FILE_MAX_BITS 32
/* Encode and de-code a swap entry */
@@ -185,8 +173,6 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
#define __swp_offset(x) ((x).val >> 5)
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
-#define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val })
-
-#define __pmd_free_tlb(tlb, x) do { } while (0)
+#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
#endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 1039140652a..cd2524f0745 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -1,5 +1,364 @@
+#ifndef _ASM_X86_PGTABLE_H
+#define _ASM_X86_PGTABLE_H
+
+#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
+#define FIRST_USER_ADDRESS 0
+
+#define _PAGE_BIT_PRESENT 0
+#define _PAGE_BIT_RW 1
+#define _PAGE_BIT_USER 2
+#define _PAGE_BIT_PWT 3
+#define _PAGE_BIT_PCD 4
+#define _PAGE_BIT_ACCESSED 5
+#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_FILE 6
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
+#define _PAGE_BIT_UNUSED2 10
+#define _PAGE_BIT_UNUSED3 11
+#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+
+/*
+ * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
+ * sign-extended value on 32-bit with all 1's in the upper word,
+ * which preserves the upper pte values on 64-bit ptes:
+ */
+#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT)
+#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW)
+#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER)
+#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT)
+#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD)
+#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY)
+#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */
+#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */
+#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1)
+#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2)
+#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3)
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX)
+#else
+#define _PAGE_NX 0
+#endif
+
+/* If _PAGE_PRESENT is clear, we use these: */
+#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
+ pte_present gives true */
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY PAGE_COPY_NOEXEC
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#ifdef CONFIG_X86_32
+#define _PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
+
+#ifndef __ASSEMBLY__
+extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+#endif /* __ASSEMBLY__ */
+#else
+#define __PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
+#endif
+
+#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#ifdef CONFIG_X86_32
+# define MAKE_GLOBAL(x) __pgprot((x))
+#else
+# define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+#endif
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
+#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
+#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+
+/* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+#ifndef __ASSEMBLY__
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
+static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }
+static inline int pte_global(pte_t pte) { return pte_val(pte) & _PAGE_GLOBAL; }
+static inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_NX); }
+
+static inline int pmd_large(pmd_t pte) {
+ return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+ (_PAGE_PSE|_PAGE_PRESENT);
+}
+
+static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
+static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
+static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); }
+static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); }
+static inline pte_t pte_mkdirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_DIRTY); }
+static inline pte_t pte_mkyoung(pte_t pte) { return __pte(pte_val(pte) | _PAGE_ACCESSED); }
+static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); }
+static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_PSE); }
+static inline pte_t pte_clrhuge(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
+static inline pte_t pte_mkglobal(pte_t pte) { return __pte(pte_val(pte) | _PAGE_GLOBAL); }
+static inline pte_t pte_clrglobal(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); }
+
+extern pteval_t __supported_pte_mask;
+
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pteval_t val = pte_val(pte);
+
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+ * the newprot (if present):
+ */
+ val &= _PAGE_CHG_MASK & ~_PAGE_NX;
+ val |= pgprot_val(newprot) & __supported_pte_mask;
+
+ return __pte(val);
+}
+
+#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
+
+#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else /* !CONFIG_PARAVIRT */
+#define set_pte(ptep, pte) native_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
+
+#define set_pte_present(mm, addr, ptep, pte) \
+ native_set_pte_present(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte) \
+ native_set_pte_atomic(ptep, pte)
+
+#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
+#define pgd_clear(pgd) native_pgd_clear(pgd)
+#endif
+
+#ifndef set_pud
+# define set_pud(pudp, pud) native_set_pud(pudp, pud)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_clear(pud) native_pud_clear(pud)
+#endif
+
+#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd) native_pmd_clear(pmd)
+
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+#endif /* CONFIG_PARAVIRT */
+
+#endif /* __ASSEMBLY__ */
+
#ifdef CONFIG_X86_32
# include "pgtable_32.h"
#else
# include "pgtable_64.h"
#endif
+
+#ifndef __ASSEMBLY__
+
+enum {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+};
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address, int *level);
+
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+ pte_t res = *ptep;
+
+ /* Pure native function needs no input for mm, addr */
+ native_pte_clear(NULL, 0, ptep);
+ return res;
+}
+
+static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , pte_t pte)
+{
+ native_set_pte(ptep, pte);
+}
+
+#ifndef CONFIG_PARAVIRT
+/*
+ * Rules for using pte_update - it must be called after any PTE update which
+ * has not been done using the set_pte / clear_pte interfaces. It is used by
+ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
+ * updates should either be sets, clears, or set_pte_atomic for P->P
+ * transitions, which means this hook should only be called for user PTEs.
+ * This hook implies a P->P protection or access change has taken place, which
+ * requires a subsequent TLB flush. The notification can optionally be delayed
+ * until the TLB flush event by using the pte_update_defer form of the
+ * interface, but care must be taken to assure that the flush happens while
+ * still holding the same page table lock so that the shadow and primary pages
+ * do not become out of sync on SMP.
+ */
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+#endif
+
+/*
+ * We only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPU's that might be updating the dirty
+ * bit at the same time.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+({ \
+ int __changed = !pte_same(*(ptep), entry); \
+ if (__changed && dirty) { \
+ *ptep = entry; \
+ pte_update_defer((vma)->vm_mm, (address), (ptep)); \
+ flush_tlb_page(vma, address); \
+ } \
+ __changed; \
+})
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
+ int __ret = 0; \
+ if (pte_young(*(ptep))) \
+ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
+ &(ptep)->pte); \
+ if (__ret) \
+ pte_update((vma)->vm_mm, addr, ptep); \
+ __ret; \
+})
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(vma, address, ptep) \
+({ \
+ int __young; \
+ __young = ptep_test_and_clear_young((vma), (address), (ptep)); \
+ if (__young) \
+ flush_tlb_page(vma, address); \
+ __young; \
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = native_ptep_get_and_clear(ptep);
+ pte_update(mm, addr, ptep);
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+ pte_t pte;
+ if (full) {
+ /*
+ * Full address destruction in progress; paravirt does not
+ * care about updates and native needs no locking
+ */
+ pte = native_local_ptep_get_and_clear(ptep);
+ } else {
+ pte = ptep_get_and_clear(mm, addr, ptep);
+ }
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+ pte_update(mm, addr, ptep);
+}
+
+#include <asm-generic/pgtable.h>
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_PGTABLE_H */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index ed3e70d8d04..21e70fbf1da 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -25,20 +25,11 @@
struct mm_struct;
struct vm_area_struct;
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
extern struct kmem_cache *pmd_cache;
-extern spinlock_t pgd_lock;
-extern struct page *pgd_list;
void check_pgt_cache(void);
-void pmd_ctor(struct kmem_cache *, void *);
-void pgtable_cache_init(void);
+static inline void pgtable_cache_init(void) {}
void paging_init(void);
@@ -58,9 +49,6 @@ void paging_init(void);
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_ADDRESS 0
-
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
@@ -85,113 +73,6 @@ void paging_init(void);
#endif
/*
- * _PAGE_PSE set in the page directory entry just means that
- * the page directory entry points directly to a 4MB-aligned block of
- * memory.
- */
-#define _PAGE_BIT_PRESENT 0
-#define _PAGE_BIT_RW 1
-#define _PAGE_BIT_USER 2
-#define _PAGE_BIT_PWT 3
-#define _PAGE_BIT_PCD 4
-#define _PAGE_BIT_ACCESSED 5
-#define _PAGE_BIT_DIRTY 6
-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */
-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
-#define _PAGE_BIT_UNUSED2 10
-#define _PAGE_BIT_UNUSED3 11
-#define _PAGE_BIT_NX 63
-
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */
-#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */
-#define _PAGE_UNUSED1 0x200 /* available for programmer */
-#define _PAGE_UNUSED2 0x400
-#define _PAGE_UNUSED3 0x800
-
-/* If _PAGE_PRESENT is clear, we use these: */
-#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE;
- pte_present gives true */
-#ifdef CONFIG_X86_PAE
-#define _PAGE_NX (1ULL<<_PAGE_BIT_NX)
-#else
-#define _PAGE_NX 0
-#endif
-
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define PAGE_NONE \
- __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED \
- __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-
-#define PAGE_SHARED_EXEC \
- __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC \
- __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC \
- __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY \
- PAGE_COPY_NOEXEC
-#define PAGE_READONLY \
- __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC \
- __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-
-#define _PAGE_KERNEL \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define _PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-
-extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-
-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
-
-/*
- * The i386 can't do page protection for execute, and considers that
- * the same are read. Also, write permissions imply read permissions.
- * This is the closest we can get..
- */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
-/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
* done without a 'access_ok(VERIFY_WRITE,..)'
@@ -211,133 +92,12 @@ extern unsigned long pg0[];
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; }
-
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; }
-
-static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
-
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces. It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush. The notification can optionally be delayed
- * until the TLB flush event by using the pte_update_defer form of the
- * interface, but care must be taken to assure that the flush happens while
- * still holding the same page table lock so that the shadow and primary pages
- * do not become out of sync on SMP.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
-#endif
-
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
- pte_t res = *ptep;
-
- /* Pure native function needs no input for mm, addr */
- native_pte_clear(NULL, 0, ptep);
- return res;
-}
-
-/*
- * We only update the dirty/accessed state if we set
- * the dirty bit by hand in the kernel, since the hardware
- * will do the accessed bit for us, and we don't want to
- * race with other CPU's that might be updating the dirty
- * bit at the same time.
- */
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
-({ \
- int __changed = !pte_same(*(ptep), entry); \
- if (__changed && dirty) { \
- (ptep)->pte_low = (entry).pte_low; \
- pte_update_defer((vma)->vm_mm, (address), (ptep)); \
- flush_tlb_page(vma, address); \
- } \
- __changed; \
-})
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
- int __ret = 0; \
- if (pte_young(*(ptep))) \
- __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
- &(ptep)->pte_low); \
- if (__ret) \
- pte_update((vma)->vm_mm, addr, ptep); \
- __ret; \
-})
-
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(vma, address, ptep) \
-({ \
- int __young; \
- __young = ptep_test_and_clear_young((vma), (address), (ptep)); \
- if (__young) \
- flush_tlb_page(vma, address); \
- __young; \
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t pte = native_ptep_get_and_clear(ptep);
- pte_update(mm, addr, ptep);
- return pte;
-}
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
-{
- pte_t pte;
- if (full) {
- /*
- * Full address destruction in progress; paravirt does not
- * care about updates and native needs no locking
- */
- pte = native_local_ptep_get_and_clear(ptep);
- } else {
- pte = ptep_get_and_clear(mm, addr, ptep);
- }
- return pte;
-}
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
- pte_update(mm, addr, ptep);
-}
-
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
@@ -367,25 +127,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- pte.pte_low &= _PAGE_CHG_MASK;
- pte.pte_low |= pgprot_val(newprot);
-#ifdef CONFIG_X86_PAE
- /*
- * Chop off the NX bit (if present), and add the NX portion of
- * the newprot (if present):
- */
- pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
- pte.pte_high |= (pgprot_val(newprot) >> 32) & \
- (__supported_pte_mask >> 32);
-#endif
- return pte;
-}
-
-#define pmd_large(pmd) \
-((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
-
/*
* the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
*
@@ -432,26 +173,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-/*
- * Helper function that returns the kernel pagetable entry controlling
- * the virtual address 'address'. NULL means no pagetable entry present.
- * NOTE: the return type is pte_t but if the pmd is PSE then we return it
- * as a pte too.
- */
-extern pte_t *lookup_address(unsigned long address);
-
-/*
- * Make a given kernel text page executable/non-executable.
- * Returns the previous executability setting of that page (which
- * is used to restore the previous state). Used by the SMP bootup code.
- * NOTE: this is an __init function for security reasons.
- */
-#ifdef CONFIG_X86_PAE
- extern int set_kernel_exec(unsigned long vaddr, int enable);
-#else
- static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
-#endif
-
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
@@ -497,13 +218,17 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base)
#endif /* !__ASSEMBLY__ */
+/*
+ * kern_addr_valid() is (1) for FLATMEM and (0) for
+ * SPARSEMEM and DISCONTIGMEM
+ */
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1)
-#endif /* CONFIG_FLATMEM */
+#else
+#define kern_addr_valid(kaddr) (0)
+#endif
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
remap_pfn_range(vma, vaddr, pfn, size, prot)
-#include <asm-generic/pgtable.h>
-
#endif /* _I386_PGTABLE_H */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9b0ff477b39..6e615a103c2 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -17,22 +17,16 @@ extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pgd_t init_level4_pgt[];
-extern unsigned long __supported_pte_mask;
#define swapper_pg_dir init_level4_pgt
extern void paging_init(void);
extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
#endif /* !__ASSEMBLY__ */
+#define SHARED_KERNEL_PMD 1
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
@@ -71,57 +65,68 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define pgd_none(x) (!pgd_val(x))
#define pud_none(x) (!pud_val(x))
-static inline void set_pte(pte_t *dst, pte_t val)
+struct mm_struct;
+
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ *ptep = native_make_pte(0);
+}
+
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- pte_val(*dst) = pte_val(val);
-}
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+ *ptep = pte;
+}
-static inline void set_pmd(pmd_t *dst, pmd_t val)
+static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
- pmd_val(*dst) = pmd_val(val);
-}
+ native_set_pte(ptep, pte);
+}
-static inline void set_pud(pud_t *dst, pud_t val)
+static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
- pud_val(*dst) = pud_val(val);
+#ifdef CONFIG_SMP
+ return native_make_pte(xchg(&xp->pte, 0));
+#else
+ /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */
+ pte_t ret = *xp;
+ native_pte_clear(NULL, 0, xp);
+ return ret;
+#endif
}
-static inline void pud_clear (pud_t *pud)
+static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- set_pud(pud, __pud(0));
+ *pmdp = pmd;
}
-static inline void set_pgd(pgd_t *dst, pgd_t val)
+static inline void native_pmd_clear(pmd_t *pmd)
{
- pgd_val(*dst) = pgd_val(val);
-}
+ native_set_pmd(pmd, native_make_pmd(0));
+}
-static inline void pgd_clear (pgd_t * pgd)
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- set_pgd(pgd, __pgd(0));
+ *pudp = pud;
}
-#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0))
+static inline void native_pud_clear(pud_t *pud)
+{
+ native_set_pud(pud, native_make_pud(0));
+}
-struct mm_struct;
+static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ *pgdp = pgd;
+}
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+static inline void native_pgd_clear(pgd_t * pgd)
{
- pte_t pte;
- if (full) {
- pte = *ptep;
- *ptep = __pte(0);
- } else {
- pte = ptep_get_and_clear(mm, addr, ptep);
- }
- return pte;
+ native_set_pgd(pgd, native_make_pgd(0));
}
#define pte_same(a, b) ((a).pte == (b).pte)
-#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
-
#endif /* !__ASSEMBLY__ */
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
@@ -131,8 +136,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
-#define FIRST_USER_ADDRESS 0
#define MAXMEM _AC(0x3fffffffffff, UL)
#define VMALLOC_START _AC(0xffffc20000000000, UL)
@@ -142,91 +145,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define MODULES_END _AC(0xfffffffffff00000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
-#define _PAGE_BIT_PRESENT 0
-#define _PAGE_BIT_RW 1
-#define _PAGE_BIT_USER 2
-#define _PAGE_BIT_PWT 3
-#define _PAGE_BIT_PCD 4
-#define _PAGE_BIT_ACCESSED 5
-#define _PAGE_BIT_DIRTY 6
-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
-#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
-
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PSE 0x080 /* 2MB page */
-#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
-
-#define _PAGE_PROTNONE 0x080 /* If not present */
-#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
-
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define __PAGE_KERNEL \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define __PAGE_KERNEL_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_RO \
- (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_VSYSCALL \
- (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define __PAGE_KERNEL_VSYSCALL_NOCACHE \
- (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
-#define __PAGE_KERNEL_LARGE \
- (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC \
- (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-
-#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
-
-#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
-#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_VSYSCALL32 __pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
-
-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
#ifndef __ASSEMBLY__
static inline unsigned long pgd_bad(pgd_t pgd)
@@ -246,66 +164,16 @@ static inline unsigned long pmd_bad(pmd_t pmd)
#define pte_none(x) (!pte_val(x))
#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this
- right? */
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
-{
- pte_t pte;
- pte_val(pte) = (page_nr << PAGE_SHIFT);
- pte_val(pte) |= pgprot_val(pgprot);
- pte_val(pte) &= __supported_pte_mask;
- return pte;
-}
-
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
-static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }
-
-static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; }
-static inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; }
-static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; }
-static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_NX)); return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
-static inline pte_t pte_clrhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_PSE)); return pte; }
-
-struct vm_area_struct;
-
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
- if (!pte_young(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
-}
-
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- clear_bit(_PAGE_BIT_RW, &ptep->pte);
-}
-
/*
* Macro to mark a page protection value as "uncacheable".
*/
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
-static inline int pmd_large(pmd_t pte) {
- return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE;
-}
-
/*
* Conversion functions: convert a page and protection to a page entry,
@@ -340,29 +208,18 @@ static inline int pmd_large(pmd_t pte) {
pmd_index(address))
#define pmd_none(x) (!pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
+#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
/* PTE - Level 1 access. */
/* page, protection -> pte */
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-#define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
-/* Change flags of a PTE */
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- pte_val(pte) &= _PAGE_CHG_MASK;
- pte_val(pte) |= pgprot_val(newprot);
- pte_val(pte) &= __supported_pte_mask;
- return pte;
-}
-
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
@@ -376,40 +233,20 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define update_mmu_cache(vma,address,pte) do { } while (0)
-/* We only update the dirty/accessed state if we set
- * the dirty bit by hand in the kernel, since the hardware
- * will do the accessed bit for us, and we don't want to
- * race with other CPU's that might be updating the dirty
- * bit at the same time. */
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-({ \
- int __changed = !pte_same(*(__ptep), __entry); \
- if (__changed && __dirty) { \
- set_pte(__ptep, __entry); \
- flush_tlb_page(__vma, __address); \
- } \
- __changed; \
-})
-
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 1) & 0x3f)
#define __swp_offset(x) ((x).val >> 8)
#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
-extern spinlock_t pgd_lock;
-extern struct list_head pgd_list;
+#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
extern int kern_addr_valid(unsigned long addr);
-pte_t *lookup_address(unsigned long addr);
-
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
remap_pfn_range(vma, vaddr, pfn, size, prot)
#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#define pgtable_cache_init() do { } while (0)
#define check_pgt_cache() do { } while (0)
@@ -422,12 +259,7 @@ pte_t *lookup_address(unsigned long addr);
#define kc_offset_to_vaddr(o) \
(((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
-#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
#endif /* _X86_64_PGTABLE_H */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 46e1c04e309..ab4d0c2a3f8 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,5 +1,842 @@
+#ifndef __ASM_X86_PROCESSOR_H
+#define __ASM_X86_PROCESSOR_H
+
+#include <asm/processor-flags.h>
+
+/* migration helpers, for KVM - will be removed in 2.6.25: */
+#include <asm/vm86.h>
+#define Xgt_desc_struct desc_ptr
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+#include <asm/vm86.h>
+#include <asm/math_emu.h>
+#include <asm/segment.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/percpu.h>
+#include <asm/msr.h>
+#include <asm/desc_defs.h>
+#include <asm/nops.h>
+#include <linux/personality.h>
+#include <linux/cpumask.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+static inline void *current_text_addr(void)
+{
+ void *pc;
+ asm volatile("mov $1f,%0\n1:":"=r" (pc));
+ return pc;
+}
+
+#ifdef CONFIG_X86_VSMP
+#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
+#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
+#else
+#define ARCH_MIN_TASKALIGN 16
+#define ARCH_MIN_MMSTRUCT_ALIGN 0
+#endif
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+#ifdef CONFIG_X86_32
+ char wp_works_ok; /* It doesn't on 386's */
+ char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+ char hard_math;
+ char rfu;
+ char fdiv_bug;
+ char f00f_bug;
+ char coma_bug;
+ char pad0;
+#else
+ /* number of 4K pages in DTLB/ITLB combined(in pages)*/
+ int x86_tlbsize;
+ __u8 x86_virt_bits, x86_phys_bits;
+ /* cpuid returned core id bits */
+ __u8 x86_coreid_bits;
+ /* Max extended CPUID function supported */
+ __u32 extended_cpuid_level;
+#endif
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUS which support this
+ call */
+ int x86_cache_alignment; /* In bytes */
+ int x86_power;
+ unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+ cpumask_t llc_shared_map; /* cpus sharing the last level cache */
+#endif
+ u16 x86_max_cores; /* cpuid returned max cores value */
+ u16 apicid;
+ u16 x86_clflush_size;
+#ifdef CONFIG_SMP
+ u16 booted_cores; /* number of cores as seen by OS */
+ u16 phys_proc_id; /* Physical processor id. */
+ u16 cpu_core_id; /* Core id */
+ u16 cpu_index; /* index into per_cpu list */
+#endif
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NSC 8
+#define X86_VENDOR_NUM 9
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct cpuinfo_x86 new_cpu_data;
+extern struct tss_struct doublefault_tss;
+extern __u32 cleared_cpu_caps[NCAPINTS];
+
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu) per_cpu(cpu_info, cpu)
+#define current_cpu_data cpu_data(smp_processor_id())
+#else
+#define cpu_data(cpu) boot_cpu_data
+#define current_cpu_data boot_cpu_data
+#endif
+
+void cpu_detect(struct cpuinfo_x86 *c);
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void identify_boot_cpu(void);
+extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern unsigned short num_cache_leaves;
+
+#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64)
+extern void detect_ht(struct cpuinfo_x86 *c);
+#else
+static inline void detect_ht(struct cpuinfo_x86 *c) {}
+#endif
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+static inline void load_cr3(pgd_t *pgdir)
+{
+ write_cr3(__pa(pgdir));
+}
+
+#ifdef CONFIG_X86_32
+/* This is the TSS defined by the hardware. */
+struct x86_hw_tss {
+ unsigned short back_link, __blh;
+ unsigned long sp0;
+ unsigned short ss0, __ss0h;
+ unsigned long sp1;
+ unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */
+ unsigned long sp2;
+ unsigned short ss2, __ss2h;
+ unsigned long __cr3;
+ unsigned long ip;
+ unsigned long flags;
+ unsigned long ax, cx, dx, bx;
+ unsigned long sp, bp, si, di;
+ unsigned short es, __esh;
+ unsigned short cs, __csh;
+ unsigned short ss, __ssh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, io_bitmap_base;
+} __attribute__((packed));
+#else
+struct x86_hw_tss {
+ u32 reserved1;
+ u64 sp0;
+ u64 sp1;
+ u64 sp2;
+ u64 reserved2;
+ u64 ist[7];
+ u32 reserved3;
+ u32 reserved4;
+ u16 reserved5;
+ u16 io_bitmap_base;
+} __attribute__((packed)) ____cacheline_aligned;
+#endif
+
+/*
+ * Size of io_bitmap.
+ */
+#define IO_BITMAP_BITS 65536
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
+
+struct tss_struct {
+ struct x86_hw_tss x86_tss;
+
+ /*
+ * The extra 1 is there because the CPU will access an
+ * additional byte beyond the end of the IO permission
+ * bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+ unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ /*
+ * Cache the current maximum and the last task that used the bitmap:
+ */
+ unsigned long io_bitmap_max;
+ struct thread_struct *io_bitmap_owner;
+ /*
+ * pads the TSS to be cacheline-aligned (size is 0x100)
+ */
+ unsigned long __cacheline_filler[35];
+ /*
+ * .. and then another 0x100 bytes for emergency kernel stack
+ */
+ unsigned long stack[64];
+} __attribute__((packed));
+
+DECLARE_PER_CPU(struct tss_struct, init_tss);
+
+/* Save the original ist values for checking stack pointers during debugging */
+struct orig_ist {
+ unsigned long ist[7];
+};
+
+#define MXCSR_DEFAULT 0x1f80
+
+struct i387_fsave_struct {
+ u32 cwd;
+ u32 swd;
+ u32 twd;
+ u32 fip;
+ u32 fcs;
+ u32 foo;
+ u32 fos;
+ u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ u32 status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+ u16 cwd;
+ u16 swd;
+ u16 twd;
+ u16 fop;
+ union {
+ struct {
+ u64 rip;
+ u64 rdp;
+ };
+ struct {
+ u32 fip;
+ u32 fcs;
+ u32 foo;
+ u32 fos;
+ };
+ };
+ u32 mxcsr;
+ u32 mxcsr_mask;
+ u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
+ u32 padding[24];
+} __attribute__((aligned(16)));
+
+struct i387_soft_struct {
+ u32 cwd;
+ u32 swd;
+ u32 twd;
+ u32 fip;
+ u32 fcs;
+ u32 foo;
+ u32 fos;
+ u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ u8 ftop, changed, lookahead, no_update, rm, alimit;
+ struct info *info;
+ u32 entry_eip;
+};
+
+union i387_union {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+ struct i387_soft_struct soft;
+};
+
+#ifdef CONFIG_X86_32
+/*
+ * the following now lives in the per cpu area:
+ * extern int cpu_llc_id[NR_CPUS];
+ */
+DECLARE_PER_CPU(u8, cpu_llc_id);
+#else
+DECLARE_PER_CPU(struct orig_ist, orig_ist);
+#endif
+
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern unsigned short num_cache_leaves;
+
+struct thread_struct {
+/* cached TLS descriptors. */
+ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+ unsigned long sp0;
+ unsigned long sp;
+#ifdef CONFIG_X86_32
+ unsigned long sysenter_cs;
+#else
+ unsigned long usersp; /* Copy from PDA */
+ unsigned short es, ds, fsindex, gsindex;
+#endif
+ unsigned long ip;
+ unsigned long fs;
+ unsigned long gs;
+/* Hardware debugging registers */
+ unsigned long debugreg0;
+ unsigned long debugreg1;
+ unsigned long debugreg2;
+ unsigned long debugreg3;
+ unsigned long debugreg6;
+ unsigned long debugreg7;
+/* fault info */
+ unsigned long cr2, trap_no, error_code;
+/* floating point info */
+ union i387_union i387 __attribute__((aligned(16)));;
+#ifdef CONFIG_X86_32
+/* virtual 86 mode info */
+ struct vm86_struct __user *vm86_info;
+ unsigned long screen_bitmap;
+ unsigned long v86flags, v86mask, saved_sp0;
+ unsigned int saved_fs, saved_gs;
+#endif
+/* IO permissions */
+ unsigned long *io_bitmap_ptr;
+ unsigned long iopl;
+/* max allowed port in the bitmap, in bytes: */
+ unsigned io_bitmap_max;
+/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
+ unsigned long debugctlmsr;
+/* Debug Store - if not 0 points to a DS Save Area configuration;
+ * goes into MSR_IA32_DS_AREA */
+ unsigned long ds_area_msr;
+};
+
+static inline unsigned long native_get_debugreg(int regno)
+{
+ unsigned long val = 0; /* Damn you, gcc! */
+
+ switch (regno) {
+ case 0:
+ asm("mov %%db0, %0" :"=r" (val)); break;
+ case 1:
+ asm("mov %%db1, %0" :"=r" (val)); break;
+ case 2:
+ asm("mov %%db2, %0" :"=r" (val)); break;
+ case 3:
+ asm("mov %%db3, %0" :"=r" (val)); break;
+ case 6:
+ asm("mov %%db6, %0" :"=r" (val)); break;
+ case 7:
+ asm("mov %%db7, %0" :"=r" (val)); break;
+ default:
+ BUG();
+ }
+ return val;
+}
+
+static inline void native_set_debugreg(int regno, unsigned long value)
+{
+ switch (regno) {
+ case 0:
+ asm("mov %0,%%db0" : /* no output */ :"r" (value));
+ break;
+ case 1:
+ asm("mov %0,%%db1" : /* no output */ :"r" (value));
+ break;
+ case 2:
+ asm("mov %0,%%db2" : /* no output */ :"r" (value));
+ break;
+ case 3:
+ asm("mov %0,%%db3" : /* no output */ :"r" (value));
+ break;
+ case 6:
+ asm("mov %0,%%db6" : /* no output */ :"r" (value));
+ break;
+ case 7:
+ asm("mov %0,%%db7" : /* no output */ :"r" (value));
+ break;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Set IOPL bits in EFLAGS from given mask
+ */
+static inline void native_set_iopl_mask(unsigned mask)
+{
+#ifdef CONFIG_X86_32
+ unsigned int reg;
+ __asm__ __volatile__ ("pushfl;"
+ "popl %0;"
+ "andl %1, %0;"
+ "orl %2, %0;"
+ "pushl %0;"
+ "popfl"
+ : "=&r" (reg)
+ : "i" (~X86_EFLAGS_IOPL), "r" (mask));
+#endif
+}
+
+static inline void native_load_sp0(struct tss_struct *tss,
+ struct thread_struct *thread)
+{
+ tss->x86_tss.sp0 = thread->sp0;
+#ifdef CONFIG_X86_32
+ /* Only happens when SEP is enabled, no need to test "SEP"arately */
+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+ tss->x86_tss.ss1 = thread->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+#endif
+}
+
+static inline void native_swapgs(void)
+{
+#ifdef CONFIG_X86_64
+ asm volatile("swapgs" ::: "memory");
+#endif
+}
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __cpuid native_cpuid
+#define paravirt_enabled() 0
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register) \
+ (var) = native_get_debugreg(register)
+#define set_debugreg(value, register) \
+ native_set_debugreg(register, value)
+
+static inline void load_sp0(struct tss_struct *tss,
+ struct thread_struct *thread)
+{
+ native_load_sp0(tss, thread);
+}
+
+#define set_iopl_mask native_set_iopl_mask
+#define SWAPGS swapgs
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4(unsigned long mask)
+{
+ unsigned cr4;
+ mmu_cr4_features |= mask;
+ cr4 = read_cr4();
+ cr4 |= mask;
+ write_cr4(cr4);
+}
+
+static inline void clear_in_cr4(unsigned long mask)
+{
+ unsigned cr4;
+ mmu_cr4_features &= ~mask;
+ cr4 = read_cr4();
+ cr4 &= ~mask;
+ write_cr4(cr4);
+}
+
+struct microcode_header {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int datasize;
+ unsigned int totalsize;
+ unsigned int reserved[3];
+};
+
+struct microcode {
+ struct microcode_header hdr;
+ unsigned int bits[0];
+};
+
+typedef struct microcode microcode_t;
+typedef struct microcode_header microcode_header_t;
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+ unsigned int sig;
+ unsigned int pf;
+ unsigned int cksum;
+};
+
+struct extended_sigtable {
+ unsigned int count;
+ unsigned int cksum;
+ unsigned int reserved[3];
+ struct extended_signature sigs[0];
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+extern void prepare_to_copy(struct task_struct *tsk);
+
+unsigned long get_wchan(struct task_struct *p);
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(unsigned int op,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(unsigned int op, int count,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return edx;
+}
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+/* Stop speculative execution */
+static inline void sync_core(void)
+{
+ int tmp;
+ asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+ : "ebx", "ecx", "edx", "memory");
+}
+
+#define cpu_relax() rep_nop()
+
+static inline void __monitor(const void *eax, unsigned long ecx,
+ unsigned long edx)
+{
+ /* "monitor %eax,%ecx,%edx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc8;"
+ : :"a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+ /* "mwait %eax,%ecx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc9;"
+ : :"a" (eax), "c" (ecx));
+}
+
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ /* "mwait %eax,%ecx;" */
+ asm volatile(
+ "sti; .byte 0x0f,0x01,0xc9;"
+ : :"a" (eax), "c" (ecx));
+}
+
+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
+
+extern int force_mwait;
+
+extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
+extern unsigned long boot_option_idle_override;
+
+extern void enable_sep_cpu(void);
+extern int sysenter_setup(void);
+
+/* Defined in head.S */
+extern struct desc_ptr early_gdt_descr;
+
+extern void cpu_set_gdt(int);
+extern void switch_to_new_gdt(void);
+extern void cpu_init(void);
+extern void init_gdt(int cpu);
+
+/* from system description table in BIOS. Mostly for MCA use, but
+ * others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/* Boot loader type from the setup header */
+extern int bootloader_type;
+
+extern char ignore_fpu_irq;
+#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+#ifdef CONFIG_X86_32
+#define BASE_PREFETCH ASM_NOP4
+#define ARCH_HAS_PREFETCH
+#else
+#define BASE_PREFETCH "prefetcht0 (%1)"
+#endif
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+/* It's not worth to care about 3dnow! prefetches for the K6
+ because they are microcoded there and very slow.
+ However we don't do prefetches for pre XP Athlons currently
+ That should be fixed. */
+static inline void prefetch(const void *x)
+{
+ alternative_input(BASE_PREFETCH,
+ "prefetchnta (%1)",
+ X86_FEATURE_XMM,
+ "r" (x));
+}
+
+/* 3dnow! prefetch to get an exclusive cache line. Useful for
+ spinlocks to avoid one state transition in the cache coherency protocol. */
+static inline void prefetchw(const void *x)
+{
+ alternative_input(BASE_PREFETCH,
+ "prefetchw (%1)",
+ X86_FEATURE_3DNOW,
+ "r" (x));
+}
+
+#define spin_lock_prefetch(x) prefetchw(x)
#ifdef CONFIG_X86_32
-# include "processor_32.h"
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE (PAGE_OFFSET)
+
+#define INIT_THREAD { \
+ .sp0 = sizeof(init_stack) + (long)&init_stack, \
+ .vm86_info = NULL, \
+ .sysenter_cs = __KERNEL_CS, \
+ .io_bitmap_ptr = NULL, \
+ .fs = __KERNEL_PERCPU, \
+}
+
+/*
+ * Note that the .io_bitmap member must be extra-big. This is because
+ * the CPU will access an additional byte beyond the end of the IO
+ * permission bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+#define INIT_TSS { \
+ .x86_tss = { \
+ .sp0 = sizeof(init_stack) + (long)&init_stack, \
+ .ss0 = __KERNEL_DS, \
+ .ss1 = __KERNEL_CS, \
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
+ }, \
+ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
+}
+
+#define start_thread(regs, new_eip, new_esp) do { \
+ __asm__("movl %0,%%gs": :"r" (0)); \
+ regs->fs = 0; \
+ set_fs(USER_DS); \
+ regs->ds = __USER_DS; \
+ regs->es = __USER_DS; \
+ regs->ss = __USER_DS; \
+ regs->cs = __USER_CS; \
+ regs->ip = new_eip; \
+ regs->sp = new_esp; \
+} while (0)
+
+
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
+#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
+#define KSTK_TOP(info) \
+({ \
+ unsigned long *__ptr = (unsigned long *)(info); \
+ (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
+})
+
+/*
+ * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * This is necessary to guarantee that the entire "struct pt_regs"
+ * is accessable even if the CPU haven't stored the SS/ESP registers
+ * on the stack (interrupt gate does not save these registers
+ * when switching to the same priv ring).
+ * Therefore beware: accessing the ss/esp fields of the
+ * "struct pt_regs" is possible, but they may contain the
+ * completely wrong values.
+ */
+#define task_pt_regs(task) \
+({ \
+ struct pt_regs *__regs__; \
+ __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
+ __regs__ - 1; \
+})
+
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
+
#else
-# include "processor_64.h"
+/*
+ * User space process size. 47bits minus one guard page.
+ */
+#define TASK_SIZE64 (0x800000000000UL - 4096)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+ 0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE64)
+#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE64)
+
+#define INIT_THREAD { \
+ .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
+
+#define INIT_TSS { \
+ .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
+
+#define start_thread(regs, new_rip, new_rsp) do { \
+ asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
+ load_gs_index(0); \
+ (regs)->ip = (new_rip); \
+ (regs)->sp = (new_rsp); \
+ write_pda(oldrsp, (new_rsp)); \
+ (regs)->cs = __USER_CS; \
+ (regs)->ss = __USER_DS; \
+ (regs)->flags = 0x200; \
+ set_fs(USER_DS); \
+} while (0)
+
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
+
+#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+#endif /* CONFIG_X86_64 */
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+
+#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+
#endif
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
deleted file mode 100644
index 13976b08683..00000000000
--- a/include/asm-x86/processor_32.h
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * include/asm-i386/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_I386_PROCESSOR_H
-#define __ASM_I386_PROCESSOR_H
-
-#include <asm/vm86.h>
-#include <asm/math_emu.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/sigcontext.h>
-#include <asm/cpufeature.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <asm/percpu.h>
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <asm/processor-flags.h>
-
-/* flag for disabling the tsc */
-extern int tsc_disable;
-
-struct desc_struct {
- unsigned long a,b;
-};
-
-#define desc_empty(desc) \
- (!((desc)->a | (desc)->b))
-
-#define desc_equal(desc1, desc2) \
- (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
-
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
- * before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- char wp_works_ok; /* It doesn't on 386's */
- char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
- char hard_math;
- char rfu;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- unsigned long x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
- int x86_cache_size; /* in KB - valid for CPUS which support this
- call */
- int x86_cache_alignment; /* In bytes */
- char fdiv_bug;
- char f00f_bug;
- char coma_bug;
- char pad0;
- int x86_power;
- unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- cpumask_t llc_shared_map; /* cpus sharing the last level cache */
-#endif
- unsigned char x86_max_cores; /* cpuid returned max cores value */
- unsigned char apicid;
- unsigned short x86_clflush_size;
-#ifdef CONFIG_SMP
- unsigned char booted_cores; /* number of cores as seen by OS */
- __u8 phys_proc_id; /* Physical processor id. */
- __u8 cpu_core_id; /* Core id */
- __u8 cpu_index; /* index into per_cpu list */
-#endif
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_NSC 8
-#define X86_VENDOR_NUM 9
-#define X86_VENDOR_UNKNOWN 0xff
-
-/*
- * capabilities of CPUs
- */
-
-extern struct cpuinfo_x86 boot_cpu_data;
-extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
-DECLARE_PER_CPU(struct tss_struct, init_tss);
-
-#ifdef CONFIG_SMP
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
-#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#define current_cpu_data cpu_data(smp_processor_id())
-#else
-#define cpu_data(cpu) boot_cpu_data
-#define current_cpu_data boot_cpu_data
-#endif
-
-/*
- * the following now lives in the per cpu area:
- * extern int cpu_llc_id[NR_CPUS];
- */
-DECLARE_PER_CPU(u8, cpu_llc_id);
-extern char ignore_fpu_irq;
-
-void __init cpu_detect(struct cpuinfo_x86 *c);
-
-extern void identify_boot_cpu(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
-
-#ifdef CONFIG_X86_HT
-extern void detect_ht(struct cpuinfo_x86 *c);
-#else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
-#endif
-
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-#define load_cr3(pgdir) write_cr3(__pa(pgdir))
-
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- unsigned cr4;
- mmu_cr4_features |= mask;
- cr4 = read_cr4();
- cr4 |= mask;
- write_cr4(cr4);
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- unsigned cr4;
- mmu_cr4_features &= ~mask;
- cr4 = read_cr4();
- cr4 &= ~mask;
- write_cr4(cr4);
-}
-
-/* Stop speculative execution */
-static inline void sync_core(void)
-{
- int tmp;
- asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
-}
-
-static inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
-{
- /* "monitor %eax,%ecx,%edx;" */
- asm volatile(
- ".byte 0x0f,0x01,0xc8;"
- : :"a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
- /* "mwait %eax,%ecx;" */
- asm volatile(
- ".byte 0x0f,0x01,0xc9;"
- : :"a" (eax), "c" (ecx));
-}
-
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
-/* from system description table in BIOS. Mostly for MCA use, but
-others may find it useful. */
-extern unsigned int machine_id;
-extern unsigned int machine_submodel_id;
-extern unsigned int BIOS_revision;
-extern unsigned int mca_pentium_flag;
-
-/* Boot loader type from the setup header */
-extern int bootloader_type;
-
-/*
- * User space process size: 3GB (default).
- */
-#define TASK_SIZE (PAGE_OFFSET)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
-
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
-extern void hard_disable_TSC(void);
-extern void disable_TSC(void);
-extern void hard_enable_TSC(void);
-
-/*
- * Size of io_bitmap.
- */
-#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
-
-struct i387_fsave_struct {
- long cwd;
- long swd;
- long twd;
- long fip;
- long fcs;
- long foo;
- long fos;
- long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
- long status; /* software status information */
-};
-
-struct i387_fxsave_struct {
- unsigned short cwd;
- unsigned short swd;
- unsigned short twd;
- unsigned short fop;
- long fip;
- long fcs;
- long foo;
- long fos;
- long mxcsr;
- long mxcsr_mask;
- long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
- long padding[56];
-} __attribute__ ((aligned (16)));
-
-struct i387_soft_struct {
- long cwd;
- long swd;
- long twd;
- long fip;
- long fcs;
- long foo;
- long fos;
- long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
- unsigned char ftop, changed, lookahead, no_update, rm, alimit;
- struct info *info;
- unsigned long entry_eip;
-};
-
-union i387_union {
- struct i387_fsave_struct fsave;
- struct i387_fxsave_struct fxsave;
- struct i387_soft_struct soft;
-};
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
-struct thread_struct;
-
-/* This is the TSS defined by the hardware. */
-struct i386_hw_tss {
- unsigned short back_link,__blh;
- unsigned long esp0;
- unsigned short ss0,__ss0h;
- unsigned long esp1;
- unsigned short ss1,__ss1h; /* ss1 is used to cache MSR_IA32_SYSENTER_CS */
- unsigned long esp2;
- unsigned short ss2,__ss2h;
- unsigned long __cr3;
- unsigned long eip;
- unsigned long eflags;
- unsigned long eax,ecx,edx,ebx;
- unsigned long esp;
- unsigned long ebp;
- unsigned long esi;
- unsigned long edi;
- unsigned short es, __esh;
- unsigned short cs, __csh;
- unsigned short ss, __ssh;
- unsigned short ds, __dsh;
- unsigned short fs, __fsh;
- unsigned short gs, __gsh;
- unsigned short ldt, __ldth;
- unsigned short trace, io_bitmap_base;
-} __attribute__((packed));
-
-struct tss_struct {
- struct i386_hw_tss x86_tss;
-
- /*
- * The extra 1 is there because the CPU will access an
- * additional byte beyond the end of the IO permission
- * bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
- /*
- * Cache the current maximum and the last task that used the bitmap:
- */
- unsigned long io_bitmap_max;
- struct thread_struct *io_bitmap_owner;
- /*
- * pads the TSS to be cacheline-aligned (size is 0x100)
- */
- unsigned long __cacheline_filler[35];
- /*
- * .. and then another 0x100 bytes for emergency kernel stack
- */
- unsigned long stack[64];
-} __attribute__((packed));
-
-#define ARCH_MIN_TASKALIGN 16
-
-struct thread_struct {
-/* cached TLS descriptors. */
- struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
- unsigned long esp0;
- unsigned long sysenter_cs;
- unsigned long eip;
- unsigned long esp;
- unsigned long fs;
- unsigned long gs;
-/* Hardware debugging registers */
- unsigned long debugreg[8]; /* %%db0-7 debug registers */
-/* fault info */
- unsigned long cr2, trap_no, error_code;
-/* floating point info */
- union i387_union i387;
-/* virtual 86 mode info */
- struct vm86_struct __user * vm86_info;
- unsigned long screen_bitmap;
- unsigned long v86flags, v86mask, saved_esp0;
- unsigned int saved_fs, saved_gs;
-/* IO permissions */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
-/* max allowed port in the bitmap, in bytes: */
- unsigned long io_bitmap_max;
-};
-
-#define INIT_THREAD { \
- .esp0 = sizeof(init_stack) + (long)&init_stack, \
- .vm86_info = NULL, \
- .sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
- .fs = __KERNEL_PERCPU, \
-}
-
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS { \
- .x86_tss = { \
- .esp0 = sizeof(init_stack) + (long)&init_stack, \
- .ss0 = __KERNEL_DS, \
- .ss1 = __KERNEL_CS, \
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
- }, \
- .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
-}
-
-#define start_thread(regs, new_eip, new_esp) do { \
- __asm__("movl %0,%%gs": :"r" (0)); \
- regs->xfs = 0; \
- set_fs(USER_DS); \
- regs->xds = __USER_DS; \
- regs->xes = __USER_DS; \
- regs->xss = __USER_DS; \
- regs->xcs = __USER_CS; \
- regs->eip = new_eip; \
- regs->esp = new_esp; \
-} while (0)
-
-/* Forward declaration, a strange C thing */
-struct task_struct;
-struct mm_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack);
-
-unsigned long get_wchan(struct task_struct *p);
-
-#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info) \
-({ \
- unsigned long *__ptr = (unsigned long *)(info); \
- (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
-})
-
-/*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessable even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the xss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task) \
-({ \
- struct pt_regs *__regs__; \
- __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
- __regs__ - 1; \
-})
-
-#define KSTK_EIP(task) (task_pt_regs(task)->eip)
-#define KSTK_ESP(task) (task_pt_regs(task)->esp)
-
-
-struct microcode_header {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int datasize;
- unsigned int totalsize;
- unsigned int reserved[3];
-};
-
-struct microcode {
- struct microcode_header hdr;
- unsigned int bits[0];
-};
-
-typedef struct microcode microcode_t;
-typedef struct microcode_header microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
- unsigned int sig;
- unsigned int pf;
- unsigned int cksum;
-};
-
-struct extended_sigtable {
- unsigned int count;
- unsigned int cksum;
- unsigned int reserved[3];
- struct extended_signature sigs[0];
-};
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax() rep_nop()
-
-static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
- tss->x86_tss.esp0 = thread->esp0;
- /* This can only happen when SEP is enabled, no need to test "SEP"arately */
- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
- tss->x86_tss.ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-}
-
-
-static inline unsigned long native_get_debugreg(int regno)
-{
- unsigned long val = 0; /* Damn you, gcc! */
-
- switch (regno) {
- case 0:
- asm("movl %%db0, %0" :"=r" (val)); break;
- case 1:
- asm("movl %%db1, %0" :"=r" (val)); break;
- case 2:
- asm("movl %%db2, %0" :"=r" (val)); break;
- case 3:
- asm("movl %%db3, %0" :"=r" (val)); break;
- case 6:
- asm("movl %%db6, %0" :"=r" (val)); break;
- case 7:
- asm("movl %%db7, %0" :"=r" (val)); break;
- default:
- BUG();
- }
- return val;
-}
-
-static inline void native_set_debugreg(int regno, unsigned long value)
-{
- switch (regno) {
- case 0:
- asm("movl %0,%%db0" : /* no output */ :"r" (value));
- break;
- case 1:
- asm("movl %0,%%db1" : /* no output */ :"r" (value));
- break;
- case 2:
- asm("movl %0,%%db2" : /* no output */ :"r" (value));
- break;
- case 3:
- asm("movl %0,%%db3" : /* no output */ :"r" (value));
- break;
- case 6:
- asm("movl %0,%%db6" : /* no output */ :"r" (value));
- break;
- case 7:
- asm("movl %0,%%db7" : /* no output */ :"r" (value));
- break;
- default:
- BUG();
- }
-}
-
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
- unsigned int reg;
- __asm__ __volatile__ ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-}
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define paravirt_enabled() 0
-#define __cpuid native_cpuid
-
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
- native_load_esp0(tss, thread);
-}
-
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register) \
- (var) = native_get_debugreg(register)
-#define set_debugreg(value, register) \
- native_set_debugreg(register, value)
-
-#define set_iopl_mask native_set_iopl_mask
-#endif /* CONFIG_PARAVIRT */
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return edx;
-}
-
-/* generic versions from gas */
-#define GENERIC_NOP1 ".byte 0x90\n"
-#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
-
-/* Opteron nops */
-#define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2 ".byte 0x66,0x90\n"
-#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
-#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5 K8_NOP3 K8_NOP2
-#define K8_NOP6 K8_NOP3 K8_NOP3
-#define K8_NOP7 K8_NOP4 K8_NOP3
-#define K8_NOP8 K8_NOP4 K8_NOP4
-
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1 GENERIC_NOP1
-#define K7_NOP2 ".byte 0x8b,0xc0\n"
-#define K7_NOP3 ".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5 K7_NOP4 ASM_NOP1
-#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8 K7_NOP7 ASM_NOP1
-
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
-#define P6_NOP1 GENERIC_NOP1
-#define P6_NOP2 ".byte 0x66,0x90\n"
-#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
-
-#ifdef CONFIG_MK8
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
-#elif defined(CONFIG_MK7)
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
- defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
- defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
-#else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
-#endif
-
-#define ASM_NOP_MAX 8
-
-/* Prefetch instructions for Pentium III and AMD Athlon */
-/* It's not worth to care about 3dnow! prefetches for the K6
- because they are microcoded there and very slow.
- However we don't do prefetches for pre XP Athlons currently
- That should be fixed. */
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(const void *x)
-{
- alternative_input(ASM_NOP4,
- "prefetchnta (%1)",
- X86_FEATURE_XMM,
- "r" (x));
-}
-
-#define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-/* 3dnow! prefetch to get an exclusive cache line. Useful for
- spinlocks to avoid one state transition in the cache coherency protocol. */
-static inline void prefetchw(const void *x)
-{
- alternative_input(ASM_NOP4,
- "prefetchw (%1)",
- X86_FEATURE_3DNOW,
- "r" (x));
-}
-#define spin_lock_prefetch(x) prefetchw(x)
-
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
-
-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
-
-extern unsigned long boot_option_idle_override;
-extern void enable_sep_cpu(void);
-extern int sysenter_setup(void);
-
-/* Defined in head.S */
-extern struct Xgt_desc_struct early_gdt_descr;
-
-extern void cpu_set_gdt(int);
-extern void switch_to_new_gdt(void);
-extern void cpu_init(void);
-extern void init_gdt(int cpu);
-
-extern int force_mwait;
-
-#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
deleted file mode 100644
index e4f19970a82..00000000000
--- a/include/asm-x86/processor_64.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * include/asm-x86_64/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_X86_64_PROCESSOR_H
-#define __ASM_X86_64_PROCESSOR_H
-
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/sigcontext.h>
-#include <asm/cpufeature.h>
-#include <linux/threads.h>
-#include <asm/msr.h>
-#include <asm/current.h>
-#include <asm/system.h>
-#include <asm/mmsegment.h>
-#include <asm/percpu.h>
-#include <linux/personality.h>
-#include <linux/cpumask.h>
-#include <asm/processor-flags.h>
-
-#define TF_MASK 0x00000100
-#define IF_MASK 0x00000200
-#define IOPL_MASK 0x00003000
-#define NT_MASK 0x00004000
-#define VM_MASK 0x00020000
-#define AC_MASK 0x00040000
-#define VIF_MASK 0x00080000 /* virtual interrupt flag */
-#define VIP_MASK 0x00100000 /* virtual interrupt pending */
-#define ID_MASK 0x00200000
-
-#define desc_empty(desc) \
- (!((desc)->a | (desc)->b))
-
-#define desc_equal(desc1, desc2) \
- (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
-
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- */
-
-struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- __u32 x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
- int x86_cache_size; /* in KB */
- int x86_clflush_size;
- int x86_cache_alignment;
- int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
- __u8 x86_virt_bits, x86_phys_bits;
- __u8 x86_max_cores; /* cpuid returned max cores value */
- __u32 x86_power;
- __u32 extended_cpuid_level; /* Max extended CPUID function supported */
- unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- cpumask_t llc_shared_map; /* cpus sharing the last level cache */
-#endif
- __u8 apicid;
-#ifdef CONFIG_SMP
- __u8 booted_cores; /* number of cores as seen by OS */
- __u8 phys_proc_id; /* Physical Processor id. */
- __u8 cpu_core_id; /* Core id. */
- __u8 cpu_index; /* index into per_cpu list */
-#endif
-} ____cacheline_aligned;
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_NUM 8
-#define X86_VENDOR_UNKNOWN 0xff
-
-#ifdef CONFIG_SMP
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
-#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#define current_cpu_data cpu_data(smp_processor_id())
-#else
-#define cpu_data(cpu) boot_cpu_data
-#define current_cpu_data boot_cpu_data
-#endif
-
-extern char ignore_irq13;
-
-extern void identify_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
-
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "orq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features &= ~mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "andq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (~mask)
- :"ax");
-}
-
-
-/*
- * User space process size. 47bits minus one guard page.
- */
-#define TASK_SIZE64 (0x800000000000UL - 4096)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
-
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3)
-
-/*
- * Size of io_bitmap.
- */
-#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
-
-struct i387_fxsave_struct {
- u16 cwd;
- u16 swd;
- u16 twd;
- u16 fop;
- u64 rip;
- u64 rdp;
- u32 mxcsr;
- u32 mxcsr_mask;
- u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
- u32 padding[24];
-} __attribute__ ((aligned (16)));
-
-union i387_union {
- struct i387_fxsave_struct fxsave;
-};
-
-struct tss_struct {
- u32 reserved1;
- u64 rsp0;
- u64 rsp1;
- u64 rsp2;
- u64 reserved2;
- u64 ist[7];
- u32 reserved3;
- u32 reserved4;
- u16 reserved5;
- u16 io_bitmap_base;
- /*
- * The extra 1 is there because the CPU will access an
- * additional byte beyond the end of the IO permission
- * bitmap. The extra byte must be all 1 bits, and must
- * be within the limit. Thus we have:
- *
- * 128 bytes, the bitmap itself, for ports 0..0x3ff
- * 8 bytes, for an extra "long" of ~0UL
- */
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
-} __attribute__((packed)) ____cacheline_aligned;
-
-
-extern struct cpuinfo_x86 boot_cpu_data;
-DECLARE_PER_CPU(struct tss_struct,init_tss);
-/* Save the original ist values for checking stack pointers during debugging */
-struct orig_ist {
- unsigned long ist[7];
-};
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-#ifdef CONFIG_X86_VSMP
-#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
-#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
-#else
-#define ARCH_MIN_TASKALIGN 16
-#define ARCH_MIN_MMSTRUCT_ALIGN 0
-#endif
-
-struct thread_struct {
- unsigned long rsp0;
- unsigned long rsp;
- unsigned long userrsp; /* Copy from PDA */
- unsigned long fs;
- unsigned long gs;
- unsigned short es, ds, fsindex, gsindex;
-/* Hardware debugging registers */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
-/* fault info */
- unsigned long cr2, trap_no, error_code;
-/* floating point info */
- union i387_union i387 __attribute__((aligned(16)));
-/* IO permissions. the bitmap could be moved into the GDT, that would make
- switch faster for a limited number of ioperm using tasks. -AK */
- int ioperm;
- unsigned long *io_bitmap_ptr;
- unsigned io_bitmap_max;
-/* cached TLS descriptors. */
- u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
-} __attribute__((aligned(16)));
-
-#define INIT_THREAD { \
- .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS { \
- .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_MMAP \
-{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
-
-#define start_thread(regs,new_rip,new_rsp) do { \
- asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
- load_gs_index(0); \
- (regs)->rip = (new_rip); \
- (regs)->rsp = (new_rsp); \
- write_pda(oldrsp, (new_rsp)); \
- (regs)->cs = __USER_CS; \
- (regs)->ss = __USER_DS; \
- (regs)->eflags = 0x200; \
- set_fs(USER_DS); \
-} while(0)
-
-#define get_debugreg(var, register) \
- __asm__("movq %%db" #register ", %0" \
- :"=r" (var))
-#define set_debugreg(value, register) \
- __asm__("movq %0,%%db" #register \
- : /* no output */ \
- :"r" (value))
-
-struct task_struct;
-struct mm_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8))
-
-extern unsigned long get_wchan(struct task_struct *p);
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.rsp0 - 1)
-#define KSTK_EIP(tsk) (task_pt_regs(tsk)->rip)
-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
-
-
-struct microcode_header {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int datasize;
- unsigned int totalsize;
- unsigned int reserved[3];
-};
-
-struct microcode {
- struct microcode_header hdr;
- unsigned int bits[0];
-};
-
-typedef struct microcode microcode_t;
-typedef struct microcode_header microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
- unsigned int sig;
- unsigned int pf;
- unsigned int cksum;
-};
-
-struct extended_sigtable {
- unsigned int count;
- unsigned int cksum;
- unsigned int reserved[3];
- struct extended_signature sigs[0];
-};
-
-
-#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
-#else
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
-#endif
-
-/* Opteron nops */
-#define K8_NOP1 ".byte 0x90\n"
-#define K8_NOP2 ".byte 0x66,0x90\n"
-#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
-#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5 K8_NOP3 K8_NOP2
-#define K8_NOP6 K8_NOP3 K8_NOP3
-#define K8_NOP7 K8_NOP4 K8_NOP3
-#define K8_NOP8 K8_NOP4 K8_NOP4
-
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
-#define P6_NOP1 ".byte 0x90\n"
-#define P6_NOP2 ".byte 0x66,0x90\n"
-#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
-
-#define ASM_NOP_MAX 8
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-/* Stop speculative execution */
-static inline void sync_core(void)
-{
- int tmp;
- asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
-}
-
-#define ARCH_HAS_PREFETCHW 1
-static inline void prefetchw(void *x)
-{
- alternative_input("prefetcht0 (%1)",
- "prefetchw (%1)",
- X86_FEATURE_3DNOW,
- "r" (x));
-}
-
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-
-#define spin_lock_prefetch(x) prefetchw(x)
-
-#define cpu_relax() rep_nop()
-
-static inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
-{
- /* "monitor %eax,%ecx,%edx;" */
- asm volatile(
- ".byte 0x0f,0x01,0xc8;"
- : :"a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
- /* "mwait %eax,%ecx;" */
- asm volatile(
- ".byte 0x0f,0x01,0xc9;"
- : :"a" (eax), "c" (ecx));
-}
-
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
-{
- /* "mwait %eax,%ecx;" */
- asm volatile(
- "sti; .byte 0x0f,0x01,0xc9;"
- : :"a" (eax), "c" (ecx));
-}
-
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
-#define stack_current() \
-({ \
- struct thread_info *ti; \
- asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
- ti->task; \
-})
-
-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
-
-extern unsigned long boot_option_idle_override;
-/* Boot loader type from the setup header */
-extern int bootloader_type;
-
-#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
-
-#endif /* __ASM_X86_64_PROCESSOR_H */
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index dabba55f7ed..68563c0709a 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -5,87 +5,24 @@
/* misc architecture specific prototypes */
-struct cpuinfo_x86;
-struct pt_regs;
-
-extern void start_kernel(void);
-extern void pda_init(int);
-
extern void early_idt_handler(void);
-extern void mcheck_init(struct cpuinfo_x86 *c);
extern void init_memory_mapping(unsigned long start, unsigned long end);
-extern void system_call(void);
-extern int kernel_syscall(void);
+extern void system_call(void);
extern void syscall_init(void);
extern void ia32_syscall(void);
-extern void ia32_cstar_target(void);
-extern void ia32_sysenter_target(void);
-
-extern void config_acpi_tables(void);
-extern void ia32_syscall(void);
-
-extern int pmtimer_mark_offset(void);
-extern void pmtimer_resume(void);
-extern void pmtimer_wait(unsigned);
-extern unsigned int do_gettimeoffset_pm(void);
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#else
-#define pmtmr_ioport 0
-#endif
-extern int nohpet;
-
-extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
-
-extern void early_identify_cpu(struct cpuinfo_x86 *c);
-
-extern int k8_scan_nodes(unsigned long start, unsigned long end);
-
-extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern unsigned long numa_free_all_bootmem(void);
+extern void ia32_cstar_target(void);
+extern void ia32_sysenter_target(void);
extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
-extern void load_gs_index(unsigned gs);
-
-extern unsigned long end_pfn_map;
-
-extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long * rsp);
-extern void show_registers(struct pt_regs *regs);
-
-extern void exception_table_check(void);
-
-extern void acpi_reserve_bootmem(void);
-
-extern void swap_low_mappings(void);
-
-extern void __show_regs(struct pt_regs * regs);
-extern void show_regs(struct pt_regs * regs);
-
extern void syscall32_cpu_init(void);
-extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
-
-extern void early_quirks(void);
extern void check_efer(void);
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
-
-extern unsigned long table_start, table_end;
-
-extern int exception_trace;
-extern unsigned cpu_khz;
-extern unsigned tsc_khz;
-
extern int reboot_force;
-extern int notsc_setup(char *);
-
-extern int gsi_irq_sharing(int gsi);
-
-extern int force_mwait;
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 7524e123383..81a8ee4c55f 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -78,4 +78,66 @@
# define PTRACE_SYSEMU_SINGLESTEP 32
#endif
+#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
+/* configuration/status structure used in PTRACE_BTS_CONFIG and
+ PTRACE_BTS_STATUS commands.
+*/
+struct ptrace_bts_config {
+ /* requested or actual size of BTS buffer in bytes */
+ u32 size;
+ /* bitmask of below flags */
+ u32 flags;
+ /* buffer overflow signal */
+ u32 signal;
+ /* actual size of bts_struct in bytes */
+ u32 bts_size;
+};
+#endif
+
+#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
+#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
+#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
+ instead of wrapping around */
+#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available
+ instead of failing */
+
+#define PTRACE_BTS_CONFIG 40
+/* Configure branch trace recording.
+ ADDR points to a struct ptrace_bts_config.
+ DATA gives the size of that buffer.
+ A new buffer is allocated, iff the size changes.
+ Returns the number of bytes read.
+*/
+#define PTRACE_BTS_STATUS 41
+/* Return the current configuration in a struct ptrace_bts_config
+ pointed to by ADDR; DATA gives the size of that buffer.
+ Returns the number of bytes written.
+*/
+#define PTRACE_BTS_SIZE 42
+/* Return the number of available BTS records.
+ DATA and ADDR are ignored.
+*/
+#define PTRACE_BTS_GET 43
+/* Get a single BTS record.
+ DATA defines the index into the BTS array, where 0 is the newest
+ entry, and higher indices refer to older entries.
+ ADDR is pointing to struct bts_struct (see asm/ds.h).
+*/
+#define PTRACE_BTS_CLEAR 44
+/* Clear the BTS buffer.
+ DATA and ADDR are ignored.
+*/
+#define PTRACE_BTS_DRAIN 45
+/* Read all available BTS records and clear the buffer.
+ ADDR points to an array of struct bts_struct.
+ DATA gives the size of that buffer.
+ BTS records are read from oldest to newest.
+ Returns number of BTS records drained.
+*/
+
#endif
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 51ddb259087..d9e04b46a44 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -4,12 +4,15 @@
#include <linux/compiler.h> /* For __user */
#include <asm/ptrace-abi.h>
+
#ifndef __ASSEMBLY__
#ifdef __i386__
/* this struct defines the way the registers are stored on the
stack during a system call. */
+#ifndef __KERNEL__
+
struct pt_regs {
long ebx;
long ecx;
@@ -21,7 +24,7 @@ struct pt_regs {
int xds;
int xes;
int xfs;
- /* int xgs; */
+ /* int gs; */
long orig_eax;
long eip;
int xcs;
@@ -30,44 +33,37 @@ struct pt_regs {
int xss;
};
-#ifdef __KERNEL__
+#else /* __KERNEL__ */
+
+struct pt_regs {
+ long bx;
+ long cx;
+ long dx;
+ long si;
+ long di;
+ long bp;
+ long ax;
+ int ds;
+ int es;
+ int fs;
+ /* int gs; */
+ long orig_ax;
+ long ip;
+ int cs;
+ long flags;
+ long sp;
+ int ss;
+};
#include <asm/vm86.h>
#include <asm/segment.h>
-struct task_struct;
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
-
-/*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value. This tricky test checks that with
- * one comparison. Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
- */
-static inline int user_mode(struct pt_regs *regs)
-{
- return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
-}
-static inline int user_mode_vm(struct pt_regs *regs)
-{
- return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
-}
-static inline int v8086_mode(struct pt_regs *regs)
-{
- return (regs->eflags & VM_MASK);
-}
-
-#define instruction_pointer(regs) ((regs)->eip)
-#define frame_pointer(regs) ((regs)->ebp)
-#define stack_pointer(regs) ((unsigned long)(regs))
-#define regs_return_value(regs) ((regs)->eax)
-
-extern unsigned long profile_pc(struct pt_regs *regs);
#endif /* __KERNEL__ */
#else /* __i386__ */
+#ifndef __KERNEL__
+
struct pt_regs {
unsigned long r15;
unsigned long r14;
@@ -96,47 +92,143 @@ struct pt_regs {
/* top of stack page */
};
+#else /* __KERNEL__ */
+
+struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long bp;
+ unsigned long bx;
+/* arguments: non interrupts/non tracing syscalls only save upto here*/
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long ax;
+ unsigned long cx;
+ unsigned long dx;
+ unsigned long si;
+ unsigned long di;
+ unsigned long orig_ax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+ unsigned long ip;
+ unsigned long cs;
+ unsigned long flags;
+ unsigned long sp;
+ unsigned long ss;
+/* top of stack page */
+};
+
+#endif /* __KERNEL__ */
+#endif /* !__i386__ */
+
#ifdef __KERNEL__
-#define user_mode(regs) (!!((regs)->cs & 3))
-#define user_mode_vm(regs) user_mode(regs)
-#define instruction_pointer(regs) ((regs)->rip)
-#define frame_pointer(regs) ((regs)->rbp)
-#define stack_pointer(regs) ((regs)->rsp)
-#define regs_return_value(regs) ((regs)->rax)
+/* the DS BTS struct is used for ptrace as well */
+#include <asm/ds.h>
+
+struct task_struct;
+
+extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
extern unsigned long profile_pc(struct pt_regs *regs);
+
+extern unsigned long
+convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
+
+#ifdef CONFIG_X86_32
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
+#else
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+#endif
-struct task_struct;
+#define regs_return_value(regs) ((regs)->ax)
+
+/*
+ * user_mode_vm(regs) determines whether a register set came from user mode.
+ * This is true if V8086 mode was enabled OR if the register set was from
+ * protected mode with RPL-3 CS value. This tricky test checks that with
+ * one comparison. Many places in the kernel can bypass this full check
+ * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ */
+static inline int user_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+ return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+#else
+ return !!(regs->cs & 3);
+#endif
+}
+
+static inline int user_mode_vm(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+ return ((regs->cs & SEGMENT_RPL_MASK) |
+ (regs->flags & VM_MASK)) >= USER_RPL;
+#else
+ return user_mode(regs);
+#endif
+}
+
+static inline int v8086_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+ return (regs->flags & VM_MASK);
+#else
+ return 0; /* No V86 mode support in long mode */
+#endif
+}
+
+/*
+ * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
+ * when it traps. So regs will be the current sp.
+ *
+ * This is valid only for kernel mode traps.
+ */
+static inline unsigned long kernel_trap_sp(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_32
+ return (unsigned long)regs;
+#else
+ return regs->sp;
+#endif
+}
+
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->ip;
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return regs->bp;
+}
+
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
+extern void user_enable_block_step(struct task_struct *);
+#ifdef CONFIG_X86_DEBUGCTLMSR
+#define arch_has_block_step() (1)
+#else
+#define arch_has_block_step() (boot_cpu_data.x86 >= 6)
+#endif
+
+struct user_desc;
+extern int do_get_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *info);
+extern int do_set_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *info, int can_allocate);
-extern unsigned long
-convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
-
-enum {
- EF_CF = 0x00000001,
- EF_PF = 0x00000004,
- EF_AF = 0x00000010,
- EF_ZF = 0x00000040,
- EF_SF = 0x00000080,
- EF_TF = 0x00000100,
- EF_IE = 0x00000200,
- EF_DF = 0x00000400,
- EF_OF = 0x00000800,
- EF_IOPL = 0x00003000,
- EF_IOPL_RING0 = 0x00000000,
- EF_IOPL_RING1 = 0x00001000,
- EF_IOPL_RING2 = 0x00002000,
- EF_NT = 0x00004000, /* nested task */
- EF_RF = 0x00010000, /* resume */
- EF_VM = 0x00020000, /* virtual mode */
- EF_AC = 0x00040000, /* alignment */
- EF_VIF = 0x00080000, /* virtual interrupt */
- EF_VIP = 0x00100000, /* virtual interrupt pending */
- EF_ID = 0x00200000, /* id */
-};
#endif /* __KERNEL__ */
-#endif /* !__i386__ */
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 9b6dd093a9f..46f725b0bc8 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -1,5 +1,20 @@
-#ifdef CONFIG_X86_32
-# include "resume-trace_32.h"
-#else
-# include "resume-trace_64.h"
+#ifndef _ASM_X86_RESUME_TRACE_H
+#define _ASM_X86_RESUME_TRACE_H
+
+#include <asm/asm.h>
+
+#define TRACE_RESUME(user) do { \
+ if (pm_trace_enabled) { \
+ void *tracedata; \
+ asm volatile(_ASM_MOV_UL " $1f,%0\n" \
+ ".section .tracedata,\"a\"\n" \
+ "1:\t.word %c1\n\t" \
+ _ASM_PTR " %c2\n" \
+ ".previous" \
+ :"=r" (tracedata) \
+ : "i" (__LINE__), "i" (__FILE__)); \
+ generate_resume_trace(tracedata, user); \
+ } \
+} while (0)
+
#endif
diff --git a/include/asm-x86/resume-trace_32.h b/include/asm-x86/resume-trace_32.h
deleted file mode 100644
index ec9cfd65623..00000000000
--- a/include/asm-x86/resume-trace_32.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#define TRACE_RESUME(user) do { \
- if (pm_trace_enabled) { \
- void *tracedata; \
- asm volatile("movl $1f,%0\n" \
- ".section .tracedata,\"a\"\n" \
- "1:\t.word %c1\n" \
- "\t.long %c2\n" \
- ".previous" \
- :"=r" (tracedata) \
- : "i" (__LINE__), "i" (__FILE__)); \
- generate_resume_trace(tracedata, user); \
- } \
-} while (0)
diff --git a/include/asm-x86/resume-trace_64.h b/include/asm-x86/resume-trace_64.h
deleted file mode 100644
index 34bf998fdf6..00000000000
--- a/include/asm-x86/resume-trace_64.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#define TRACE_RESUME(user) do { \
- if (pm_trace_enabled) { \
- void *tracedata; \
- asm volatile("movq $1f,%0\n" \
- ".section .tracedata,\"a\"\n" \
- "1:\t.word %c1\n" \
- "\t.quad %c2\n" \
- ".previous" \
- :"=r" (tracedata) \
- : "i" (__LINE__), "i" (__FILE__)); \
- generate_resume_trace(tracedata, user); \
- } \
-} while (0)
diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h
index c7350f6d201..97cdcc9887b 100644
--- a/include/asm-x86/rio.h
+++ b/include/asm-x86/rio.h
@@ -1,6 +1,6 @@
/*
- * Derived from include/asm-i386/mach-summit/mach_mpparse.h
- * and include/asm-i386/mach-default/bios_ebda.h
+ * Derived from include/asm-x86/mach-summit/mach_mpparse.h
+ * and include/asm-x86/mach-default/bios_ebda.h
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*/
diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h
index f2b64a429e6..6a8c0d64510 100644
--- a/include/asm-x86/rwlock.h
+++ b/include/asm-x86/rwlock.h
@@ -2,7 +2,6 @@
#define _ASM_X86_RWLOCK_H
#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
/* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index 041906f3c6d..520a379f4b8 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h
@@ -2,7 +2,7 @@
*
* Written by David Howells (dhowells@redhat.com).
*
- * Derived from asm-i386/semaphore.h
+ * Derived from asm-x86/semaphore.h
*
*
* The MSW of the count is the negated number of active writers and waiting
@@ -44,10 +44,14 @@
struct rwsem_waiter;
-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
-extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem));
+extern asmregparm struct rw_semaphore *
+ rwsem_down_read_failed(struct rw_semaphore *sem);
+extern asmregparm struct rw_semaphore *
+ rwsem_down_write_failed(struct rw_semaphore *sem);
+extern asmregparm struct rw_semaphore *
+ rwsem_wake(struct rw_semaphore *);
+extern asmregparm struct rw_semaphore *
+ rwsem_downgrade_wake(struct rw_semaphore *sem);
/*
* the semaphore definition
diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index 3a1e76257a2..d13c197866d 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h
@@ -1,5 +1,35 @@
+#ifndef _ASM_X86_SCATTERLIST_H
+#define _ASM_X86_SCATTERLIST_H
+
+#include <asm/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+#ifdef CONFIG_X86_64
+ unsigned int dma_length;
+#endif
+};
+
+#define ARCH_HAS_SG_CHAIN
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+/*
+ * These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
#ifdef CONFIG_X86_32
-# include "scatterlist_32.h"
+# define sg_dma_len(sg) ((sg)->length)
#else
-# include "scatterlist_64.h"
+# define sg_dma_len(sg) ((sg)->dma_length)
+#endif
+
#endif
diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h
deleted file mode 100644
index 0e7d997a34b..00000000000
--- a/include/asm-x86/scatterlist_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _I386_SCATTERLIST_H
-#define _I386_SCATTERLIST_H
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
- unsigned long sg_magic;
-#endif
- unsigned long page_link;
- unsigned int offset;
- dma_addr_t dma_address;
- unsigned int length;
-};
-
-#define ARCH_HAS_SG_CHAIN
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg) ((sg)->dma_address)
-#define sg_dma_len(sg) ((sg)->length)
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h
deleted file mode 100644
index 1847c72befe..00000000000
--- a/include/asm-x86/scatterlist_64.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _X8664_SCATTERLIST_H
-#define _X8664_SCATTERLIST_H
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
- unsigned long sg_magic;
-#endif
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
- dma_addr_t dma_address;
- unsigned int dma_length;
-};
-
-#define ARCH_HAS_SG_CHAIN
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg) ((sg)->dma_address)
-#define sg_dma_len(sg) ((sg)->dma_length)
-
-#endif
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 605068280e2..23f0535fec6 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,5 +1,204 @@
+#ifndef _ASM_X86_SEGMENT_H_
+#define _ASM_X86_SEGMENT_H_
+
+/* Simple and small GDT entries for booting only */
+
+#define GDT_ENTRY_BOOT_CS 2
+#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
+
+#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
+
+#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
+#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
+
#ifdef CONFIG_X86_32
-# include "segment_32.h"
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ * 1 - reserved
+ * 2 - reserved
+ * 3 - reserved
+ *
+ * 4 - unused <==== new cacheline
+ * 5 - unused
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 6 - TLS segment #1 [ glibc's TLS segment ]
+ * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
+ * 8 - TLS segment #3
+ * 9 - reserved
+ * 10 - reserved
+ * 11 - reserved
+ *
+ * ------- start of kernel segments:
+ *
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - default user CS
+ * 15 - default user DS
+ * 16 - TSS
+ * 17 - LDT
+ * 18 - PNPBIOS support (16->32 gate)
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - PNPBIOS support
+ * 22 - PNPBIOS support
+ * 23 - APM BIOS support
+ * 24 - APM BIOS support
+ * 25 - APM BIOS support
+ *
+ * 26 - ESPFIX small SS
+ * 27 - per-cpu [ offset to per-cpu data area ]
+ * 28 - unused
+ * 29 - unused
+ * 30 - unused
+ * 31 - TSS for double fault handler
+ */
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+
+#define GDT_ENTRY_DEFAULT_USER_CS 14
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS 15
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE 12
+
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
+
+#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
+#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
+#ifdef CONFIG_SMP
+#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
#else
-# include "segment_64.h"
+#define __KERNEL_PERCPU 0
+#endif
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS 31
+
+/*
+ * The GDT has 32 entries
+ */
+#define GDT_ENTRIES 32
+
+/* The PnP BIOS entries in the GDT */
+#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
+#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
+#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
+#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
+#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
+
+/* The PnP BIOS selectors */
+#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
+#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
+#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
+#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
+#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
+
+/* Bottom two bits of selector give the ring privilege level */
+#define SEGMENT_RPL_MASK 0x3
+/* Bit 2 is table indicator (LDT/GDT) */
+#define SEGMENT_TI_MASK 0x4
+
+/* User mode is privilege level 3 */
+#define USER_RPL 0x3
+/* LDT segment has TI set, GDT has it cleared */
+#define SEGMENT_LDT 0x4
+#define SEGMENT_GDT 0x0
+
+/*
+ * Matching rules for certain types of segments.
+ */
+
+/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
+#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8)
+
+/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
+#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
+
+/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
+#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+
+
+#else
+#include <asm/cache.h>
+
+#define __KERNEL_CS 0x10
+#define __KERNEL_DS 0x18
+
+#define __KERNEL32_CS 0x08
+
+/*
+ * we cannot use the same code segment descriptor for user and kernel
+ * -- not even in the long flat mode, because of different DPL /kkeil
+ * The segment offset needs to contain a RPL. Grr. -AK
+ * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ */
+
+#define __USER32_CS 0x23 /* 4*8+3 */
+#define __USER_DS 0x2b /* 5*8+3 */
+#define __USER_CS 0x33 /* 6*8+3 */
+#define __USER32_DS __USER_DS
+
+#define GDT_ENTRY_TSS 8 /* needs two entries */
+#define GDT_ENTRY_LDT 10 /* needs two entries */
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
+
+#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
+#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
+
+/* TLS indexes for 64bit - hardcoded in arch_prctl */
+#define FS_TLS 0
+#define GS_TLS 1
+
+#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
+
+#define GDT_ENTRIES 16
+
+#endif
+
+#ifndef CONFIG_PARAVIRT
+#define get_kernel_rpl() 0
+#endif
+
+/* User mode is privilege level 3 */
+#define USER_RPL 0x3
+/* LDT segment has TI set, GDT has it cleared */
+#define SEGMENT_LDT 0x4
+#define SEGMENT_GDT 0x0
+
+/* Bottom two bits of selector give the ring privilege level */
+#define SEGMENT_RPL_MASK 0x3
+/* Bit 2 is table indicator (LDT/GDT) */
+#define SEGMENT_TI_MASK 0x4
+
+#define IDT_ENTRIES 256
+#define GDT_SIZE (GDT_ENTRIES * 8)
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+extern const char early_idt_handlers[IDT_ENTRIES][10];
+#endif
+#endif
+
#endif
diff --git a/include/asm-x86/segment_32.h b/include/asm-x86/segment_32.h
deleted file mode 100644
index 597a47c2515..00000000000
--- a/include/asm-x86/segment_32.h
+++ /dev/null
@@ -1,148 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - reserved
- * 2 - reserved
- * 3 - reserved
- *
- * 4 - unused <==== new cacheline
- * 5 - unused
- *
- * ------- start of TLS (Thread-Local Storage) segments:
- *
- * 6 - TLS segment #1 [ glibc's TLS segment ]
- * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
- * 8 - TLS segment #3
- * 9 - reserved
- * 10 - reserved
- * 11 - reserved
- *
- * ------- start of kernel segments:
- *
- * 12 - kernel code segment <==== new cacheline
- * 13 - kernel data segment
- * 14 - default user CS
- * 15 - default user DS
- * 16 - TSS
- * 17 - LDT
- * 18 - PNPBIOS support (16->32 gate)
- * 19 - PNPBIOS support
- * 20 - PNPBIOS support
- * 21 - PNPBIOS support
- * 22 - PNPBIOS support
- * 23 - APM BIOS support
- * 24 - APM BIOS support
- * 25 - APM BIOS support
- *
- * 26 - ESPFIX small SS
- * 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - unused
- * 29 - unused
- * 30 - unused
- * 31 - TSS for double fault handler
- */
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define GDT_ENTRY_TLS_MIN 6
-#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-
-#define GDT_ENTRY_DEFAULT_USER_CS 14
-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
-
-#define GDT_ENTRY_DEFAULT_USER_DS 15
-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
-
-#define GDT_ENTRY_KERNEL_BASE 12
-
-#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
-
-#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
-
-#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
-#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
-
-#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
-#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
-
-#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
-#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
-
-#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
-#ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
-#else
-#define __KERNEL_PERCPU 0
-#endif
-
-#define GDT_ENTRY_DOUBLEFAULT_TSS 31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
-#define GDT_SIZE (GDT_ENTRIES * 8)
-
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS 2
-#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
-
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
-#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
-#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
-#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
-#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
-
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- */
-#define IDT_ENTRIES 256
-
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK 0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK 0x4
-
-/* User mode is privilege level 3 */
-#define USER_RPL 0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT 0x4
-#define SEGMENT_GDT 0x0
-
-#ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl() 0
-#endif
-/*
- * Matching rules for certain types of segments.
- */
-
-/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
-#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8)
-
-/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
-#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
-
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
-#endif
diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h
deleted file mode 100644
index 04b8ab21328..00000000000
--- a/include/asm-x86/segment_64.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-#include <asm/cache.h>
-
-/* Simple and small GDT entries for booting only */
-
-#define GDT_ENTRY_BOOT_CS 2
-#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
-
-#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
-
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
-
-#define __KERNEL32_CS 0x08
-
-/*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
- */
-
-#define __USER32_CS 0x23 /* 4*8+3 */
-#define __USER_DS 0x2b /* 5*8+3 */
-#define __USER_CS 0x33 /* 6*8+3 */
-#define __USER32_DS __USER_DS
-
-#define GDT_ENTRY_TSS 8 /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_TLS_ENTRIES 3
-
-#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
-
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-
-#endif
diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h
index 835c1d751a9..ac96d3804d0 100644
--- a/include/asm-x86/semaphore_32.h
+++ b/include/asm-x86/semaphore_32.h
@@ -83,10 +83,10 @@ static inline void init_MUTEX_LOCKED (struct semaphore *sem)
sema_init(sem, 0);
}
-fastcall void __down_failed(void /* special register calling convention */);
-fastcall int __down_failed_interruptible(void /* params in registers */);
-fastcall int __down_failed_trylock(void /* params in registers */);
-fastcall void __up_wakeup(void /* special register calling convention */);
+extern asmregparm void __down_failed(atomic_t *count_ptr);
+extern asmregparm int __down_failed_interruptible(atomic_t *count_ptr);
+extern asmregparm int __down_failed_trylock(atomic_t *count_ptr);
+extern asmregparm void __up_wakeup(atomic_t *count_ptr);
/*
* This is ugly, but we want the default case to fall through.
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 24d786e07b4..071e054abd8 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -3,6 +3,13 @@
#define COMMAND_LINE_SIZE 2048
+#ifndef __ASSEMBLY__
+char *machine_specific_memory_setup(void);
+#ifndef CONFIG_PARAVIRT
+#define paravirt_post_allocator_init() do {} while (0)
+#endif
+#endif /* __ASSEMBLY__ */
+
#ifdef __KERNEL__
#ifdef __i386__
@@ -51,9 +58,7 @@ void __init add_memory_region(unsigned long long start,
extern unsigned long init_pg_tables_end;
-#ifndef CONFIG_PARAVIRT
-#define paravirt_post_allocator_init() do {} while (0)
-#endif
+
#endif /* __i386__ */
#endif /* _SETUP */
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index c047f9dc342..681deade5f0 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -63,20 +63,20 @@ struct sigcontext {
unsigned short fs, __fsh;
unsigned short es, __esh;
unsigned short ds, __dsh;
- unsigned long edi;
- unsigned long esi;
- unsigned long ebp;
- unsigned long esp;
- unsigned long ebx;
- unsigned long edx;
- unsigned long ecx;
- unsigned long eax;
+ unsigned long di;
+ unsigned long si;
+ unsigned long bp;
+ unsigned long sp;
+ unsigned long bx;
+ unsigned long dx;
+ unsigned long cx;
+ unsigned long ax;
unsigned long trapno;
unsigned long err;
- unsigned long eip;
+ unsigned long ip;
unsigned short cs, __csh;
- unsigned long eflags;
- unsigned long esp_at_signal;
+ unsigned long flags;
+ unsigned long sp_at_signal;
unsigned short ss, __ssh;
struct _fpstate __user * fpstate;
unsigned long oldmask;
@@ -111,16 +111,16 @@ struct sigcontext {
unsigned long r13;
unsigned long r14;
unsigned long r15;
- unsigned long rdi;
- unsigned long rsi;
- unsigned long rbp;
- unsigned long rbx;
- unsigned long rdx;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rsp;
- unsigned long rip;
- unsigned long eflags; /* RFLAGS */
+ unsigned long di;
+ unsigned long si;
+ unsigned long bp;
+ unsigned long bx;
+ unsigned long dx;
+ unsigned long ax;
+ unsigned long cx;
+ unsigned long sp;
+ unsigned long ip;
+ unsigned long flags;
unsigned short cs;
unsigned short gs;
unsigned short fs;
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 3d657038ab7..6ffab4fd593 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -48,20 +48,20 @@ struct sigcontext_ia32 {
unsigned short fs, __fsh;
unsigned short es, __esh;
unsigned short ds, __dsh;
- unsigned int edi;
- unsigned int esi;
- unsigned int ebp;
- unsigned int esp;
- unsigned int ebx;
- unsigned int edx;
- unsigned int ecx;
- unsigned int eax;
+ unsigned int di;
+ unsigned int si;
+ unsigned int bp;
+ unsigned int sp;
+ unsigned int bx;
+ unsigned int dx;
+ unsigned int cx;
+ unsigned int ax;
unsigned int trapno;
unsigned int err;
- unsigned int eip;
+ unsigned int ip;
unsigned short cs, __csh;
- unsigned int eflags;
- unsigned int esp_at_signal;
+ unsigned int flags;
+ unsigned int sp_at_signal;
unsigned short ss, __ssh;
unsigned int fpstate; /* really (struct _fpstate_ia32 *) */
unsigned int oldmask;
diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index 987a422a2c7..aee7eca585a 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h
@@ -245,21 +245,14 @@ static __inline__ int sigfindinword(unsigned long word)
struct pt_regs;
-#define ptrace_signal_deliver(regs, cookie) \
- do { \
- if (current->ptrace & PT_DTRACE) { \
- current->ptrace &= ~PT_DTRACE; \
- (regs)->eflags &= ~TF_MASK; \
- } \
- } while (0)
-
#else /* __i386__ */
#undef __HAVE_ARCH_SIG_BITOPS
+#endif /* !__i386__ */
+
#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-#endif /* !__i386__ */
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index e10b7affdfe..56152e31228 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -1,51 +1,41 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+#include <linux/init.h>
+
/*
* We need the APIC definitions automatically as part of 'smp.h'
*/
-#ifndef __ASSEMBLY__
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+# include <asm/mpspec.h>
+# include <asm/apic.h>
+# ifdef CONFIG_X86_IO_APIC
+# include <asm/io_apic.h>
+# endif
#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include <linux/bitops.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#ifdef CONFIG_X86_IO_APIC
-#include <asm/io_apic.h>
-#endif
-#endif
+extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_callin_map;
-#define BAD_APICID 0xFFu
-#ifdef CONFIG_SMP
-#ifndef __ASSEMBLY__
+extern int smp_num_siblings;
+extern unsigned int num_processors;
-/*
- * Private routines/data
- */
-
extern void smp_alloc_memory(void);
-extern int pic_mode;
-extern int smp_num_siblings;
-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+extern void lock_ipi_call_lock(void);
+extern void unlock_ipi_call_lock(void);
extern void (*mtrr_hook) (void);
extern void zap_low_mappings (void);
-extern void lock_ipi_call_lock(void);
-extern void unlock_ipi_call_lock(void);
-#define MAX_APICID 256
extern u8 __initdata x86_cpu_to_apicid_init[];
-extern void *x86_cpu_to_apicid_ptr;
-DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
-
-#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
+extern void *x86_cpu_to_apicid_early_ptr;
-extern void set_cpu_sibling_map(int cpu);
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+DECLARE_PER_CPU(u8, cpu_llc_id);
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
#ifdef CONFIG_HOTPLUG_CPU
extern void cpu_exit_clear(void);
@@ -53,6 +43,9 @@ extern void cpu_uninit(void);
extern void remove_siblinginfo(int cpu);
#endif
+/* Globals due to paravirt */
+extern void set_cpu_sibling_map(int cpu);
+
struct smp_ops
{
void (*smp_prepare_boot_cpu)(void);
@@ -67,6 +60,7 @@ struct smp_ops
int wait);
};
+#ifdef CONFIG_SMP
extern struct smp_ops smp_ops;
static inline void smp_prepare_boot_cpu(void)
@@ -107,10 +101,12 @@ int native_cpu_up(unsigned int cpunum);
void native_smp_cpus_done(unsigned int max_cpus);
#ifndef CONFIG_PARAVIRT
-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
-do { } while (0)
+#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
#endif
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
@@ -119,9 +115,11 @@ do { } while (0)
DECLARE_PER_CPU(int, cpu_number);
#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
-extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_callin_map;
-extern cpumask_t cpu_possible_map;
+#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
+
+extern int safe_smp_processor_id(void);
+
+void __cpuinit smp_store_cpu_info(int id);
/* We don't mark CPUs online until __cpu_up(), so we need another measure */
static inline int num_booting_cpus(void)
@@ -129,56 +127,39 @@ static inline int num_booting_cpus(void)
return cpus_weight(cpu_callout_map);
}
-extern int safe_smp_processor_id(void);
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-extern unsigned int num_processors;
-
-void __cpuinit smp_store_cpu_info(int id);
-
-#endif /* !__ASSEMBLY__ */
-
#else /* CONFIG_SMP */
#define safe_smp_processor_id() 0
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
-#define NO_PROC_ID 0xFF /* No processor magic marker */
-
-#endif /* CONFIG_SMP */
-
-#ifndef __ASSEMBLY__
+#endif /* !CONFIG_SMP */
#ifdef CONFIG_X86_LOCAL_APIC
-#ifdef APIC_DEFINITION
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+}
+
+# ifdef APIC_DEFINITION
extern int hard_smp_processor_id(void);
-#else
-#include <mach_apicdef.h>
+# else
+# include <mach_apicdef.h>
static inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+ return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
}
-#endif /* APIC_DEFINITION */
+# endif /* APIC_DEFINITION */
#else /* CONFIG_X86_LOCAL_APIC */
-#ifndef CONFIG_SMP
-#define hard_smp_processor_id() 0
-#endif
+# ifndef CONFIG_SMP
+# define hard_smp_processor_id() 0
+# endif
#endif /* CONFIG_X86_LOCAL_APIC */
-extern u8 apicid_2_node[];
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static __inline int logical_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
-}
-#endif
-#endif
-
+#endif /* !ASSEMBLY */
#endif
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index ab612b0ff27..e0a75519ad2 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -1,130 +1,101 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
-/*
- * We need the APIC definitions automatically as part of 'smp.h'
- */
-#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <linux/bitops.h>
#include <linux/init.h>
-extern int disable_apic;
-#include <asm/mpspec.h>
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
#include <asm/apic.h>
#include <asm/io_apic.h>
-#include <asm/thread_info.h>
-
-#ifdef CONFIG_SMP
-
+#include <asm/mpspec.h>
#include <asm/pda.h>
+#include <asm/thread_info.h>
-struct pt_regs;
-
-extern cpumask_t cpu_present_mask;
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_online_map;
extern cpumask_t cpu_callout_map;
extern cpumask_t cpu_initialized;
-/*
- * Private routines/data
- */
-
+extern int smp_num_siblings;
+extern unsigned int num_processors;
+
extern void smp_alloc_memory(void);
-extern volatile unsigned long smp_invalidate_needed;
extern void lock_ipi_call_lock(void);
extern void unlock_ipi_call_lock(void);
-extern int smp_num_siblings;
-extern void smp_send_reschedule(int cpu);
+
extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
void *info, int wait);
-/*
- * cpu_sibling_map and cpu_core_map now live
- * in the per cpu area
- *
- * extern cpumask_t cpu_sibling_map[NR_CPUS];
- * extern cpumask_t cpu_core_map[NR_CPUS];
- */
+extern u16 __initdata x86_cpu_to_apicid_init[];
+extern u16 __initdata x86_bios_cpu_apicid_init[];
+extern void *x86_cpu_to_apicid_early_ptr;
+extern void *x86_bios_cpu_apicid_early_ptr;
+
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
-DECLARE_PER_CPU(u8, cpu_llc_id);
-
-#define SMP_TRAMPOLINE_BASE 0x6000
-
-/*
- * On x86 all CPUs are mapped 1:1 to the APIC space.
- * This simplifies scheduling and IPI sending and
- * compresses data structures.
- */
+DECLARE_PER_CPU(u16, cpu_llc_id);
+DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
+DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
-static inline int num_booting_cpus(void)
+static inline int cpu_present_to_apicid(int mps_cpu)
{
- return cpus_weight(cpu_callout_map);
+ if (cpu_present(mps_cpu))
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
}
-#define raw_smp_processor_id() read_pda(cpunumber)
+#ifdef CONFIG_SMP
+
+#define SMP_TRAMPOLINE_BASE 0x6000
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void prefill_possible_map(void);
-extern unsigned num_processors;
extern unsigned __cpuinitdata disabled_cpus;
-#define NO_PROC_ID 0xFF /* No processor magic marker */
-
-#endif /* CONFIG_SMP */
+#define raw_smp_processor_id() read_pda(cpunumber)
+#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
-#define safe_smp_processor_id() smp_processor_id()
-
-static inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
-}
+#define stack_smp_processor_id() \
+ ({ \
+ struct thread_info *ti; \
+ __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
+ ti->cpu; \
+})
/*
- * Some lowlevel functions might want to know about
- * the real APIC ID <-> CPU # mapping.
+ * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
+ * scheduling and IPI sending and compresses data structures.
*/
-extern u8 __initdata x86_cpu_to_apicid_init[];
-extern void *x86_cpu_to_apicid_ptr;
-DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */
-extern u8 bios_cpu_apicid[];
-
-static inline int cpu_present_to_apicid(int mps_cpu)
+static inline int num_booting_cpus(void)
{
- if (mps_cpu < NR_CPUS)
- return (int)bios_cpu_apicid[mps_cpu];
- else
- return BAD_APICID;
+ return cpus_weight(cpu_callout_map);
}
-#ifndef CONFIG_SMP
+extern void smp_send_reschedule(int cpu);
+
+#else /* CONFIG_SMP */
+
+extern unsigned int boot_cpu_id;
+#define cpu_physical_id(cpu) boot_cpu_id
#define stack_smp_processor_id() 0
-#define cpu_logical_map(x) (x)
-#else
-#include <asm/thread_info.h>
-#define stack_smp_processor_id() \
-({ \
- struct thread_info *ti; \
- __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
- ti->cpu; \
-})
-#endif
+
+#endif /* !CONFIG_SMP */
+
+#define safe_smp_processor_id() smp_processor_id()
static __inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+}
+
+static inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
}
-#ifdef CONFIG_SMP
-#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
-#else
-extern unsigned int boot_cpu_id;
-#define cpu_physical_id(cpu) boot_cpu_id
-#endif /* !CONFIG_SMP */
#endif
diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h
index 3f203b1d9ee..fa58cd55411 100644
--- a/include/asm-x86/sparsemem.h
+++ b/include/asm-x86/sparsemem.h
@@ -1,5 +1,34 @@
+#ifndef _ASM_X86_SPARSEMEM_H
+#define _ASM_X86_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * generic non-linear memory support:
+ *
+ * 1) we will not split memory into more chunks than will fit into the flags
+ * field of the struct page
+ *
+ * SECTION_SIZE_BITS 2^n: size of each section
+ * MAX_PHYSADDR_BITS 2^n: max size of physical address space
+ * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space
+ *
+ */
+
#ifdef CONFIG_X86_32
-# include "sparsemem_32.h"
-#else
-# include "sparsemem_64.h"
+# ifdef CONFIG_X86_PAE
+# define SECTION_SIZE_BITS 30
+# define MAX_PHYSADDR_BITS 36
+# define MAX_PHYSMEM_BITS 36
+# else
+# define SECTION_SIZE_BITS 26
+# define MAX_PHYSADDR_BITS 32
+# define MAX_PHYSMEM_BITS 32
+# endif
+#else /* CONFIG_X86_32 */
+# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
+# define MAX_PHYSADDR_BITS 40
+# define MAX_PHYSMEM_BITS 40
+#endif
+
+#endif /* CONFIG_SPARSEMEM */
#endif
diff --git a/include/asm-x86/sparsemem_32.h b/include/asm-x86/sparsemem_32.h
deleted file mode 100644
index cfeed990585..00000000000
--- a/include/asm-x86/sparsemem_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _I386_SPARSEMEM_H
-#define _I386_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
-
-/*
- * generic non-linear memory support:
- *
- * 1) we will not split memory into more chunks than will fit into the
- * flags field of the struct page
- */
-
-/*
- * SECTION_SIZE_BITS 2^N: how big each section will be
- * MAX_PHYSADDR_BITS 2^N: how much physical address space we have
- * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
- */
-#ifdef CONFIG_X86_PAE
-#define SECTION_SIZE_BITS 30
-#define MAX_PHYSADDR_BITS 36
-#define MAX_PHYSMEM_BITS 36
-#else
-#define SECTION_SIZE_BITS 26
-#define MAX_PHYSADDR_BITS 32
-#define MAX_PHYSMEM_BITS 32
-#endif
-
-/* XXX: FIXME -- wli */
-#define kern_addr_valid(kaddr) (0)
-
-#endif /* CONFIG_SPARSEMEM */
-#endif /* _I386_SPARSEMEM_H */
diff --git a/include/asm-x86/sparsemem_64.h b/include/asm-x86/sparsemem_64.h
deleted file mode 100644
index dabb16714a7..00000000000
--- a/include/asm-x86/sparsemem_64.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _ASM_X86_64_SPARSEMEM_H
-#define _ASM_X86_64_SPARSEMEM_H 1
-
-#ifdef CONFIG_SPARSEMEM
-
-/*
- * generic non-linear memory support:
- *
- * 1) we will not split memory into more chunks than will fit into the flags
- * field of the struct page
- *
- * SECTION_SIZE_BITS 2^n: size of each section
- * MAX_PHYSADDR_BITS 2^n: max size of physical address space
- * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space
- *
- */
-
-#define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-#define MAX_PHYSADDR_BITS 40
-#define MAX_PHYSMEM_BITS 40
-
-extern int early_pfn_to_nid(unsigned long pfn);
-
-#endif /* CONFIG_SPARSEMEM */
-
-#endif /* _ASM_X86_64_SPARSEMEM_H */
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index d74d85e71dc..23804c1890f 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,5 +1,296 @@
+#ifndef _X86_SPINLOCK_H_
+#define _X86_SPINLOCK_H_
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <linux/compiler.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ *
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * These are fair FIFO ticket locks, which are currently limited to 256
+ * CPUs.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
#ifdef CONFIG_X86_32
-# include "spinlock_32.h"
+typedef char _slock_t;
+# define LOCK_INS_DEC "decb"
+# define LOCK_INS_XCH "xchgb"
+# define LOCK_INS_MOV "movb"
+# define LOCK_INS_CMP "cmpb"
+# define LOCK_PTR_REG "a"
#else
-# include "spinlock_64.h"
+typedef int _slock_t;
+# define LOCK_INS_DEC "decl"
+# define LOCK_INS_XCH "xchgl"
+# define LOCK_INS_MOV "movl"
+# define LOCK_INS_CMP "cmpl"
+# define LOCK_PTR_REG "D"
+#endif
+
+#if defined(CONFIG_X86_32) && \
+ (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
+/*
+ * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
+ * (PPro errata 66, 92)
+ */
+# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
+#else
+# define UNLOCK_LOCK_PREFIX
+#endif
+
+/*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the the initial value of the tail.
+ *
+ * We use an xadd covering *both* parts of the lock, to increment the tail and
+ * also load the position of the head, which takes care of memory ordering
+ * issues and should be optimal for the uncontended case. Note the tail must be
+ * in the high part, because a wide xadd increment of the low part would carry
+ * up and contaminate the high part.
+ *
+ * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
+ * save some instructions and make the code more elegant. There really isn't
+ * much between them in performance though, especially as locks are out of line.
+ */
+#if (NR_CPUS < 256)
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+ int tmp = *(volatile signed int *)(&(lock)->slock);
+
+ return (((tmp >> 8) & 0xff) != (tmp & 0xff));
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+ int tmp = *(volatile signed int *)(&(lock)->slock);
+
+ return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+ short inc = 0x0100;
+
+ __asm__ __volatile__ (
+ LOCK_PREFIX "xaddw %w0, %1\n"
+ "1:\t"
+ "cmpb %h0, %b0\n\t"
+ "je 2f\n\t"
+ "rep ; nop\n\t"
+ "movb %1, %b0\n\t"
+ /* don't need lfence here, because loads are in-order */
+ "jmp 1b\n"
+ "2:"
+ :"+Q" (inc), "+m" (lock->slock)
+ :
+ :"memory", "cc");
+}
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+ int tmp;
+ short new;
+
+ asm volatile(
+ "movw %2,%w0\n\t"
+ "cmpb %h0,%b0\n\t"
+ "jne 1f\n\t"
+ "movw %w0,%w1\n\t"
+ "incb %h1\n\t"
+ "lock ; cmpxchgw %w1,%2\n\t"
+ "1:"
+ "sete %b1\n\t"
+ "movzbl %b1,%0\n\t"
+ :"=&a" (tmp), "=Q" (new), "+m" (lock->slock)
+ :
+ : "memory", "cc");
+
+ return tmp;
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+ __asm__ __volatile__(
+ UNLOCK_LOCK_PREFIX "incb %0"
+ :"+m" (lock->slock)
+ :
+ :"memory", "cc");
+}
+#else
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+ int tmp = *(volatile signed int *)(&(lock)->slock);
+
+ return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+ int tmp = *(volatile signed int *)(&(lock)->slock);
+
+ return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+ int inc = 0x00010000;
+ int tmp;
+
+ __asm__ __volatile__ (
+ "lock ; xaddl %0, %1\n"
+ "movzwl %w0, %2\n\t"
+ "shrl $16, %0\n\t"
+ "1:\t"
+ "cmpl %0, %2\n\t"
+ "je 2f\n\t"
+ "rep ; nop\n\t"
+ "movzwl %1, %2\n\t"
+ /* don't need lfence here, because loads are in-order */
+ "jmp 1b\n"
+ "2:"
+ :"+Q" (inc), "+m" (lock->slock), "=r" (tmp)
+ :
+ :"memory", "cc");
+}
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+ int tmp;
+ int new;
+
+ asm volatile(
+ "movl %2,%0\n\t"
+ "movl %0,%1\n\t"
+ "roll $16, %0\n\t"
+ "cmpl %0,%1\n\t"
+ "jne 1f\n\t"
+ "addl $0x00010000, %1\n\t"
+ "lock ; cmpxchgl %1,%2\n\t"
+ "1:"
+ "sete %b1\n\t"
+ "movzbl %b1,%0\n\t"
+ :"=&a" (tmp), "=r" (new), "+m" (lock->slock)
+ :
+ : "memory", "cc");
+
+ return tmp;
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+ __asm__ __volatile__(
+ UNLOCK_LOCK_PREFIX "incw %0"
+ :"+m" (lock->slock)
+ :
+ :"memory", "cc");
+}
+#endif
+
+static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+ while (__raw_spin_is_locked(lock))
+ cpu_relax();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+{
+ return (int)(lock)->lock > 0;
+}
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+{
+ return (lock)->lock == RW_LOCK_BIAS;
+}
+
+static inline void __raw_read_lock(raw_rwlock_t *rw)
+{
+ asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+ "jns 1f\n"
+ "call __read_lock_failed\n\t"
+ "1:\n"
+ ::LOCK_PTR_REG (rw) : "memory");
+}
+
+static inline void __raw_write_lock(raw_rwlock_t *rw)
+{
+ asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+ "jz 1f\n"
+ "call __write_lock_failed\n\t"
+ "1:\n"
+ ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+}
+
+static inline int __raw_read_trylock(raw_rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+
+ atomic_dec(count);
+ if (atomic_read(count) >= 0)
+ return 1;
+ atomic_inc(count);
+ return 0;
+}
+
+static inline int __raw_write_trylock(raw_rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
+
+static inline void __raw_read_unlock(raw_rwlock_t *rw)
+{
+ asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+}
+
+static inline void __raw_write_unlock(raw_rwlock_t *rw)
+{
+ asm volatile(LOCK_PREFIX "addl %1, %0"
+ : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+}
+
+#define _raw_spin_relax(lock) cpu_relax()
+#define _raw_read_relax(lock) cpu_relax()
+#define _raw_write_relax(lock) cpu_relax()
+
#endif
diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
deleted file mode 100644
index d3bcebed60c..00000000000
--- a/include/asm-x86/spinlock_32.h
+++ /dev/null
@@ -1,221 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <linux/compiler.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define CLI_STRING "cli"
-#define STI_STRING "sti"
-#define CLI_STI_CLOBBERS
-#define CLI_STI_INPUT_ARGS
-#endif /* CONFIG_PARAVIRT */
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-static inline int __raw_spin_is_locked(raw_spinlock_t *x)
-{
- return *(volatile signed char *)(&(x)->slock) <= 0;
-}
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
- asm volatile("\n1:\t"
- LOCK_PREFIX " ; decb %0\n\t"
- "jns 3f\n"
- "2:\t"
- "rep;nop\n\t"
- "cmpb $0,%0\n\t"
- "jle 2b\n\t"
- "jmp 1b\n"
- "3:\n\t"
- : "+m" (lock->slock) : : "memory");
-}
-
-/*
- * It is easier for the lock validator if interrupts are not re-enabled
- * in the middle of a lock-acquire. This is a performance feature anyway
- * so we turn it off:
- *
- * NOTE: there's an irqs-on section here, which normally would have to be
- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
-{
- asm volatile(
- "\n1:\t"
- LOCK_PREFIX " ; decb %[slock]\n\t"
- "jns 5f\n"
- "2:\t"
- "testl $0x200, %[flags]\n\t"
- "jz 4f\n\t"
- STI_STRING "\n"
- "3:\t"
- "rep;nop\n\t"
- "cmpb $0, %[slock]\n\t"
- "jle 3b\n\t"
- CLI_STRING "\n\t"
- "jmp 1b\n"
- "4:\t"
- "rep;nop\n\t"
- "cmpb $0, %[slock]\n\t"
- "jg 1b\n\t"
- "jmp 4b\n"
- "5:\n\t"
- : [slock] "+m" (lock->slock)
- : [flags] "r" (flags)
- CLI_STI_INPUT_ARGS
- : "memory" CLI_STI_CLOBBERS);
-}
-#endif
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
- char oldval;
- asm volatile(
- "xchgb %b0,%1"
- :"=q" (oldval), "+m" (lock->slock)
- :"0" (0) : "memory");
- return oldval > 0;
-}
-
-/*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
-}
-
-#else
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- char oldval = 1;
-
- asm volatile("xchgb %b0, %1"
- : "=q" (oldval), "+m" (lock->slock)
- : "0" (oldval) : "memory");
-}
-
-#endif
-
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
-{
- while (__raw_spin_is_locked(lock))
- cpu_relax();
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static inline int __raw_read_can_lock(raw_rwlock_t *x)
-{
- return (int)(x)->lock > 0;
-}
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static inline int __raw_write_can_lock(raw_rwlock_t *x)
-{
- return (x)->lock == RW_LOCK_BIAS;
-}
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
- "jns 1f\n"
- "call __read_lock_failed\n\t"
- "1:\n"
- ::"a" (rw) : "memory");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t"
- "jz 1f\n"
- "call __write_lock_failed\n\t"
- "1:\n"
- ::"a" (rw) : "memory");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
- return 1;
- atomic_inc(count);
- return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
- : "+m" (rw->lock) : : "memory");
-}
-
-#define _raw_spin_relax(lock) cpu_relax()
-#define _raw_read_relax(lock) cpu_relax()
-#define _raw_write_relax(lock) cpu_relax()
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
deleted file mode 100644
index 88bf981e73c..00000000000
--- a/include/asm-x86/spinlock_64.h
+++ /dev/null
@@ -1,167 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
-{
- return *(volatile signed int *)(&(lock)->slock) <= 0;
-}
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
- asm volatile(
- "\n1:\t"
- LOCK_PREFIX " ; decl %0\n\t"
- "jns 2f\n"
- "3:\n"
- "rep;nop\n\t"
- "cmpl $0,%0\n\t"
- "jle 3b\n\t"
- "jmp 1b\n"
- "2:\t" : "=m" (lock->slock) : : "memory");
-}
-
-/*
- * Same as __raw_spin_lock, but reenable interrupts during spinning.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
-{
- asm volatile(
- "\n1:\t"
- LOCK_PREFIX " ; decl %0\n\t"
- "jns 5f\n"
- "testl $0x200, %1\n\t" /* interrupts were disabled? */
- "jz 4f\n\t"
- "sti\n"
- "3:\t"
- "rep;nop\n\t"
- "cmpl $0, %0\n\t"
- "jle 3b\n\t"
- "cli\n\t"
- "jmp 1b\n"
- "4:\t"
- "rep;nop\n\t"
- "cmpl $0, %0\n\t"
- "jg 1b\n\t"
- "jmp 4b\n"
- "5:\n\t"
- : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
-}
-#endif
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
- int oldval;
-
- asm volatile(
- "xchgl %0,%1"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-
- return oldval > 0;
-}
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory");
-}
-
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
-{
- while (__raw_spin_is_locked(lock))
- cpu_relax();
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- */
-
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
-{
- return (int)(lock)->lock > 0;
-}
-
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
-{
- return (lock)->lock == RW_LOCK_BIAS;
-}
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t"
- "jns 1f\n"
- "call __read_lock_failed\n"
- "1:\n"
- ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t"
- "jz 1f\n"
- "\tcall __write_lock_failed\n\t"
- "1:\n"
- ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
- return 1;
- atomic_inc(count);
- return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX " ; incl %0" :"=m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK_PREFIX " ; addl $" RW_LOCK_BIAS_STR ",%0"
- : "=m" (rw->lock) : : "memory");
-}
-
-#define _raw_spin_relax(lock) cpu_relax()
-#define _raw_read_relax(lock) cpu_relax()
-#define _raw_write_relax(lock) cpu_relax()
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 4da9345c150..9029cf78cf5 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
unsigned int slock;
} raw_spinlock_t;
-#define __RAW_SPIN_LOCK_UNLOCKED { 1 }
+#define __RAW_SPIN_LOCK_UNLOCKED { 0 }
typedef struct {
unsigned int lock;
diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h
index 70dd5bae323..30f82526a8e 100644
--- a/include/asm-x86/stacktrace.h
+++ b/include/asm-x86/stacktrace.h
@@ -9,12 +9,13 @@ struct stacktrace_ops {
void (*warning)(void *data, char *msg);
/* msg must contain %s for the symbol */
void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
- void (*address)(void *data, unsigned long address);
+ void (*address)(void *data, unsigned long address, int reliable);
/* On negative return stop dumping */
int (*stack)(void *data, char *name);
};
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data);
#endif
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index a2520732ffd..1bbda3ad779 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -12,8 +12,8 @@ static inline int arch_prepare_suspend(void) { return 0; }
struct saved_context {
u16 es, fs, gs, ss;
unsigned long cr0, cr2, cr3, cr4;
- struct Xgt_desc_struct gdt;
- struct Xgt_desc_struct idt;
+ struct desc_ptr gdt;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index c505a76bcf6..2eb92cb81a0 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h
@@ -15,7 +15,14 @@ arch_prepare_suspend(void)
return 0;
}
-/* Image of the saved processor state. If you touch this, fix acpi/wakeup.S. */
+/*
+ * Image of the saved processor state, used by the low level ACPI suspend to
+ * RAM code and by the low level hibernation code.
+ *
+ * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
+ * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
+ * still work as required.
+ */
struct saved_context {
struct pt_regs regs;
u16 ds, es, fs, gs, ss;
@@ -38,8 +45,6 @@ struct saved_context {
#define loaddebug(thread,register) \
set_debugreg((thread)->debugreg##register, register)
-extern void fix_processor_context(void);
-
/* routines for saving/restoring kernel state */
extern int acpi_save_state_mem(void);
extern char core_restore_code;
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 692562b48f2..ee32ef9367f 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -1,5 +1,414 @@
+#ifndef _ASM_X86_SYSTEM_H_
+#define _ASM_X86_SYSTEM_H_
+
+#include <asm/asm.h>
+#include <asm/segment.h>
+#include <asm/cpufeature.h>
+#include <asm/cmpxchg.h>
+#include <asm/nops.h>
+
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+
+/* entries in ARCH_DLINFO: */
+#ifdef CONFIG_IA32_EMULATION
+# define AT_VECTOR_SIZE_ARCH 2
+#else
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
+#ifdef CONFIG_X86_32
+
+struct task_struct; /* one of the stranger aspects of C forward declarations */
+extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
+ struct task_struct *next));
+
+/*
+ * Saving eflags is important. It switches not only IOPL between tasks,
+ * it also protects other tasks from NT leaking through sysenter etc.
+ */
+#define switch_to(prev, next, last) do { \
+ unsigned long esi, edi; \
+ asm volatile("pushfl\n\t" /* Save flags */ \
+ "pushl %%ebp\n\t" \
+ "movl %%esp,%0\n\t" /* save ESP */ \
+ "movl %5,%%esp\n\t" /* restore ESP */ \
+ "movl $1f,%1\n\t" /* save EIP */ \
+ "pushl %6\n\t" /* restore EIP */ \
+ "jmp __switch_to\n" \
+ "1:\t" \
+ "popl %%ebp\n\t" \
+ "popfl" \
+ :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \
+ "=a" (last), "=S" (esi), "=D" (edi) \
+ :"m" (next->thread.sp), "m" (next->thread.ip), \
+ "2" (prev), "d" (next)); \
+} while (0)
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+#else
+#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
+#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
+
+/* frame pointer must be last for get_wchan */
+#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+
+#define __EXTRA_CLOBBER \
+ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
+ "r12", "r13", "r14", "r15"
+
+/* Save restore flags to clear handle leaking NT */
+#define switch_to(prev, next, last) \
+ asm volatile(SAVE_CONTEXT \
+ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
+ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
+ "call __switch_to\n\t" \
+ ".globl thread_return\n" \
+ "thread_return:\n\t" \
+ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq %P[thread_info](%%rsi),%%r8\n\t" \
+ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "movq %%rax,%%rdi\n\t" \
+ "jc ret_from_fork\n\t" \
+ RESTORE_CONTEXT \
+ : "=a" (last) \
+ : [next] "S" (next), [prev] "D" (prev), \
+ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+ [ti_flags] "i" (offsetof(struct thread_info, flags)), \
+ [tif_fork] "i" (TIF_FORK), \
+ [thread_info] "i" (offsetof(struct task_struct, stack)), \
+ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+ : "memory", "cc" __EXTRA_CLOBBER)
+#endif
+
+#ifdef __KERNEL__
+#define _set_base(addr, base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %%dl,%2\n\t" \
+ "movb %%dh,%3" \
+ :"=&d" (__pr) \
+ :"m" (*((addr)+2)), \
+ "m" (*((addr)+4)), \
+ "m" (*((addr)+7)), \
+ "0" (base) \
+ ); } while (0)
+
+#define _set_limit(addr, limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %2,%%dh\n\t" \
+ "andb $0xf0,%%dh\n\t" \
+ "orb %%dh,%%dl\n\t" \
+ "movb %%dl,%2" \
+ :"=&d" (__lr) \
+ :"m" (*(addr)), \
+ "m" (*((addr)+6)), \
+ "0" (limit) \
+ ); } while (0)
+
+#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
+#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
+
+extern void load_gs_index(unsigned);
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg, value) \
+ asm volatile("\n" \
+ "1:\t" \
+ "movl %k0,%%" #seg "\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\t" \
+ "movl %k1, %%" #seg "\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ _ASM_ALIGN "\n\t" \
+ _ASM_PTR " 1b,3b\n" \
+ ".previous" \
+ : :"r" (value), "r" (0))
+
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+ asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+ unsigned long __limit;
+ __asm__("lsll %1,%0"
+ :"=r" (__limit):"r" (segment));
+ return __limit+1;
+}
+
+static inline void native_clts(void)
+{
+ asm volatile ("clts");
+}
+
+/*
+ * Volatile isn't enough to prevent the compiler from reordering the
+ * read/write functions for the control registers and messing everything up.
+ * A memory clobber would solve the problem, but would prevent reordering of
+ * all loads stores around it, which can hurt performance. Solution is to
+ * use a variable and mimic reads and writes to it to enforce serialization
+ */
+static unsigned long __force_order;
+
+static inline unsigned long native_read_cr0(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr0(unsigned long val)
+{
+ asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr2(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr2,%0\n\t" :"=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr2(unsigned long val)
+{
+ asm volatile("mov %0,%%cr2": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr3(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr3(unsigned long val)
+{
+ asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr4(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline unsigned long native_read_cr4_safe(void)
+{
+ unsigned long val;
+ /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
+ * exists, so it will never fail. */
+#ifdef CONFIG_X86_32
+ asm volatile("1: mov %%cr4, %0 \n"
+ "2: \n"
+ ".section __ex_table,\"a\" \n"
+ ".long 1b,2b \n"
+ ".previous \n"
+ : "=r" (val), "=m" (__force_order) : "0" (0));
+#else
+ val = native_read_cr4();
+#endif
+ return val;
+}
+
+static inline void native_write_cr4(unsigned long val)
+{
+ asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
+}
+
+#ifdef CONFIG_X86_64
+static inline unsigned long native_read_cr8(void)
+{
+ unsigned long cr8;
+ asm volatile("movq %%cr8,%0" : "=r" (cr8));
+ return cr8;
+}
+
+static inline void native_write_cr8(unsigned long val)
+{
+ asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+#endif
+
+static inline void native_wbinvd(void)
+{
+ asm volatile("wbinvd": : :"memory");
+}
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define read_cr0() (native_read_cr0())
+#define write_cr0(x) (native_write_cr0(x))
+#define read_cr2() (native_read_cr2())
+#define write_cr2(x) (native_write_cr2(x))
+#define read_cr3() (native_read_cr3())
+#define write_cr3(x) (native_write_cr3(x))
+#define read_cr4() (native_read_cr4())
+#define read_cr4_safe() (native_read_cr4_safe())
+#define write_cr4(x) (native_write_cr4(x))
+#define wbinvd() (native_wbinvd())
+#ifdef CONFIG_X86_64
+#define read_cr8() (native_read_cr8())
+#define write_cr8(x) (native_write_cr8(x))
+#endif
+
+/* Clear the 'TS' bit */
+#define clts() (native_clts())
+
+#endif/* CONFIG_PARAVIRT */
+
+#define stts() write_cr0(8 | read_cr0())
+
+#endif /* __KERNEL__ */
+
+static inline void clflush(void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+void disable_hlt(void);
+void enable_hlt(void);
+
+extern int es7000_plat;
+void cpu_idle_wait(void);
+
+extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+
+void default_idle(void);
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
#ifdef CONFIG_X86_32
-# include "system_32.h"
+/*
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
-# include "system_64.h"
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ **/
+
+#define read_barrier_depends() do { } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb() rmb()
+#else
+# define smp_rmb() barrier()
+#endif
+#ifdef CONFIG_X86_OOSTORE
+# define smp_wmb() wmb()
+#else
+# define smp_wmb() barrier()
+#endif
+#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while (0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+/*
+ * Stop RDTSC speculation. This is needed when you need to use RDTSC
+ * (or get_cycles or vread that possibly accesses the TSC) in a defined
+ * code region.
+ *
+ * (Could use an alternative three way for this if there was one.)
+ */
+static inline void rdtsc_barrier(void)
+{
+ alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+ alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+}
+
#endif
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
deleted file mode 100644
index ef8468883ba..00000000000
--- a/include/asm-x86/system_32.h
+++ /dev/null
@@ -1,320 +0,0 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <linux/kernel.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-
-#ifdef __KERNEL__
-#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
-
-struct task_struct; /* one of the stranger aspects of C forward declarations.. */
-extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
-
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
-#define switch_to(prev,next,last) do { \
- unsigned long esi,edi; \
- asm volatile("pushfl\n\t" /* Save flags */ \
- "pushl %%ebp\n\t" \
- "movl %%esp,%0\n\t" /* save ESP */ \
- "movl %5,%%esp\n\t" /* restore ESP */ \
- "movl $1f,%1\n\t" /* save EIP */ \
- "pushl %6\n\t" /* restore EIP */ \
- "jmp __switch_to\n" \
- "1:\t" \
- "popl %%ebp\n\t" \
- "popfl" \
- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
- "=a" (last),"=S" (esi),"=D" (edi) \
- :"m" (next->thread.esp),"m" (next->thread.eip), \
- "2" (prev), "d" (next)); \
-} while (0)
-
-#define _set_base(addr,base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %%dl,%2\n\t" \
- "movb %%dh,%3" \
- :"=&d" (__pr) \
- :"m" (*((addr)+2)), \
- "m" (*((addr)+4)), \
- "m" (*((addr)+7)), \
- "0" (base) \
- ); } while(0)
-
-#define _set_limit(addr,limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %2,%%dh\n\t" \
- "andb $0xf0,%%dh\n\t" \
- "orb %%dh,%%dl\n\t" \
- "movb %%dl,%2" \
- :"=&d" (__lr) \
- :"m" (*(addr)), \
- "m" (*((addr)+6)), \
- "0" (limit) \
- ); } while(0)
-
-#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
-#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) )
-
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value) \
- asm volatile("\n" \
- "1:\t" \
- "mov %0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "pushl $0\n\t" \
- "popl %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".long 1b,3b\n" \
- ".previous" \
- : :"rm" (value))
-
-/*
- * Save a segment register away
- */
-#define savesegment(seg, value) \
- asm volatile("mov %%" #seg ",%0":"=rm" (value))
-
-
-static inline void native_clts(void)
-{
- asm volatile ("clts");
-}
-
-static inline unsigned long native_read_cr0(void)
-{
- unsigned long val;
- asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
- return val;
-}
-
-static inline void native_write_cr0(unsigned long val)
-{
- asm volatile("movl %0,%%cr0": :"r" (val));
-}
-
-static inline unsigned long native_read_cr2(void)
-{
- unsigned long val;
- asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
- return val;
-}
-
-static inline void native_write_cr2(unsigned long val)
-{
- asm volatile("movl %0,%%cr2": :"r" (val));
-}
-
-static inline unsigned long native_read_cr3(void)
-{
- unsigned long val;
- asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
- return val;
-}
-
-static inline void native_write_cr3(unsigned long val)
-{
- asm volatile("movl %0,%%cr3": :"r" (val));
-}
-
-static inline unsigned long native_read_cr4(void)
-{
- unsigned long val;
- asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
- return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
- unsigned long val;
- /* This could fault if %cr4 does not exist */
- asm volatile("1: movl %%cr4, %0 \n"
- "2: \n"
- ".section __ex_table,\"a\" \n"
- ".long 1b,2b \n"
- ".previous \n"
- : "=r" (val): "0" (0));
- return val;
-}
-
-static inline void native_write_cr4(unsigned long val)
-{
- asm volatile("movl %0,%%cr4": :"r" (val));
-}
-
-static inline void native_wbinvd(void)
-{
- asm volatile("wbinvd": : :"memory");
-}
-
-static inline void clflush(volatile void *__p)
-{
- asm volatile("clflush %0" : "+m" (*(char __force *)__p));
-}
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define read_cr0() (native_read_cr0())
-#define write_cr0(x) (native_write_cr0(x))
-#define read_cr2() (native_read_cr2())
-#define write_cr2(x) (native_write_cr2(x))
-#define read_cr3() (native_read_cr3())
-#define write_cr3(x) (native_write_cr3(x))
-#define read_cr4() (native_read_cr4())
-#define read_cr4_safe() (native_read_cr4_safe())
-#define write_cr4(x) (native_write_cr4(x))
-#define wbinvd() (native_wbinvd())
-
-/* Clear the 'TS' bit */
-#define clts() (native_clts())
-
-#endif/* CONFIG_PARAVIRT */
-
-/* Set the 'TS' bit */
-#define stts() write_cr0(8 | read_cr0())
-
-#endif /* __KERNEL__ */
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- __asm__("lsll %1,%0"
- :"=r" (__limit):"r" (segment));
- return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPU's follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPU's to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- *
- * Some non intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-
-
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while(0)
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb() rmb()
-#else
-# define smp_rmb() barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb() wmb()
-#else
-# define smp_wmb() barrier()
-#endif
-#define smp_read_barrier_depends() read_barrier_depends()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-#include <linux/irqflags.h>
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-extern int es7000_plat;
-void cpu_idle_wait(void);
-
-extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-
-void default_idle(void);
-void __show_registers(struct pt_regs *, int all);
-
-#endif
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 6e9e4841a2d..97fa251ccb2 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -1,126 +1,9 @@
#ifndef __ASM_SYSTEM_H
#define __ASM_SYSTEM_H
-#include <linux/kernel.h>
#include <asm/segment.h>
#include <asm/cmpxchg.h>
-#ifdef __KERNEL__
-
-/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
-# define AT_VECTOR_SIZE_ARCH 2
-#else
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
-
-#define __EXTRA_CLOBBER \
- ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
-
-/* Save restore flags to clear handle leaking NT */
-#define switch_to(prev,next,last) \
- asm volatile(SAVE_CONTEXT \
- "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
- "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
- "call __switch_to\n\t" \
- ".globl thread_return\n" \
- "thread_return:\n\t" \
- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
- "movq %P[thread_info](%%rsi),%%r8\n\t" \
- LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
- "movq %%rax,%%rdi\n\t" \
- "jc ret_from_fork\n\t" \
- RESTORE_CONTEXT \
- : "=a" (last) \
- : [next] "S" (next), [prev] "D" (prev), \
- [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
- [ti_flags] "i" (offsetof(struct thread_info, flags)),\
- [tif_fork] "i" (TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, stack)), \
- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
- : "memory", "cc" __EXTRA_CLOBBER)
-
-extern void load_gs_index(unsigned);
-
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value) \
- asm volatile("\n" \
- "1:\t" \
- "movl %k0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "movl %1,%%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 8\n\t" \
- ".quad 1b,3b\n" \
- ".previous" \
- : :"r" (value), "r" (0))
-
-/*
- * Clear and set 'TS' bit respectively
- */
-#define clts() __asm__ __volatile__ ("clts")
-
-static inline unsigned long read_cr0(void)
-{
- unsigned long cr0;
- asm volatile("movq %%cr0,%0" : "=r" (cr0));
- return cr0;
-}
-
-static inline void write_cr0(unsigned long val)
-{
- asm volatile("movq %0,%%cr0" :: "r" (val));
-}
-
-static inline unsigned long read_cr2(void)
-{
- unsigned long cr2;
- asm volatile("movq %%cr2,%0" : "=r" (cr2));
- return cr2;
-}
-
-static inline void write_cr2(unsigned long val)
-{
- asm volatile("movq %0,%%cr2" :: "r" (val));
-}
-
-static inline unsigned long read_cr3(void)
-{
- unsigned long cr3;
- asm volatile("movq %%cr3,%0" : "=r" (cr3));
- return cr3;
-}
-
-static inline void write_cr3(unsigned long val)
-{
- asm volatile("movq %0,%%cr3" :: "r" (val) : "memory");
-}
-
-static inline unsigned long read_cr4(void)
-{
- unsigned long cr4;
- asm volatile("movq %%cr4,%0" : "=r" (cr4));
- return cr4;
-}
-
-static inline void write_cr4(unsigned long val)
-{
- asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
-}
static inline unsigned long read_cr8(void)
{
@@ -134,52 +17,6 @@ static inline void write_cr8(unsigned long val)
asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
}
-#define stts() write_cr0(8 | read_cr0())
-
-#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory")
-
-#endif /* __KERNEL__ */
-
-static inline void clflush(volatile void *__p)
-{
- asm volatile("clflush %0" : "+m" (*(char __force *)__p));
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do {} while(0)
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do {} while(0)
-#endif
-
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
-#define wmb() asm volatile("sfence" ::: "memory")
-
-#define read_barrier_depends() do {} while(0)
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-
-#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
-
#include <linux/irqflags.h>
-void cpu_idle_wait(void);
-
-extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-
#endif
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index a516e9192f1..5bd508260ff 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -138,6 +138,10 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_IO_BITMAP 18 /* uses I/O bitmap */
#define TIF_FREEZE 19 /* is freezing for suspend */
#define TIF_NOTSC 20 /* TSC is not accessible in userland */
+#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -153,6 +157,10 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_NOTSC (1<<TIF_NOTSC)
+#define _TIF_FORCED_TF (1<<TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
@@ -162,8 +170,12 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG)
-#define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC)
+#define _TIF_WORK_CTXSW \
+ (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \
+ _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
+
/*
* Thread-synchronous status.
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index 7f6ee68f000..9b531ea015a 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -21,7 +21,7 @@
#ifndef __ASSEMBLY__
struct task_struct;
struct exec_domain;
-#include <asm/mmsegment.h>
+#include <asm/processor.h>
struct thread_info {
struct task_struct *task; /* main task structure */
@@ -33,6 +33,9 @@ struct thread_info {
mm_segment_t addr_limit;
struct restart_block restart_block;
+#ifdef CONFIG_IA32_EMULATION
+ void __user *sysenter_return;
+#endif
};
#endif
@@ -74,20 +77,14 @@ static inline struct thread_info *stack_thread_info(void)
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
- ({ \
- struct thread_info *ret; \
- \
- ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)); \
- if (ret) \
- memset(ret, 0, THREAD_SIZE); \
- ret; \
- })
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
#else
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
+#define THREAD_FLAGS GFP_KERNEL
#endif
+#define alloc_thread_info(tsk) \
+ ((struct thread_info *) __get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
#else /* !__ASSEMBLY__ */
@@ -124,6 +121,10 @@ static inline struct thread_info *stack_thread_info(void)
#define TIF_DEBUG 21 /* uses debug registers */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_FREEZE 23 /* is freezing for suspend */
+#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR 25 /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS 26 /* record scheduling event timestamps */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -141,6 +142,10 @@ static inline struct thread_info *stack_thread_info(void)
#define _TIF_DEBUG (1<<TIF_DEBUG)
#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
#define _TIF_FREEZE (1<<TIF_FREEZE)
+#define _TIF_FORCED_TF (1<<TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
@@ -152,7 +157,10 @@ static inline struct thread_info *stack_thread_info(void)
(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
+#define _TIF_WORK_CTXSW \
+ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index eac011366dc..68779b048a3 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -1,8 +1,12 @@
-#ifndef _ASMi386_TIME_H
-#define _ASMi386_TIME_H
+#ifndef _ASMX86_TIME_H
+#define _ASMX86_TIME_H
+extern void (*late_time_init)(void);
+extern void hpet_time_init(void);
+
+#include <asm/mc146818rtc.h>
+#ifdef CONFIG_X86_32
#include <linux/efi.h>
-#include "mach_time.h"
static inline unsigned long native_get_wallclock(void)
{
@@ -28,8 +32,20 @@ static inline int native_set_wallclock(unsigned long nowtime)
return retval;
}
-extern void (*late_time_init)(void);
-extern void hpet_time_init(void);
+#else
+extern void native_time_init_hook(void);
+
+static inline unsigned long native_get_wallclock(void)
+{
+ return mach_get_cmos_time();
+}
+
+static inline int native_set_wallclock(unsigned long nowtime)
+{
+ return mach_set_rtc_mmss(nowtime);
+}
+
+#endif
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index 0db7e994fb8..4f6fcb050c1 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -2,6 +2,7 @@
#define _ASMi386_TIMER_H
#include <linux/init.h>
#include <linux/pm.h>
+#include <linux/percpu.h>
#define TICK_SIZE (tick_nsec / 1000)
@@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void);
#define calculate_cpu_khz() native_calculate_cpu_khz()
#endif
-/* Accellerators for sched_clock()
+/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
@@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void);
* And since SC is a constant power of two, we can convert the div
* into a shift.
*
- * We can use khz divisor instead of mhz to keep a better percision, since
+ * We can use khz divisor instead of mhz to keep a better precision, since
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-extern unsigned long cyc2ns_scale __read_mostly;
+
+DECLARE_PER_CPU(unsigned long, cyc2ns);
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+ return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR;
}
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ unsigned long long ns;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ ns = __cycles_2_ns(cyc);
+ local_irq_restore(flags);
+
+ return ns;
+}
#endif
diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 39a21ab030f..27cfd6c599b 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h
@@ -7,6 +7,8 @@
#ifdef CONFIG_X86_ELAN
# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */
+#elif defined(CONFIG_X86_RDC321X)
+# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */
#else
# define PIT_TICK_RATE 1193182 /* Underlying HZ */
#endif
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 9af4cc83a1a..3998709ed63 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -1,5 +1,158 @@
+#ifndef _ASM_X86_TLBFLUSH_H
+#define _ASM_X86_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __flush_tlb() __native_flush_tlb()
+#define __flush_tlb_global() __native_flush_tlb_global()
+#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#endif
+
+static inline void __native_flush_tlb(void)
+{
+ write_cr3(read_cr3());
+}
+
+static inline void __native_flush_tlb_global(void)
+{
+ unsigned long cr4 = read_cr4();
+
+ /* clear PGE */
+ write_cr4(cr4 & ~X86_CR4_PGE);
+ /* write old PGE again and flush TLBs */
+ write_cr4(cr4);
+}
+
+static inline void __native_flush_tlb_single(unsigned long addr)
+{
+ __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory");
+}
+
+static inline void __flush_tlb_all(void)
+{
+ if (cpu_has_pge)
+ __flush_tlb_global();
+ else
+ __flush_tlb();
+}
+
+static inline void __flush_tlb_one(unsigned long addr)
+{
+ if (cpu_has_invlpg)
+ __flush_tlb_single(addr);
+ else
+ __flush_tlb();
+}
+
#ifdef CONFIG_X86_32
-# include "tlbflush_32.h"
+# define TLB_FLUSH_ALL 0xffffffff
#else
-# include "tlbflush_64.h"
+# define TLB_FLUSH_ALL -1ULL
+#endif
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ *
+ * x86-64 can only flush individual pages or full VMs. For a range flush
+ * we always do the full VM. Might be worth trying if for a small
+ * range a few INVLPGs in a row are a win.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb();
+}
+
+static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+ struct mm_struct *mm,
+ unsigned long va)
+{
+}
+
+#else /* SMP */
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb() flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+
+void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
+ unsigned long va);
+
+#define TLBSTATE_OK 1
+#define TLBSTATE_LAZY 2
+
+#ifdef CONFIG_X86_32
+struct tlb_state
+{
+ struct mm_struct *active_mm;
+ int state;
+ char __cacheline_padding[L1_CACHE_BYTES-8];
+};
+DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+#endif
+
+#endif /* SMP */
+
+#ifndef CONFIG_PARAVIRT
+#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va)
#endif
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ flush_tlb_all();
+}
+
+#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
deleted file mode 100644
index 2bd5b95e204..00000000000
--- a/include/asm-x86/tlbflush_32.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef _I386_TLBFLUSH_H
-#define _I386_TLBFLUSH_H
-
-#include <linux/mm.h>
-#include <asm/processor.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define __flush_tlb() __native_flush_tlb()
-#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
-#endif
-
-#define __native_flush_tlb() \
- do { \
- unsigned int tmpreg; \
- \
- __asm__ __volatile__( \
- "movl %%cr3, %0; \n" \
- "movl %0, %%cr3; # flush TLB \n" \
- : "=r" (tmpreg) \
- :: "memory"); \
- } while (0)
-
-/*
- * Global pages have to be flushed a bit differently. Not a real
- * performance problem because this does not happen often.
- */
-#define __native_flush_tlb_global() \
- do { \
- unsigned int tmpreg, cr4, cr4_orig; \
- \
- __asm__ __volatile__( \
- "movl %%cr4, %2; # turn off PGE \n" \
- "movl %2, %1; \n" \
- "andl %3, %1; \n" \
- "movl %1, %%cr4; \n" \
- "movl %%cr3, %0; \n" \
- "movl %0, %%cr3; # flush TLB \n" \
- "movl %2, %%cr4; # turn PGE back on \n" \
- : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \
- : "i" (~X86_CR4_PGE) \
- : "memory"); \
- } while (0)
-
-#define __native_flush_tlb_single(addr) \
- __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
-
-# define __flush_tlb_all() \
- do { \
- if (cpu_has_pge) \
- __flush_tlb_global(); \
- else \
- __flush_tlb(); \
- } while (0)
-
-#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
-
-#ifdef CONFIG_X86_INVLPG
-# define __flush_tlb_one(addr) __flush_tlb_single(addr)
-#else
-# define __flush_tlb_one(addr) \
- do { \
- if (cpu_has_invlpg) \
- __flush_tlb_single(addr); \
- else \
- __flush_tlb(); \
- } while (0)
-#endif
-
-/*
- * TLB flushing:
- *
- * - flush_tlb() flushes the current mm struct TLBs
- * - flush_tlb_all() flushes all processes TLBs
- * - flush_tlb_mm(mm) flushes the specified mm context TLB's
- * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_range(vma, start, end) flushes a range of pages
- * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
- *
- * ..but the i386 has somewhat limited tlb flushing capabilities,
- * and page-granular flushes are available only on i486 and up.
- */
-
-#define TLB_FLUSH_ALL 0xffffffff
-
-
-#ifndef CONFIG_SMP
-
-#include <linux/sched.h>
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- if (mm == current->active_mm)
- __flush_tlb();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb();
-}
-
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
- struct mm_struct *mm, unsigned long va)
-{
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() \
- __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb() flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
-{
- flush_tlb_mm(vma->vm_mm);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
- unsigned long va);
-
-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
-struct tlb_state
-{
- struct mm_struct *active_mm;
- int state;
- char __cacheline_padding[L1_CACHE_BYTES-8];
-};
-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
-#endif /* SMP */
-
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va) \
- native_flush_tlb_others(&mask, mm, va)
-#endif
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
-#endif /* _I386_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
deleted file mode 100644
index 7731fd23d57..00000000000
--- a/include/asm-x86/tlbflush_64.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef _X8664_TLBFLUSH_H
-#define _X8664_TLBFLUSH_H
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-
-static inline void __flush_tlb(void)
-{
- write_cr3(read_cr3());
-}
-
-static inline void __flush_tlb_all(void)
-{
- unsigned long cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
- write_cr4(cr4); /* write old PGE again and flush TLBs */
-}
-
-#define __flush_tlb_one(addr) \
- __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
-
-
-/*
- * TLB flushing:
- *
- * - flush_tlb() flushes the current mm struct TLBs
- * - flush_tlb_all() flushes all processes TLBs
- * - flush_tlb_mm(mm) flushes the specified mm context TLB's
- * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_range(vma, start, end) flushes a range of pages
- * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *
- * x86-64 can only flush individual pages or full VMs. For a range flush
- * we always do the full VM. Might be worth trying if for a small
- * range a few INVLPGs in a row are a win.
- */
-
-#ifndef CONFIG_SMP
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- if (mm == current->active_mm)
- __flush_tlb();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb();
-}
-
-#else
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() \
- __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb() flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
-{
- flush_tlb_mm(vma->vm_mm);
-}
-
-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
-/* Roughly an IPI every 20MB with 4k pages for freeing page table
- ranges. Cost is about 42k of memory for each CPU. */
-#define ARCH_FREE_PTE_NR 5350
-
-#endif
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
-#endif /* _X8664_TLBFLUSH_H */
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index b10fde9798e..8af05a93f09 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -1,5 +1,188 @@
+/*
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _ASM_X86_TOPOLOGY_H
+#define _ASM_X86_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+
+/* Mappings between logical cpu number and node number */
#ifdef CONFIG_X86_32
-# include "topology_32.h"
+extern int cpu_to_node_map[];
+
#else
-# include "topology_64.h"
+DECLARE_PER_CPU(int, x86_cpu_to_node_map);
+extern int x86_cpu_to_node_map_init[];
+extern void *x86_cpu_to_node_map_early_ptr;
+/* Returns the number of the current Node. */
+#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id()))
+#endif
+
+extern cpumask_t node_to_cpumask_map[];
+
+#define NUMA_NO_NODE (-1)
+
+/* Returns the number of the node containing CPU 'cpu' */
+#ifdef CONFIG_X86_32
+#define early_cpu_to_node(cpu) cpu_to_node(cpu)
+static inline int cpu_to_node(int cpu)
+{
+ return cpu_to_node_map[cpu];
+}
+
+#else /* CONFIG_X86_64 */
+static inline int early_cpu_to_node(int cpu)
+{
+ int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+
+ if (cpu_to_node_map)
+ return cpu_to_node_map[cpu];
+ else if (per_cpu_offset(cpu))
+ return per_cpu(x86_cpu_to_node_map, cpu);
+ else
+ return NUMA_NO_NODE;
+}
+
+static inline int cpu_to_node(int cpu)
+{
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (x86_cpu_to_node_map_early_ptr) {
+ printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
+ (int)cpu);
+ dump_stack();
+ return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
+ }
+#endif
+ if (per_cpu_offset(cpu))
+ return per_cpu(x86_cpu_to_node_map, cpu);
+ else
+ return NUMA_NO_NODE;
+}
+#endif /* CONFIG_X86_64 */
+
+/*
+ * Returns the number of the node containing Node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline cpumask_t node_to_cpumask(int node)
+{
+ return node_to_cpumask_map[node];
+}
+
+/* Returns the number of the first CPU on Node 'node'. */
+static inline int node_to_first_cpu(int node)
+{
+ cpumask_t mask = node_to_cpumask(node);
+
+ return first_cpu(mask);
+}
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
+
+#ifdef CONFIG_X86_32
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_remap_size[];
+#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
+
+# ifdef CONFIG_X86_HT
+# define ENABLE_TOPO_DEFINES
+# endif
+
+# define SD_CACHE_NICE_TRIES 1
+# define SD_IDLE_IDX 1
+# define SD_NEWIDLE_IDX 2
+# define SD_FORKEXEC_IDX 0
+
+#else
+
+# ifdef CONFIG_SMP
+# define ENABLE_TOPO_DEFINES
+# endif
+
+# define SD_CACHE_NICE_TRIES 2
+# define SD_IDLE_IDX 2
+# define SD_NEWIDLE_IDX 0
+# define SD_FORKEXEC_IDX 1
+
+#endif
+
+/* sched_domains SD_NODE_INIT for NUMAQ machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .child = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = SD_CACHE_NICE_TRIES, \
+ .busy_idx = 3, \
+ .idle_idx = SD_IDLE_IDX, \
+ .newidle_idx = SD_NEWIDLE_IDX, \
+ .wake_idx = 1, \
+ .forkexec_idx = SD_FORKEXEC_IDX, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_FORK \
+ | SD_SERIALIZE \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+
+#ifdef CONFIG_X86_64_ACPI_NUMA
+extern int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+#endif
+
+#else /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif
+
+extern cpumask_t cpu_coregroup_map(int cpu);
+
+#ifdef ENABLE_TOPO_DEFINES
+#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
+#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
+#endif
+
+#ifdef CONFIG_SMP
+#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
+#define smt_capable() (smp_num_siblings > 1)
+#endif
+
#endif
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
deleted file mode 100644
index 9040f5a6127..00000000000
--- a/include/asm-x86/topology_32.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * linux/include/asm-i386/topology.h
- *
- * Written by: Matthew Dobson, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- */
-#ifndef _ASM_I386_TOPOLOGY_H
-#define _ASM_I386_TOPOLOGY_H
-
-#ifdef CONFIG_X86_HT
-#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
-#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
-#endif
-
-#ifdef CONFIG_NUMA
-
-#include <asm/mpspec.h>
-
-#include <linux/cpumask.h>
-
-/* Mappings between logical cpu number and node number */
-extern cpumask_t node_2_cpu_mask[];
-extern int cpu_2_node[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int cpu_to_node(int cpu)
-{
- return cpu_2_node[cpu];
-}
-
-/* Returns the number of the node containing Node 'node'. This architecture is flat,
- so it is a pretty simple function! */
-#define parent_node(node) (node)
-
-/* Returns a bitmask of CPUs on Node 'node'. */
-static inline cpumask_t node_to_cpumask(int node)
-{
- return node_2_cpu_mask[node];
-}
-
-/* Returns the number of the first CPU on Node 'node'. */
-static inline int node_to_first_cpu(int node)
-{
- cpumask_t mask = node_to_cpumask(node);
- return first_cpu(mask);
-}
-
-#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
-#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
-
-/* sched_domains SD_NODE_INIT for NUMAQ machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
- .parent = NULL, \
- .child = NULL, \
- .groups = NULL, \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 3, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
- .wake_idx = 1, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC \
- | SD_BALANCE_FORK \
- | SD_SERIALIZE \
- | SD_WAKE_BALANCE, \
- .last_balance = jiffies, \
- .balance_interval = 1, \
- .nr_balance_failed = 0, \
-}
-
-extern unsigned long node_start_pfn[];
-extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
-
-#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
-
-#else /* !CONFIG_NUMA */
-/*
- * Other i386 platforms should define their own version of the
- * above macros here.
- */
-
-#include <asm-generic/topology.h>
-
-#endif /* CONFIG_NUMA */
-
-extern cpumask_t cpu_coregroup_map(int cpu);
-
-#ifdef CONFIG_SMP
-#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
-#define smt_capable() (smp_num_siblings > 1)
-#endif
-
-#endif /* _ASM_I386_TOPOLOGY_H */
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
deleted file mode 100644
index a718dda037e..00000000000
--- a/include/asm-x86/topology_64.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef _ASM_X86_64_TOPOLOGY_H
-#define _ASM_X86_64_TOPOLOGY_H
-
-
-#ifdef CONFIG_NUMA
-
-#include <asm/mpspec.h>
-#include <linux/bitops.h>
-
-extern cpumask_t cpu_online_map;
-
-extern unsigned char cpu_to_node[];
-extern cpumask_t node_to_cpumask[];
-
-#ifdef CONFIG_ACPI_NUMA
-extern int __node_distance(int, int);
-#define node_distance(a,b) __node_distance(a,b)
-/* #else fallback version */
-#endif
-
-#define cpu_to_node(cpu) (cpu_to_node[cpu])
-#define parent_node(node) (node)
-#define node_to_first_cpu(node) (first_cpu(node_to_cpumask[node]))
-#define node_to_cpumask(node) (node_to_cpumask[node])
-#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
-#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus));
-
-#define numa_node_id() read_pda(nodenumber)
-
-/* sched_domains SD_NODE_INIT for x86_64 machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
- .parent = NULL, \
- .child = NULL, \
- .groups = NULL, \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 2, \
- .busy_idx = 3, \
- .idle_idx = 2, \
- .newidle_idx = 0, \
- .wake_idx = 1, \
- .forkexec_idx = 1, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_FORK \
- | SD_BALANCE_EXEC \
- | SD_SERIALIZE \
- | SD_WAKE_BALANCE, \
- .last_balance = jiffies, \
- .balance_interval = 1, \
- .nr_balance_failed = 0, \
-}
-
-#endif
-
-#ifdef CONFIG_SMP
-#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
-#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
-#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
-#define smt_capable() (smp_num_siblings > 1)
-#endif
-
-#include <asm-generic/topology.h>
-
-extern cpumask_t cpu_coregroup_map(int cpu);
-
-#endif
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 6baab30dc2c..7d3e27f7d48 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -17,6 +17,8 @@ typedef unsigned long long cycles_t;
extern unsigned int cpu_khz;
extern unsigned int tsc_khz;
+extern void disable_TSC(void);
+
static inline cycles_t get_cycles(void)
{
unsigned long long ret = 0;
@@ -25,39 +27,22 @@ static inline cycles_t get_cycles(void)
if (!cpu_has_tsc)
return 0;
#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
rdtscll(ret);
-#endif
+
return ret;
}
-/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
+static inline cycles_t vget_cycles(void)
{
- unsigned long long ret;
- unsigned eax, edx;
-
- /*
- * Use RDTSCP if possible; it is guaranteed to be synchronous
- * and doesn't cause a VMEXIT on Hypervisors
- */
- alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP,
- ASM_OUTPUT2("=a" (eax), "=d" (edx)),
- "a" (0U), "d" (0U) : "ecx", "memory");
- ret = (((unsigned long long)edx) << 32) | ((unsigned long long)eax);
- if (ret)
- return ret;
-
/*
- * Don't do an additional sync on CPUs where we know
- * RDTSC is already synchronous:
+ * We only do VDSOs on TSC capable CPUs, so this shouldnt
+ * access boot_cpu_data (which is not VDSO-safe):
*/
- alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
- "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
- rdtscll(ret);
-
- return ret;
+#ifndef CONFIG_X86_TSC
+ if (!cpu_has_tsc)
+ return 0;
+#endif
+ return (cycles_t) __native_read_tsc();
}
extern void tsc_init(void);
@@ -73,8 +58,7 @@ int check_tsc_unstable(void);
extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
-#ifdef CONFIG_X86_64
extern void tsc_calibrate(void);
-#endif
+extern int notsc_setup(char *);
#endif
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index f4ce8768ad4..31d79470271 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -65,6 +65,8 @@ struct exception_table_entry
unsigned long insn, fixup;
};
+extern int fixup_exception(struct pt_regs *regs);
+
#define ARCH_HAS_SEARCH_EXTABLE
/*
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 9b15545eb9b..8d8f9b5adbb 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -333,8 +333,6 @@
#ifdef __KERNEL__
-#define NR_syscalls 325
-
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_OLD_STAT
diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h
index 0e85d2a5e33..ed8b8fc6906 100644
--- a/include/asm-x86/user_32.h
+++ b/include/asm-x86/user_32.h
@@ -75,13 +75,23 @@ struct user_fxsr_struct {
* doesn't use the extra segment registers)
*/
struct user_regs_struct {
- long ebx, ecx, edx, esi, edi, ebp, eax;
- unsigned short ds, __ds, es, __es;
- unsigned short fs, __fs, gs, __gs;
- long orig_eax, eip;
- unsigned short cs, __cs;
- long eflags, esp;
- unsigned short ss, __ss;
+ unsigned long bx;
+ unsigned long cx;
+ unsigned long dx;
+ unsigned long si;
+ unsigned long di;
+ unsigned long bp;
+ unsigned long ax;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+ unsigned long orig_ax;
+ unsigned long ip;
+ unsigned long cs;
+ unsigned long flags;
+ unsigned long sp;
+ unsigned long ss;
};
/* When the kernel dumps core, it starts by dumping the user struct -
diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h
index 12785c649ac..a5449d456cc 100644
--- a/include/asm-x86/user_64.h
+++ b/include/asm-x86/user_64.h
@@ -40,13 +40,13 @@
* and both the standard and SIMD floating point data can be accessed via
* the new ptrace requests. In either case, changes to the FPU environment
* will be reflected in the task's state as expected.
- *
+ *
* x86-64 support by Andi Kleen.
*/
/* This matches the 64bit FXSAVE format as defined by AMD. It is the same
as the 32bit format defined by Intel, except that the selector:offset pairs for
- data and eip are replaced with flat 64bit pointers. */
+ data and eip are replaced with flat 64bit pointers. */
struct user_i387_struct {
unsigned short cwd;
unsigned short swd;
@@ -65,13 +65,34 @@ struct user_i387_struct {
* Segment register layout in coredumps.
*/
struct user_regs_struct {
- unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10;
- unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
- unsigned long rip,cs,eflags;
- unsigned long rsp,ss;
- unsigned long fs_base, gs_base;
- unsigned long ds,es,fs,gs;
-};
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long bp;
+ unsigned long bx;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long ax;
+ unsigned long cx;
+ unsigned long dx;
+ unsigned long si;
+ unsigned long di;
+ unsigned long orig_ax;
+ unsigned long ip;
+ unsigned long cs;
+ unsigned long flags;
+ unsigned long sp;
+ unsigned long ss;
+ unsigned long fs_base;
+ unsigned long gs_base;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+};
/* When the kernel dumps core, it starts by dumping the user struct -
this will be used by gdb to figure out where the data and stack segments
@@ -94,7 +115,7 @@ struct user{
This is actually the bottom of the stack,
the top of the stack is always found in the
esp register. */
- long int signal; /* Signal that caused the core dump. */
+ long int signal; /* Signal that caused the core dump. */
int reserved; /* No longer used */
int pad1;
struct user_pt_regs * u_ar0; /* Used by gdb to help find the values for */
diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
new file mode 100644
index 00000000000..629bcb6e8e4
--- /dev/null
+++ b/include/asm-x86/vdso.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_X86_VDSO_H
+#define _ASM_X86_VDSO_H 1
+
+#ifdef CONFIG_X86_64
+extern const char VDSO64_PRELINK[];
+
+/*
+ * Given a pointer to the vDSO image, find the pointer to VDSO64_name
+ * as that symbol is defined in the vDSO sources or linker script.
+ */
+#define VDSO64_SYMBOL(base, name) ({ \
+ extern const char VDSO64_##name[]; \
+ (void *) (VDSO64_##name - VDSO64_PRELINK + (unsigned long) (base)); })
+#endif
+
+#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+extern const char VDSO32_PRELINK[];
+
+/*
+ * Given a pointer to the vDSO image, find the pointer to VDSO32_name
+ * as that symbol is defined in the vDSO sources or linker script.
+ */
+#define VDSO32_SYMBOL(base, name) ({ \
+ extern const char VDSO32_##name[]; \
+ (void *) (VDSO32_##name - VDSO32_PRELINK + (unsigned long) (base)); })
+#endif
+
+#endif /* asm-x86/vdso.h */
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index f01c49f5d10..17b3700949b 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -36,6 +36,8 @@ extern volatile unsigned long __jiffies;
extern int vgetcpu_mode;
extern struct timezone sys_tz;
+extern void map_vsyscall(void);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_64_VSYSCALL_H_ */
diff --git a/include/asm-x86/vsyscall32.h b/include/asm-x86/vsyscall32.h
deleted file mode 100644
index c631c082f8f..00000000000
--- a/include/asm-x86/vsyscall32.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_VSYSCALL32_H
-#define _ASM_VSYSCALL32_H 1
-
-/* Values need to match arch/x86_64/ia32/vsyscall.lds */
-
-#ifdef __ASSEMBLY__
-#define VSYSCALL32_BASE 0xffffe000
-#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
-#else
-#define VSYSCALL32_BASE 0xffffe000UL
-#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
-#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
-
-#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400)
-#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
-#define VSYSCALL32_SIGRETURN ((void __user *)VSYSCALL32_BASE + 0x500)
-#define VSYSCALL32_RTSIGRETURN ((void __user *)VSYSCALL32_BASE + 0x600)
-#endif
-
-#endif
diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h
index 23c86cef3b2..a41ef1bdd42 100644
--- a/include/asm-x86/xor_32.h
+++ b/include/asm-x86/xor_32.h
@@ -1,6 +1,4 @@
/*
- * include/asm-i386/xor.h
- *
* Optimized RAID-5 checksumming functions for MMX and SSE.
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h
index f942fcc2183..1eee7fcb242 100644
--- a/include/asm-x86/xor_64.h
+++ b/include/asm-x86/xor_64.h
@@ -1,6 +1,4 @@
/*
- * include/asm-x86_64/xor.h
- *
* Optimized RAID-5 checksumming functions for MMX and SSE.
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h
index 1d0ef1ae803..7e3d2859be5 100644
--- a/include/linux/acpi_pmtmr.h
+++ b/include/linux/acpi_pmtmr.h
@@ -25,6 +25,8 @@ static inline u32 acpi_pm_read_early(void)
return acpi_pm_read_verified() & ACPI_PM_MASK;
}
+extern void pmtimer_wait(unsigned);
+
#else
static inline u32 acpi_pm_read_early(void)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 107787aacb6..85778a4b120 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -103,7 +103,7 @@ struct clocksource {
#define CLOCK_SOURCE_VALID_FOR_HRES 0x20
/* simplify initialization of mask field */
-#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1)
+#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
/**
* clocksource_khz2mult - calculates mult from khz and shift
@@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
/* used to install a new clocksource */
extern int clocksource_register(struct clocksource*);
+extern void clocksource_unregister(struct clocksource*);
extern struct clocksource* clocksource_get_next(void);
extern void clocksource_change_rating(struct clocksource *cs, int rating);
extern void clocksource_resume(void);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0e69d2cf14a..d38655f2be7 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -191,6 +191,10 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
struct compat_timeval __user *tvp);
+asmlinkage long compat_sys_wait4(compat_pid_t pid,
+ compat_uint_t *stat_addr, int options,
+ struct compat_rusage *ru);
+
#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
#define BITS_TO_COMPAT_LONGS(bits) \
@@ -239,6 +243,17 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
const compat_ulong_t __user *new_nodes);
+extern int compat_ptrace_request(struct task_struct *child,
+ compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data);
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data);
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+ compat_long_t addr, compat_long_t data);
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
/*
* epoll (fs/eventpoll.c) compat bits follow ...
*/
diff --git a/include/linux/const.h b/include/linux/const.h
index 07b300bfe34..c22c707c455 100644
--- a/include/linux/const.h
+++ b/include/linux/const.h
@@ -7,13 +7,18 @@
* C code. Therefore we cannot annotate them always with
* 'UL' and other type specifiers unilaterally. We
* use the following macros to deal with this.
+ *
+ * Similarly, _AT() will cast an expression with a type in C, but
+ * leave it unchanged in asm.
*/
#ifdef __ASSEMBLY__
#define _AC(X,Y) X
+#define _AT(T,X) X
#else
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
+#define _AT(T,X) ((T)(X))
#endif
#endif /* !(_LINUX_CONST_H) */
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 85bd790c201..7047f58306a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -218,8 +218,8 @@ int __first_cpu(const cpumask_t *srcp);
int __next_cpu(int n, const cpumask_t *srcp);
#define next_cpu(n, src) __next_cpu((n), &(src))
#else
-#define first_cpu(src) 0
-#define next_cpu(n, src) 1
+#define first_cpu(src) ({ (void)(src); 0; })
+#define next_cpu(n, src) ({ (void)(src); 1; })
#endif
#define cpumask_of_cpu(cpu) \
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 576e83bd6d8..7ceb24d87c1 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -355,6 +355,7 @@ typedef struct elf64_shdr {
#define NT_AUXV 6
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
/* Note header in a PT_NOTE section */
diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index 707f7cb9e79..9cd94bfd07e 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -64,7 +64,7 @@ struct hpet {
*/
#define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL)
-#define Tn_INI_ROUTE_CAP_SHIFT (32UL)
+#define Tn_INT_ROUTE_CAP_SHIFT (32UL)
#define Tn_FSB_INT_DELCAP_MASK (0x8000UL)
#define Tn_FSB_INT_DELCAP_SHIFT (15)
#define Tn_FSB_EN_CNF_MASK (0x4000UL)
@@ -115,9 +115,6 @@ static inline void hpet_reserve_timer(struct hpet_data *hd, int timer)
}
int hpet_alloc(struct hpet_data *);
-int hpet_register(struct hpet_task *, int);
-int hpet_unregister(struct hpet_task *);
-int hpet_control(struct hpet_task *, unsigned int, unsigned long);
#endif /* __KERNEL__ */
diff --git a/include/linux/init_ohci1394_dma.h b/include/linux/init_ohci1394_dma.h
new file mode 100644
index 00000000000..3c03a4bba5e
--- /dev/null
+++ b/include/linux/init_ohci1394_dma.h
@@ -0,0 +1,4 @@
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+extern int __initdata init_ohci1394_dma_early;
+extern void __init init_ohci1394_dma_on_all_controllers(void);
+#endif
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6187a8567bc..605d237364d 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -8,6 +8,7 @@
#ifndef _LINUX_IOPORT_H
#define _LINUX_IOPORT_H
+#ifndef __ASSEMBLY__
#include <linux/compiler.h>
#include <linux/types.h>
/*
@@ -153,4 +154,5 @@ extern struct resource * __devm_request_region(struct device *dev,
extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
+#endif /* __ASSEMBLY__ */
#endif /* _LINUX_IOPORT_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a7283c9bead..ff356b2ee47 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -194,6 +194,9 @@ static inline int log_buf_read(int idx) { return 0; }
static inline int log_buf_copy(char *dest, int idx, int len) { return 0; }
#endif
+extern void __attribute__((format(printf, 1, 2)))
+ early_printk(const char *fmt, ...);
+
unsigned long int_sqrt(unsigned long);
extern int printk_ratelimit(void);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 81891581e89..6168c0a4417 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -182,6 +182,15 @@ static inline void kretprobe_assert(struct kretprobe_instance *ri,
}
}
+#ifdef CONFIG_KPROBES_SANITY_TEST
+extern int init_test_probes(void);
+#else
+static inline int init_test_probes(void)
+{
+ return 0;
+}
+#endif /* CONFIG_KPROBES_SANITY_TEST */
+
extern spinlock_t kretprobe_lock;
extern struct mutex kprobe_mutex;
extern int arch_prepare_kprobe(struct kprobe *p);
@@ -227,6 +236,7 @@ void unregister_kretprobe(struct kretprobe *rp);
void kprobe_flush_task(struct task_struct *tk);
void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
+
#else /* CONFIG_KPROBES */
#define __kprobes /**/
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index ff203dd0291..3faf599ea58 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -13,6 +13,10 @@
#define asmlinkage CPP_ASMLINKAGE
#endif
+#ifndef asmregparm
+# define asmregparm
+#endif
+
#ifndef prevent_tail_call
# define prevent_tail_call(ret) do { } while (0)
#endif
@@ -53,6 +57,10 @@
.size name, .-name
#endif
+/* If symbol 'name' is treated as a subroutine (gets called, and returns)
+ * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of
+ * static analysis tools such as stack depth analyzer.
+ */
#ifndef ENDPROC
#define ENDPROC(name) \
.type name, @function; \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1897ca223ec..1bba6789a50 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm,
}
#endif /* CONFIG_PROC_FS */
-#ifndef CONFIG_DEBUG_PAGEALLOC
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern int debug_pagealloc_enabled;
+
+extern void kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void enable_debug_pagealloc(void)
+{
+ debug_pagealloc_enabled = 1;
+}
+#else
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
+static inline void enable_debug_pagealloc(void)
+{
+}
#endif
extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
@@ -1146,6 +1158,7 @@ extern int randomize_va_space;
#endif
const char * arch_vma_name(struct vm_area_struct *vma);
+void print_vma_addr(char *prefix, unsigned long rip);
struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c6953134836..41f6f28690f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2085,6 +2085,13 @@
#define PCI_VENDOR_ID_BELKIN 0x1799
#define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f
+#define PCI_VENDOR_ID_RDC 0x17f3
+#define PCI_DEVICE_ID_RDC_R6020 0x6020
+#define PCI_DEVICE_ID_RDC_R6030 0x6030
+#define PCI_DEVICE_ID_RDC_R6040 0x6040
+#define PCI_DEVICE_ID_RDC_R6060 0x6060
+#define PCI_DEVICE_ID_RDC_R6061 0x6061
+
#define PCI_VENDOR_ID_LENOVO 0x17aa
#define PCI_VENDOR_ID_ARECA 0x17d3
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 926adaae0f9..00412bb494c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,6 +9,30 @@
#include <asm/percpu.h>
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
+#ifdef CONFIG_SMP
+#define DEFINE_PER_CPU(type, name) \
+ __attribute__((__section__(".data.percpu"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+#else
+#define DEFINE_PER_CPU(type, name) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+#endif
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
#ifndef PERCPU_ENOUGH_ROOM
#ifdef CONFIG_MODULES
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 3ea5750a0f7..515bff053de 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -129,6 +129,81 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
#define force_successful_syscall_return() do { } while (0)
#endif
+/*
+ * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
+ *
+ * These do-nothing inlines are used when the arch does not
+ * implement single-step. The kerneldoc comments are here
+ * to document the interface for all arch definitions.
+ */
+
+#ifndef arch_has_single_step
+/**
+ * arch_has_single_step - does this CPU support user-mode single-step?
+ *
+ * If this is defined, then there must be function declarations or
+ * inlines for user_enable_single_step() and user_disable_single_step().
+ * arch_has_single_step() should evaluate to nonzero iff the machine
+ * supports instruction single-step for user mode.
+ * It can be a constant or it can test a CPU feature bit.
+ */
+#define arch_has_single_step() (0)
+
+/**
+ * user_enable_single_step - single-step in user-mode task
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * This can only be called when arch_has_single_step() has returned nonzero.
+ * Set @task so that when it returns to user mode, it will trap after the
+ * next single instruction executes. If arch_has_block_step() is defined,
+ * this must clear the effects of user_enable_block_step() too.
+ */
+static inline void user_enable_single_step(struct task_struct *task)
+{
+ BUG(); /* This can never be called. */
+}
+
+/**
+ * user_disable_single_step - cancel user-mode single-step
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * Clear @task of the effects of user_enable_single_step() and
+ * user_enable_block_step(). This can be called whether or not either
+ * of those was ever called on @task, and even if arch_has_single_step()
+ * returned zero.
+ */
+static inline void user_disable_single_step(struct task_struct *task)
+{
+}
+#endif /* arch_has_single_step */
+
+#ifndef arch_has_block_step
+/**
+ * arch_has_block_step - does this CPU support user-mode block-step?
+ *
+ * If this is defined, then there must be a function declaration or inline
+ * for user_enable_block_step(), and arch_has_single_step() must be defined
+ * too. arch_has_block_step() should evaluate to nonzero iff the machine
+ * supports step-until-branch for user mode. It can be a constant or it
+ * can test a CPU feature bit.
+ */
+#define arch_has_block_step() (0)
+
+/**
+ * user_enable_block_step - step until branch in user-mode task
+ * @task: either current or a task stopped in %TASK_TRACED
+ *
+ * This can only be called when arch_has_block_step() has returned nonzero,
+ * and will never be called when single-instruction stepping is being used.
+ * Set @task so that when it returns to user mode, it will trap after the
+ * next branch or trap taken.
+ */
+static inline void user_enable_block_step(struct task_struct *task)
+{
+ BUG(); /* This can never be called. */
+}
+#endif /* arch_has_block_step */
+
#endif
#endif
diff --git a/include/linux/regset.h b/include/linux/regset.h
new file mode 100644
index 00000000000..8abee655622
--- /dev/null
+++ b/include/linux/regset.h
@@ -0,0 +1,368 @@
+/*
+ * User-mode machine state access
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * Red Hat Author: Roland McGrath.
+ */
+
+#ifndef _LINUX_REGSET_H
+#define _LINUX_REGSET_H 1
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+struct task_struct;
+struct user_regset;
+
+
+/**
+ * user_regset_active_fn - type of @active function in &struct user_regset
+ * @target: thread being examined
+ * @regset: regset being examined
+ *
+ * Return -%ENODEV if not available on the hardware found.
+ * Return %0 if no interesting state in this thread.
+ * Return >%0 number of @size units of interesting state.
+ * Any get call fetching state beyond that number will
+ * see the default initialization state for this data,
+ * so a caller that knows what the default state is need
+ * not copy it all out.
+ * This call is optional; the pointer is %NULL if there
+ * is no inexpensive check to yield a value < @n.
+ */
+typedef int user_regset_active_fn(struct task_struct *target,
+ const struct user_regset *regset);
+
+/**
+ * user_regset_get_fn - type of @get function in &struct user_regset
+ * @target: thread being examined
+ * @regset: regset being examined
+ * @pos: offset into the regset data to access, in bytes
+ * @count: amount of data to copy, in bytes
+ * @kbuf: if not %NULL, a kernel-space pointer to copy into
+ * @ubuf: if @kbuf is %NULL, a user-space pointer to copy into
+ *
+ * Fetch register values. Return %0 on success; -%EIO or -%ENODEV
+ * are usual failure returns. The @pos and @count values are in
+ * bytes, but must be properly aligned. If @kbuf is non-null, that
+ * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then
+ * ubuf gives a userland pointer to access directly, and an -%EFAULT
+ * return value is possible.
+ */
+typedef int user_regset_get_fn(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf);
+
+/**
+ * user_regset_set_fn - type of @set function in &struct user_regset
+ * @target: thread being examined
+ * @regset: regset being examined
+ * @pos: offset into the regset data to access, in bytes
+ * @count: amount of data to copy, in bytes
+ * @kbuf: if not %NULL, a kernel-space pointer to copy from
+ * @ubuf: if @kbuf is %NULL, a user-space pointer to copy from
+ *
+ * Store register values. Return %0 on success; -%EIO or -%ENODEV
+ * are usual failure returns. The @pos and @count values are in
+ * bytes, but must be properly aligned. If @kbuf is non-null, that
+ * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then
+ * ubuf gives a userland pointer to access directly, and an -%EFAULT
+ * return value is possible.
+ */
+typedef int user_regset_set_fn(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/**
+ * user_regset_writeback_fn - type of @writeback function in &struct user_regset
+ * @target: thread being examined
+ * @regset: regset being examined
+ * @immediate: zero if writeback at completion of next context switch is OK
+ *
+ * This call is optional; usually the pointer is %NULL. When
+ * provided, there is some user memory associated with this regset's
+ * hardware, such as memory backing cached register data on register
+ * window machines; the regset's data controls what user memory is
+ * used (e.g. via the stack pointer value).
+ *
+ * Write register data back to user memory. If the @immediate flag
+ * is nonzero, it must be written to the user memory so uaccess or
+ * access_process_vm() can see it when this call returns; if zero,
+ * then it must be written back by the time the task completes a
+ * context switch (as synchronized with wait_task_inactive()).
+ * Return %0 on success or if there was nothing to do, -%EFAULT for
+ * a memory problem (bad stack pointer or whatever), or -%EIO for a
+ * hardware problem.
+ */
+typedef int user_regset_writeback_fn(struct task_struct *target,
+ const struct user_regset *regset,
+ int immediate);
+
+/**
+ * struct user_regset - accessible thread CPU state
+ * @n: Number of slots (registers).
+ * @size: Size in bytes of a slot (register).
+ * @align: Required alignment, in bytes.
+ * @bias: Bias from natural indexing.
+ * @core_note_type: ELF note @n_type value used in core dumps.
+ * @get: Function to fetch values.
+ * @set: Function to store values.
+ * @active: Function to report if regset is active, or %NULL.
+ * @writeback: Function to write data back to user memory, or %NULL.
+ *
+ * This data structure describes a machine resource we call a register set.
+ * This is part of the state of an individual thread, not necessarily
+ * actual CPU registers per se. A register set consists of a number of
+ * similar slots, given by @n. Each slot is @size bytes, and aligned to
+ * @align bytes (which is at least @size).
+ *
+ * These functions must be called only on the current thread or on a
+ * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are
+ * guaranteed will not be woken up and return to user mode, and that we
+ * have called wait_task_inactive() on. (The target thread always might
+ * wake up for SIGKILL while these functions are working, in which case
+ * that thread's user_regset state might be scrambled.)
+ *
+ * The @pos argument must be aligned according to @align; the @count
+ * argument must be a multiple of @size. These functions are not
+ * responsible for checking for invalid arguments.
+ *
+ * When there is a natural value to use as an index, @bias gives the
+ * difference between the natural index and the slot index for the
+ * register set. For example, x86 GDT segment descriptors form a regset;
+ * the segment selector produces a natural index, but only a subset of
+ * that index space is available as a regset (the TLS slots); subtracting
+ * @bias from a segment selector index value computes the regset slot.
+ *
+ * If nonzero, @core_note_type gives the n_type field (NT_* value)
+ * of the core file note in which this regset's data appears.
+ * NT_PRSTATUS is a special case in that the regset data starts at
+ * offsetof(struct elf_prstatus, pr_reg) into the note data; that is
+ * part of the per-machine ELF formats userland knows about. In
+ * other cases, the core file note contains exactly the whole regset
+ * (@n * @size) and nothing else. The core file note is normally
+ * omitted when there is an @active function and it returns zero.
+ */
+struct user_regset {
+ user_regset_get_fn *get;
+ user_regset_set_fn *set;
+ user_regset_active_fn *active;
+ user_regset_writeback_fn *writeback;
+ unsigned int n;
+ unsigned int size;
+ unsigned int align;
+ unsigned int bias;
+ unsigned int core_note_type;
+};
+
+/**
+ * struct user_regset_view - available regsets
+ * @name: Identifier, e.g. UTS_MACHINE string.
+ * @regsets: Array of @n regsets available in this view.
+ * @n: Number of elements in @regsets.
+ * @e_machine: ELF header @e_machine %EM_* value written in core dumps.
+ * @e_flags: ELF header @e_flags value written in core dumps.
+ * @ei_osabi: ELF header @e_ident[%EI_OSABI] value written in core dumps.
+ *
+ * A regset view is a collection of regsets (&struct user_regset,
+ * above). This describes all the state of a thread that can be seen
+ * from a given architecture/ABI environment. More than one view might
+ * refer to the same &struct user_regset, or more than one regset
+ * might refer to the same machine-specific state in the thread. For
+ * example, a 32-bit thread's state could be examined from the 32-bit
+ * view or from the 64-bit view. Either method reaches the same thread
+ * register state, doing appropriate widening or truncation.
+ */
+struct user_regset_view {
+ const char *name;
+ const struct user_regset *regsets;
+ unsigned int n;
+ u32 e_flags;
+ u16 e_machine;
+ u8 ei_osabi;
+};
+
+/*
+ * This is documented here rather than at the definition sites because its
+ * implementation is machine-dependent but its interface is universal.
+ */
+/**
+ * task_user_regset_view - Return the process's native regset view.
+ * @tsk: a thread of the process in question
+ *
+ * Return the &struct user_regset_view that is native for the given process.
+ * For example, what it would access when it called ptrace().
+ * Throughout the life of the process, this only changes at exec.
+ */
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk);
+
+
+/*
+ * These are helpers for writing regset get/set functions in arch code.
+ * Because @start_pos and @end_pos are always compile-time constants,
+ * these are inlined into very little code though they look large.
+ *
+ * Use one or more calls sequentially for each chunk of regset data stored
+ * contiguously in memory. Call with constants for @start_pos and @end_pos,
+ * giving the range of byte positions in the regset that data corresponds
+ * to; @end_pos can be -1 if this chunk is at the end of the regset layout.
+ * Each call updates the arguments to point past its chunk.
+ */
+
+static inline int user_regset_copyout(unsigned int *pos, unsigned int *count,
+ void **kbuf,
+ void __user **ubuf, const void *data,
+ const int start_pos, const int end_pos)
+{
+ if (*count == 0)
+ return 0;
+ BUG_ON(*pos < start_pos);
+ if (end_pos < 0 || *pos < end_pos) {
+ unsigned int copy = (end_pos < 0 ? *count
+ : min(*count, end_pos - *pos));
+ data += *pos - start_pos;
+ if (*kbuf) {
+ memcpy(*kbuf, data, copy);
+ *kbuf += copy;
+ } else if (__copy_to_user(*ubuf, data, copy))
+ return -EFAULT;
+ else
+ *ubuf += copy;
+ *pos += copy;
+ *count -= copy;
+ }
+ return 0;
+}
+
+static inline int user_regset_copyin(unsigned int *pos, unsigned int *count,
+ const void **kbuf,
+ const void __user **ubuf, void *data,
+ const int start_pos, const int end_pos)
+{
+ if (*count == 0)
+ return 0;
+ BUG_ON(*pos < start_pos);
+ if (end_pos < 0 || *pos < end_pos) {
+ unsigned int copy = (end_pos < 0 ? *count
+ : min(*count, end_pos - *pos));
+ data += *pos - start_pos;
+ if (*kbuf) {
+ memcpy(data, *kbuf, copy);
+ *kbuf += copy;
+ } else if (__copy_from_user(data, *ubuf, copy))
+ return -EFAULT;
+ else
+ *ubuf += copy;
+ *pos += copy;
+ *count -= copy;
+ }
+ return 0;
+}
+
+/*
+ * These two parallel the two above, but for portions of a regset layout
+ * that always read as all-zero or for which writes are ignored.
+ */
+static inline int user_regset_copyout_zero(unsigned int *pos,
+ unsigned int *count,
+ void **kbuf, void __user **ubuf,
+ const int start_pos,
+ const int end_pos)
+{
+ if (*count == 0)
+ return 0;
+ BUG_ON(*pos < start_pos);
+ if (end_pos < 0 || *pos < end_pos) {
+ unsigned int copy = (end_pos < 0 ? *count
+ : min(*count, end_pos - *pos));
+ if (*kbuf) {
+ memset(*kbuf, 0, copy);
+ *kbuf += copy;
+ } else if (__clear_user(*ubuf, copy))
+ return -EFAULT;
+ else
+ *ubuf += copy;
+ *pos += copy;
+ *count -= copy;
+ }
+ return 0;
+}
+
+static inline int user_regset_copyin_ignore(unsigned int *pos,
+ unsigned int *count,
+ const void **kbuf,
+ const void __user **ubuf,
+ const int start_pos,
+ const int end_pos)
+{
+ if (*count == 0)
+ return 0;
+ BUG_ON(*pos < start_pos);
+ if (end_pos < 0 || *pos < end_pos) {
+ unsigned int copy = (end_pos < 0 ? *count
+ : min(*count, end_pos - *pos));
+ if (*kbuf)
+ *kbuf += copy;
+ else
+ *ubuf += copy;
+ *pos += copy;
+ *count -= copy;
+ }
+ return 0;
+}
+
+/**
+ * copy_regset_to_user - fetch a thread's user_regset data into user memory
+ * @target: thread to be examined
+ * @view: &struct user_regset_view describing user thread machine state
+ * @setno: index in @view->regsets
+ * @offset: offset into the regset data, in bytes
+ * @size: amount of data to copy, in bytes
+ * @data: user-mode pointer to copy into
+ */
+static inline int copy_regset_to_user(struct task_struct *target,
+ const struct user_regset_view *view,
+ unsigned int setno,
+ unsigned int offset, unsigned int size,
+ void __user *data)
+{
+ const struct user_regset *regset = &view->regsets[setno];
+
+ if (!access_ok(VERIFY_WRITE, data, size))
+ return -EIO;
+
+ return regset->get(target, regset, offset, size, NULL, data);
+}
+
+/**
+ * copy_regset_from_user - store into thread's user_regset data from user memory
+ * @target: thread to be examined
+ * @view: &struct user_regset_view describing user thread machine state
+ * @setno: index in @view->regsets
+ * @offset: offset into the regset data, in bytes
+ * @size: amount of data to copy, in bytes
+ * @data: user-mode pointer to copy from
+ */
+static inline int copy_regset_from_user(struct task_struct *target,
+ const struct user_regset_view *view,
+ unsigned int setno,
+ unsigned int offset, unsigned int size,
+ const void __user *data)
+{
+ const struct user_regset *regset = &view->regsets[setno];
+
+ if (!access_ok(VERIFY_READ, data, size))
+ return -EIO;
+
+ return regset->set(target, regset, offset, size, NULL, data);
+}
+
+
+#endif /* <linux/regset.h> */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2d0546e884e..9d4797609aa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void);
/*
* Does a critical section need to be broken due to another
- * task waiting?:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
*/
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
-#else
-# define need_lockbreak(lock) 0
-#endif
-
-/*
- * Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
- */
-static inline int lock_need_resched(spinlock_t *lock)
+static inline int spin_needbreak(spinlock_t *lock)
{
- if (need_lockbreak(lock) || need_resched())
- return 1;
+#ifdef CONFIG_PREEMPT
+ return spin_is_contended(lock);
+#else
return 0;
+#endif
}
/*
diff --git a/include/linux/smp.h b/include/linux/smp.h
index c25e66bcecf..55232ccf9cf 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -78,6 +78,8 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
*/
void smp_prepare_boot_cpu(void);
+extern unsigned int setup_max_cpus;
+
#else /* !SMP */
/*
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index c376f3b36c8..124449733c5 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -120,6 +120,12 @@ do { \
#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock)
+#ifdef CONFIG_GENERIC_LOCKBREAK
+#define spin_is_contended(lock) ((lock)->break_lock)
+#else
+#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock)
+#endif
+
/**
* spin_unlock_wait - wait until the spinlock gets unlocked
* @lock: the spinlock in question.
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index f6a3a951b79..68d88f71f1a 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -19,7 +19,7 @@
typedef struct {
raw_spinlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
@@ -35,7 +35,7 @@ typedef struct {
typedef struct {
raw_rwlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index ea54c4c9a4e..938234c4a99 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
# define __raw_spin_trylock(lock) ({ (void)(lock); 1; })
#endif /* DEBUG_SPINLOCK */
+#define __raw_spin_is_contended(lock) (((void)(lock), 0))
+
#define __raw_read_can_lock(lock) (((void)(lock), 1))
#define __raw_write_can_lock(lock) (((void)(lock), 1))
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4360e081695..40280df2a3d 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -211,9 +211,6 @@ static inline int hibernate(void) { return -ENOSYS; }
#ifdef CONFIG_PM_SLEEP
void save_processor_state(void);
void restore_processor_state(void);
-struct saved_context;
-void __save_processor_state(struct saved_context *ctxt);
-void __restore_processor_state(struct saved_context *ctxt);
/* kernel/power/main.c */
extern struct blocking_notifier_head pm_chain_head;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4f3838adbb3..2c3ce4c69b2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -6,6 +6,7 @@
#include <linux/mmzone.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/pagemap.h>
#include <asm/atomic.h>
#include <asm/page.h>
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9c4ad755d7e..dfbdfb9836f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -42,27 +42,27 @@ extern long do_no_restart_syscall(struct restart_block *parm);
static inline void set_ti_thread_flag(struct thread_info *ti, int flag)
{
- set_bit(flag,&ti->flags);
+ set_bit(flag, (unsigned long *)&ti->flags);
}
static inline void clear_ti_thread_flag(struct thread_info *ti, int flag)
{
- clear_bit(flag,&ti->flags);
+ clear_bit(flag, (unsigned long *)&ti->flags);
}
static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag)
{
- return test_and_set_bit(flag,&ti->flags);
+ return test_and_set_bit(flag, (unsigned long *)&ti->flags);
}
static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag)
{
- return test_and_clear_bit(flag,&ti->flags);
+ return test_and_clear_bit(flag, (unsigned long *)&ti->flags);
}
static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
{
- return test_bit(flag,&ti->flags);
+ return test_bit(flag, (unsigned long *)&ti->flags);
}
#define set_thread_flag(flag) \
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f4a1395e05f..0fadf95debe 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -51,8 +51,10 @@ struct tick_sched {
unsigned long idle_jiffies;
unsigned long idle_calls;
unsigned long idle_sleeps;
+ int idle_active;
ktime_t idle_entrytime;
ktime_t idle_sleeptime;
+ ktime_t idle_lastupdate;
ktime_t sleep_length;
unsigned long last_jiffies;
unsigned long next_jiffies;
@@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(void);
extern void tick_nohz_restart_sched_tick(void);
extern void tick_nohz_update_jiffies(void);
extern ktime_t tick_nohz_get_sleep_length(void);
+extern void tick_nohz_stop_idle(int cpu);
+extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
# else
static inline void tick_nohz_stop_sched_tick(void) { }
static inline void tick_nohz_restart_sched_tick(void) { }
@@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
return len;
}
+static inline void tick_nohz_stop_idle(int cpu) { }
+static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; }
# endif /* !NO_HZ */
#endif
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 78cf899b440..de0e71359ed 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -5,7 +5,7 @@
#include <linux/ktime.h>
#include <linux/stddef.h>
-struct tvec_t_base_s;
+struct tvec_base;
struct timer_list {
struct list_head entry;
@@ -14,7 +14,7 @@ struct timer_list {
void (*function)(unsigned long);
unsigned long data;
- struct tvec_t_base_s *base;
+ struct tvec_base *base;
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
@@ -22,7 +22,7 @@ struct timer_list {
#endif
};
-extern struct tvec_t_base_s boot_tvec_bases;
+extern struct tvec_base boot_tvec_bases;
#define TIMER_INITIALIZER(_function, _expires, _data) { \
.function = (_function), \
diff --git a/include/xen/page.h b/include/xen/page.h
index c0c8fcb2789..031ef22a971 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -156,16 +156,16 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
static inline unsigned long long pte_val_ma(pte_t x)
{
- return ((unsigned long long)x.pte_high << 32) | x.pte_low;
+ return x.pte;
}
#define pmd_val_ma(v) ((v).pmd)
#define pud_val_ma(v) ((v).pgd.pgd)
-#define __pte_ma(x) ((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } )
+#define __pte_ma(x) ((pte_t) { .pte = (x) })
#define __pmd_ma(x) ((pmd_t) { (x) } )
#else /* !X86_PAE */
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pte_val_ma(x) ((x).pte_low)
+#define pte_val_ma(x) ((x).pte)
#define pmd_val_ma(v) ((v).pud.pgd.pgd)
#define __pte_ma(x) ((pte_t) { (x) } )
#endif /* CONFIG_X86_PAE */
diff --git a/init/main.c b/init/main.c
index f287ca5862b..cb81ed116f6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -128,7 +128,7 @@ static char *ramdisk_execute_command;
#ifdef CONFIG_SMP
/* Setup configured maximum number of CPUs to activate */
-static unsigned int __initdata max_cpus = NR_CPUS;
+unsigned int __initdata setup_max_cpus = NR_CPUS;
/*
* Setup routine for controlling SMP activation
@@ -146,7 +146,7 @@ static inline void disable_ioapic_setup(void) {};
static int __init nosmp(char *str)
{
- max_cpus = 0;
+ setup_max_cpus = 0;
disable_ioapic_setup();
return 0;
}
@@ -155,8 +155,8 @@ early_param("nosmp", nosmp);
static int __init maxcpus(char *str)
{
- get_option(&str, &max_cpus);
- if (max_cpus == 0)
+ get_option(&str, &setup_max_cpus);
+ if (setup_max_cpus == 0)
disable_ioapic_setup();
return 0;
@@ -164,7 +164,7 @@ static int __init maxcpus(char *str)
early_param("maxcpus", maxcpus);
#else
-#define max_cpus NR_CPUS
+#define setup_max_cpus NR_CPUS
#endif
/*
@@ -318,6 +318,10 @@ static int __init unknown_bootoption(char *param, char *val)
return 0;
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+int __read_mostly debug_pagealloc_enabled = 0;
+#endif
+
static int __init init_setup(char *str)
{
unsigned int i;
@@ -363,7 +367,7 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#else
-#ifdef __GENERIC_PER_CPU
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
@@ -384,7 +388,7 @@ static void __init setup_per_cpu_areas(void)
ptr += size;
}
}
-#endif /* !__GENERIC_PER_CPU */
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
@@ -393,7 +397,7 @@ static void __init smp_init(void)
/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
- if (num_online_cpus() >= max_cpus)
+ if (num_online_cpus() >= setup_max_cpus)
break;
if (!cpu_online(cpu))
cpu_up(cpu);
@@ -401,7 +405,7 @@ static void __init smp_init(void)
/* Any cleanup work */
printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
+ smp_cpus_done(setup_max_cpus);
}
#endif
@@ -552,6 +556,7 @@ asmlinkage void __init start_kernel(void)
preempt_disable();
build_all_zonelists();
page_alloc_init();
+ enable_debug_pagealloc();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
parse_early_param();
parse_args("Booting kernel", static_command_line, __start___param,
@@ -824,7 +829,7 @@ static int __init kernel_init(void * unused)
__set_special_pids(1, 1);
cad_pid = task_pid(current);
- smp_prepare_cpus(max_cpus);
+ smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
diff --git a/kernel/Makefile b/kernel/Makefile
index 390d4214626..8885627ea02 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
new file mode 100644
index 00000000000..d1a7605c5b8
--- /dev/null
+++ b/kernel/backtracetest.c
@@ -0,0 +1,48 @@
+/*
+ * Simple stack backtrace regression test module
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+static struct timer_list backtrace_timer;
+
+static void backtrace_test_timer(unsigned long data)
+{
+ printk("Testing a backtrace from irq context.\n");
+ printk("The following trace is a kernel self test and not a bug!\n");
+ dump_stack();
+}
+static int backtrace_regression_test(void)
+{
+ printk("====[ backtrace testing ]===========\n");
+ printk("Testing a backtrace from process context.\n");
+ printk("The following trace is a kernel self test and not a bug!\n");
+ dump_stack();
+
+ init_timer(&backtrace_timer);
+ backtrace_timer.function = backtrace_test_timer;
+ mod_timer(&backtrace_timer, jiffies + 10);
+
+ msleep(10);
+ printk("====[ end of backtrace testing ]====\n");
+ return 0;
+}
+
+static void exitf(void)
+{
+}
+
+module_init(backtrace_regression_test);
+module_exit(exitf);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1f314221d53..438a0146428 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id)
return;
}
printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
+#ifdef CONFIG_DEBUG_SHIRQ
+ dump_stack();
+#endif
spin_unlock_irqrestore(&desc->lock, flags);
return;
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 50b81b98046..c2f2ccb0549 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -75,6 +75,18 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
#endif
+static int irq_spurious_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct irq_desc *d = &irq_desc[(long) data];
+ return sprintf(page, "count %u\n"
+ "unhandled %u\n"
+ "last_unhandled %u ms\n",
+ d->irq_count,
+ d->irqs_unhandled,
+ jiffies_to_msecs(d->last_unhandled));
+}
+
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -118,6 +130,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
void register_irq_proc(unsigned int irq)
{
char name [MAX_NAMELEN];
+ struct proc_dir_entry *entry;
if (!root_irq_dir ||
(irq_desc[irq].chip == &no_irq_chip) ||
@@ -132,8 +145,6 @@ void register_irq_proc(unsigned int irq)
#ifdef CONFIG_SMP
{
- struct proc_dir_entry *entry;
-
/* create /proc/irq/<irq>/smp_affinity */
entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
@@ -144,6 +155,12 @@ void register_irq_proc(unsigned int irq)
}
}
#endif
+
+ entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+ if (entry) {
+ entry->data = (void *)(long)irq;
+ entry->read_proc = irq_spurious_read;
+ }
}
#undef MAX_NAMELEN
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 32b161972fa..a6b2bc831dd 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
static int irqfixup __read_mostly;
@@ -225,6 +226,8 @@ int noirqdebug_setup(char *str)
}
__setup("noirqdebug", noirqdebug_setup);
+module_param(noirqdebug, bool, 0644);
+MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
static int __init irqfixup_setup(char *str)
{
@@ -236,6 +239,8 @@ static int __init irqfixup_setup(char *str)
}
__setup("irqfixup", irqfixup_setup);
+module_param(irqfixup, int, 0644);
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
static int __init irqpoll_setup(char *str)
{
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e3a5d817ac9..d0493eafea3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -824,6 +824,8 @@ static int __init init_kprobes(void)
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
+ if (!err)
+ init_test_probes();
return err;
}
diff --git a/kernel/module.c b/kernel/module.c
index f6a4e721fd4..bd60278ee70 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -430,6 +430,14 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
+static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ memcpy(pcpudest + per_cpu_offset(cpu), from, size);
+}
+
static int percpu_modinit(void)
{
pcpu_num_used = 2;
diff --git a/kernel/panic.c b/kernel/panic.c
index da4d6bac270..d9e90cfe329 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -20,6 +20,7 @@
#include <linux/kexec.h>
#include <linux/debug_locks.h>
#include <linux/random.h>
+#include <linux/kallsyms.h>
int panic_on_oops;
int tainted;
@@ -280,6 +281,13 @@ static int init_oops_id(void)
}
late_initcall(init_oops_id);
+static void print_oops_end_marker(void)
+{
+ init_oops_id();
+ printk(KERN_WARNING "---[ end trace %016llx ]---\n",
+ (unsigned long long)oops_id);
+}
+
/*
* Called when the architecture exits its oops handler, after printing
* everything.
@@ -287,11 +295,26 @@ late_initcall(init_oops_id);
void oops_exit(void)
{
do_oops_enter_exit();
- init_oops_id();
- printk(KERN_WARNING "---[ end trace %016llx ]---\n",
- (unsigned long long)oops_id);
+ print_oops_end_marker();
}
+#ifdef WANT_WARN_ON_SLOWPATH
+void warn_on_slowpath(const char *file, int line)
+{
+ char function[KSYM_SYMBOL_LEN];
+ unsigned long caller = (unsigned long) __builtin_return_address(0);
+ sprint_symbol(function, caller);
+
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+ printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+ line, function);
+ print_modules();
+ dump_stack();
+ print_oops_end_marker();
+}
+EXPORT_SYMBOL(warn_on_slowpath);
+#endif
+
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* Called when gcc's -fstack-protector feature is used, and
diff --git a/kernel/printk.c b/kernel/printk.c
index 3b7c968d0ef..58bbec68411 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -36,6 +36,13 @@
#include <asm/uaccess.h>
+/*
+ * Architectures can override it:
+ */
+void __attribute__((weak)) early_printk(const char *fmt, ...)
+{
+}
+
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* printk's without a loglevel use this.. */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index c719bb9d79a..e6e9b8be4b0 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -366,12 +366,73 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
return error;
}
+
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request) 0
+#endif
+
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request) 0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request) 0
+#endif
+
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+ if (!valid_signal(data))
+ return -EIO;
+
+ if (request == PTRACE_SYSCALL)
+ set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+ if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+ if (is_singleblock(request)) {
+ if (unlikely(!arch_has_block_step()))
+ return -EIO;
+ user_enable_block_step(child);
+ } else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+ if (unlikely(!arch_has_single_step()))
+ return -EIO;
+ user_enable_single_step(child);
+ }
+ else
+ user_disable_single_step(child);
+
+ child->exit_code = data;
+ wake_up_process(child);
+
+ return 0;
+}
+
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
int ret = -EIO;
switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ return generic_ptrace_peekdata(child, addr, data);
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ return generic_ptrace_pokedata(child, addr, data);
+
#ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS:
#endif
@@ -390,6 +451,26 @@ int ptrace_request(struct task_struct *child, long request,
case PTRACE_DETACH: /* detach a process that was attached. */
ret = ptrace_detach(child, data);
break;
+
+#ifdef PTRACE_SINGLESTEP
+ case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SINGLEBLOCK
+ case PTRACE_SINGLEBLOCK:
+#endif
+#ifdef PTRACE_SYSEMU
+ case PTRACE_SYSEMU:
+ case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+ case PTRACE_SYSCALL:
+ case PTRACE_CONT:
+ return ptrace_resume(child, request, data);
+
+ case PTRACE_KILL:
+ if (child->exit_state) /* already dead */
+ return 0;
+ return ptrace_resume(child, request, SIGKILL);
+
default:
break;
}
@@ -526,3 +607,87 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
return (copied == sizeof(data)) ? 0 : -EIO;
}
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data)
+{
+ compat_ulong_t __user *datap = compat_ptr(data);
+ compat_ulong_t word;
+ int ret;
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+ if (ret != sizeof(word))
+ ret = -EIO;
+ else
+ ret = put_user(word, datap);
+ break;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+ ret = (ret != sizeof(data) ? -EIO : 0);
+ break;
+
+ case PTRACE_GETEVENTMSG:
+ ret = put_user((compat_ulong_t) child->ptrace_message, datap);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ }
+
+ return ret;
+}
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+ compat_long_t addr, compat_long_t data)
+{
+ struct task_struct *child;
+ long ret;
+
+ /*
+ * This lock_kernel fixes a subtle race with suid exec
+ */
+ lock_kernel();
+ if (request == PTRACE_TRACEME) {
+ ret = ptrace_traceme();
+ goto out;
+ }
+
+ child = ptrace_get_task_struct(pid);
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
+ goto out;
+ }
+
+ if (request == PTRACE_ATTACH) {
+ ret = ptrace_attach(child);
+ /*
+ * Some architectures need to do book-keeping after
+ * a ptrace attach.
+ */
+ if (!ret)
+ arch_ptrace_attach(child);
+ goto out_put_task_struct;
+ }
+
+ ret = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (!ret)
+ ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+ put_task_struct(child);
+ out:
+ unlock_kernel();
+ return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
+#endif /* CONFIG_COMPAT */
diff --git a/kernel/sched.c b/kernel/sched.c
index 524285e46fa..ba4c88088f6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched);
*/
int cond_resched_lock(spinlock_t *lock)
{
+ int resched = need_resched() && system_state == SYSTEM_RUNNING;
int ret = 0;
- if (need_lockbreak(lock)) {
+ if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- cpu_relax();
- ret = 1;
- spin_lock(lock);
- }
- if (need_resched() && system_state == SYSTEM_RUNNING) {
- spin_release(&lock->dep_map, 1, _THIS_IP_);
- _raw_spin_unlock(lock);
- preempt_enable_no_resched();
- __cond_resched();
+ if (resched && need_resched())
+ __cond_resched();
+ else
+ cpu_relax();
ret = 1;
spin_lock(lock);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index afa4f781f92..bf49ce6f016 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -733,13 +733,13 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
current->comm, task_pid_nr(current), signr);
#if defined(__i386__) && !defined(__arch_um__)
- printk("code at %08lx: ", regs->eip);
+ printk("code at %08lx: ", regs->ip);
{
int i;
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->eip + i));
+ __get_user(insn, (unsigned char *)(regs->ip + i));
printk("%02x ", insn);
}
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bd89bc4eb0b..d7837d45419 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -3,7 +3,9 @@
*
* Copyright (C) 1992 Linus Torvalds
*
- * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ * Distribute under GPLv2.
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
*/
#include <linux/module.h>
@@ -278,9 +280,14 @@ asmlinkage void do_softirq(void)
*/
void irq_enter(void)
{
+#ifdef CONFIG_NO_HZ
+ int cpu = smp_processor_id();
+ if (idle_cpu(cpu) && !in_interrupt())
+ tick_nohz_stop_idle(cpu);
+#endif
__irq_enter();
#ifdef CONFIG_NO_HZ
- if (idle_cpu(smp_processor_id()))
+ if (idle_cpu(cpu))
tick_nohz_update_jiffies();
#endif
}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index cd72424c266..ae28c824512 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock);
* even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
* not re-enabled during lock-acquire (which the preempt-spin-ops do):
*/
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
- defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
void __lockfunc _read_lock(rwlock_t *lock)
{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4bc8e48434a..357b68ba23e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -53,6 +53,7 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
new file mode 100644
index 00000000000..88cdb109e13
--- /dev/null
+++ b/kernel/test_kprobes.c
@@ -0,0 +1,216 @@
+/*
+ * test_kprobes.c - simple sanity test for *probes
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+
+#define div_factor 3
+
+static u32 rand1, preh_val, posth_val, jph_val;
+static int errors, handler_errors, num_tests;
+
+static noinline u32 kprobe_target(u32 value)
+{
+ /*
+ * gcc ignores noinline on some architectures unless we stuff
+ * sufficient lard into the function. The get_kprobe() here is
+ * just for that.
+ *
+ * NOTE: We aren't concerned about the correctness of get_kprobe()
+ * here; hence, this call is neither under !preempt nor with the
+ * kprobe_mutex held. This is fine(tm)
+ */
+ if (get_kprobe((void *)0xdeadbeef))
+ printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
+
+ return (value / div_factor);
+}
+
+static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ preh_val = (rand1 / div_factor);
+ return 0;
+}
+
+static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags)
+{
+ if (preh_val != (rand1 / div_factor)) {
+ handler_errors++;
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "incorrect value in post_handler\n");
+ }
+ posth_val = preh_val + div_factor;
+}
+
+static struct kprobe kp = {
+ .symbol_name = "kprobe_target",
+ .pre_handler = kp_pre_handler,
+ .post_handler = kp_post_handler
+};
+
+static int test_kprobe(void)
+{
+ int ret;
+
+ ret = register_kprobe(&kp);
+ if (ret < 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "register_kprobe returned %d\n", ret);
+ return ret;
+ }
+
+ ret = kprobe_target(rand1);
+ unregister_kprobe(&kp);
+
+ if (preh_val == 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "kprobe pre_handler not called\n");
+ handler_errors++;
+ }
+
+ if (posth_val == 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "kprobe post_handler not called\n");
+ handler_errors++;
+ }
+
+ return 0;
+}
+
+static u32 j_kprobe_target(u32 value)
+{
+ if (value != rand1) {
+ handler_errors++;
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "incorrect value in jprobe handler\n");
+ }
+
+ jph_val = rand1;
+ jprobe_return();
+ return 0;
+}
+
+static struct jprobe jp = {
+ .entry = j_kprobe_target,
+ .kp.symbol_name = "kprobe_target"
+};
+
+static int test_jprobe(void)
+{
+ int ret;
+
+ ret = register_jprobe(&jp);
+ if (ret < 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "register_jprobe returned %d\n", ret);
+ return ret;
+ }
+
+ ret = kprobe_target(rand1);
+ unregister_jprobe(&jp);
+ if (jph_val == 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "jprobe handler not called\n");
+ handler_errors++;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_KRETPROBES
+static u32 krph_val;
+
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ unsigned long ret = regs_return_value(regs);
+
+ if (ret != (rand1 / div_factor)) {
+ handler_errors++;
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "incorrect value in kretprobe handler\n");
+ }
+
+ krph_val = (rand1 / div_factor);
+ return 0;
+}
+
+static struct kretprobe rp = {
+ .handler = return_handler,
+ .kp.symbol_name = "kprobe_target"
+};
+
+static int test_kretprobe(void)
+{
+ int ret;
+
+ ret = register_kretprobe(&rp);
+ if (ret < 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "register_kretprobe returned %d\n", ret);
+ return ret;
+ }
+
+ ret = kprobe_target(rand1);
+ unregister_kretprobe(&rp);
+ if (krph_val == 0) {
+ printk(KERN_ERR "Kprobe smoke test failed: "
+ "kretprobe handler not called\n");
+ handler_errors++;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_KRETPROBES */
+
+int init_test_probes(void)
+{
+ int ret;
+
+ do {
+ rand1 = random32();
+ } while (rand1 <= div_factor);
+
+ printk(KERN_INFO "Kprobe smoke test started\n");
+ num_tests++;
+ ret = test_kprobe();
+ if (ret < 0)
+ errors++;
+
+ num_tests++;
+ ret = test_jprobe();
+ if (ret < 0)
+ errors++;
+
+#ifdef CONFIG_KRETPROBES
+ num_tests++;
+ ret = test_kretprobe();
+ if (ret < 0)
+ errors++;
+#endif /* CONFIG_KRETPROBES */
+
+ if (errors)
+ printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
+ "%d tests failed\n", errors, num_tests);
+ else if (handler_errors)
+ printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
+ "running handlers\n", handler_errors);
+ else
+ printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+
+ return 0;
+}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5fb139fef9f..3e59fce6dd4 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch,
{
u64 clc = ((u64) latch << evt->shift);
+ if (unlikely(!evt->mult)) {
+ evt->mult = 1;
+ WARN_ON(1);
+ }
+
do_div(clc, evt->mult);
if (clc < 1000)
clc = 1000;
@@ -151,6 +156,14 @@ static void clockevents_notify_released(void)
void clockevents_register_device(struct clock_event_device *dev)
{
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ /*
+ * A nsec2cyc multiplicator of 0 is invalid and we'd crash
+ * on it, so fix it up and emit a warning:
+ */
+ if (unlikely(!dev->mult)) {
+ dev->mult = 1;
+ WARN_ON(1);
+ }
spin_lock(&clockevents_lock);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 8d6125ad2cf..6e9259a5d50 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data)
}
if (!list_empty(&watchdog_list)) {
- __mod_timer(&watchdog_timer,
- watchdog_timer.expires + WATCHDOG_INTERVAL);
+ /* Cycle through CPUs to check if the CPUs stay synchronized to
+ * each other. */
+ int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
+ if (next_cpu >= NR_CPUS)
+ next_cpu = first_cpu(cpu_online_map);
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
}
spin_unlock(&watchdog_lock);
}
@@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
if (!started && watchdog) {
watchdog_last = watchdog->read();
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
- add_timer(&watchdog_timer);
+ add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
}
} else {
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -175,7 +180,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
if (watchdog)
del_timer(&watchdog_timer);
watchdog = cs;
- init_timer(&watchdog_timer);
+ init_timer_deferrable(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
/* Reset watchdog cycles */
@@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs)
watchdog_last = watchdog->read();
watchdog_timer.expires =
jiffies + WATCHDOG_INTERVAL;
- add_timer(&watchdog_timer);
+ add_timer_on(&watchdog_timer,
+ first_cpu(cpu_online_map));
}
}
}
@@ -331,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
spin_unlock_irqrestore(&clocksource_lock, flags);
}
+/**
+ * clocksource_unregister - remove a registered clocksource
+ */
+void clocksource_unregister(struct clocksource *cs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clocksource_lock, flags);
+ list_del(&cs->list);
+ if (clocksource_override == cs)
+ clocksource_override = NULL;
+ next_clocksource = select_clocksource();
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+}
+
#ifdef CONFIG_SYSFS
/**
* sysfs_show_current_clocksources - sysfs interface for current clocksource
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 5b86698faa0..e1bd50cbbf5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
/*
* Broadcast the event to the cpus, which are set in the mask
*/
-int tick_do_broadcast(cpumask_t mask)
+static void tick_do_broadcast(cpumask_t mask)
{
- int ret = 0, cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct tick_device *td;
/*
@@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask)
cpu_clear(cpu, mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->event_handler(td->evtdev);
- ret = 1;
}
if (!cpus_empty(mask)) {
@@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask)
cpu = first_cpu(mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->broadcast(mask);
- ret = 1;
}
- return ret;
}
/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index bb13f272490..f13f2b7f4fd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
* Broadcasting support
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_do_broadcast(cpumask_t mask);
-
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern int tick_check_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1a21b6fdb67..63f24b55069 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -9,7 +9,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*
- * For licencing details see kernel-base/COPYING
+ * Distribute under GPLv2.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void)
local_irq_restore(flags);
}
+void tick_nohz_stop_idle(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+ if (ts->idle_active) {
+ ktime_t now, delta;
+ now = ktime_get();
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ ts->idle_active = 0;
+ }
+}
+
+static ktime_t tick_nohz_start_idle(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now, delta;
+
+ now = ktime_get();
+ if (ts->idle_active) {
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ }
+ ts->idle_entrytime = now;
+ ts->idle_active = 1;
+ return now;
+}
+
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+ *last_update_time = ktime_to_us(ts->idle_lastupdate);
+ return ktime_to_us(ts->idle_sleeptime);
+}
+
/**
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
*
@@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void)
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
unsigned long rt_jiffies;
struct tick_sched *ts;
- ktime_t last_update, expires, now, delta;
+ ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
int cpu;
local_irq_save(flags);
cpu = smp_processor_id();
+ now = tick_nohz_start_idle(cpu);
ts = &per_cpu(tick_cpu_sched, cpu);
/*
@@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void)
}
}
- now = ktime_get();
- /*
- * When called from irq_exit we need to account the idle sleep time
- * correctly.
- */
- if (ts->tick_stopped) {
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- }
-
- ts->idle_entrytime = now;
ts->idle_calls++;
-
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&xtime_lock);
@@ -296,7 +323,7 @@ void tick_nohz_stop_sched_tick(void)
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
goto out;
- } else if(!tick_program_event(expires, 0))
+ } else if (!tick_program_event(expires, 0))
goto out;
/*
* We are past the event already. So we crossed a
@@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void)
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long ticks;
- ktime_t now, delta;
+ ktime_t now;
- if (!ts->tick_stopped)
+ local_irq_disable();
+ tick_nohz_stop_idle(cpu);
+
+ if (!ts->tick_stopped) {
+ local_irq_enable();
return;
+ }
/* Update jiffies first */
- now = ktime_get();
-
- local_irq_disable();
select_nohz_load_balancer(0);
+ now = ktime_get();
tick_do_update_jiffies64(now);
cpu_clear(cpu, nohz_cpu_mask);
- /* Account the idle time */
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
@@ -507,7 +533,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
*/
#ifdef CONFIG_HIGH_RES_TIMERS
/*
- * We rearm the timer until we get disabled by the idle code
+ * We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled and timer->base->cpu_base->lock held.
*/
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ab46ae8c062..092a2366b5a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void)
}
/**
- * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * getnstimeofday - Returns the time of day in a timespec
* @ts: pointer to the timespec to be set
*
- * Returns the time of day in a timespec. Used by
- * do_gettimeofday() and get_realtime_clock_ts().
+ * Returns the time of day in a timespec.
*/
-static inline void __get_realtime_clock_ts(struct timespec *ts)
+void getnstimeofday(struct timespec *ts)
{
unsigned long seq;
s64 nsecs;
@@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts)
timespec_add_ns(ts, nsecs);
}
-/**
- * getnstimeofday - Returns the time of day in a timespec
- * @ts: pointer to the timespec to be set
- *
- * Returns the time of day in a timespec.
- */
-void getnstimeofday(struct timespec *ts)
-{
- __get_realtime_clock_ts(ts);
-}
-
EXPORT_SYMBOL(getnstimeofday);
/**
* do_gettimeofday - Returns the time of day in a timeval
* @tv: pointer to the timeval to be set
*
- * NOTE: Users should be converted to using get_realtime_clock_ts()
+ * NOTE: Users should be converted to using getnstimeofday()
*/
void do_gettimeofday(struct timeval *tv)
{
struct timespec now;
- __get_realtime_clock_ts(&now);
+ getnstimeofday(&now);
tv->tv_sec = now.tv_sec;
tv->tv_usec = now.tv_nsec/1000;
}
@@ -198,7 +186,8 @@ static void change_clocksource(void)
clock->error = 0;
clock->xtime_nsec = 0;
- clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+ clocksource_calculate_interval(clock,
+ (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
tick_clock_notify();
@@ -255,7 +244,8 @@ void __init timekeeping_init(void)
ntp_clear();
clock = clocksource_get_next();
- clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+ clocksource_calculate_interval(clock,
+ (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
clock->cycle_last = clocksource_read(clock);
xtime.tv_sec = sec;
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c36bb7ed030..417da8c5bc7 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -26,7 +26,7 @@
* the pid and cmdline from the owner process if applicable.
*
* Start/stop data collection:
- * # echo 1[0] >/proc/timer_stats
+ * # echo [1|0] >/proc/timer_stats
*
* Display the information collected so far:
* # cat /proc/timer_stats
diff --git a/kernel/timer.c b/kernel/timer.c
index f739dfb539c..23f7ead78fa 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -58,59 +58,57 @@ EXPORT_SYMBOL(jiffies_64);
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
-typedef struct tvec_s {
+struct tvec {
struct list_head vec[TVN_SIZE];
-} tvec_t;
+};
-typedef struct tvec_root_s {
+struct tvec_root {
struct list_head vec[TVR_SIZE];
-} tvec_root_t;
+};
-struct tvec_t_base_s {
+struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
unsigned long timer_jiffies;
- tvec_root_t tv1;
- tvec_t tv2;
- tvec_t tv3;
- tvec_t tv4;
- tvec_t tv5;
+ struct tvec_root tv1;
+ struct tvec tv2;
+ struct tvec tv3;
+ struct tvec tv4;
+ struct tvec tv5;
} ____cacheline_aligned;
-typedef struct tvec_t_base_s tvec_base_t;
-
-tvec_base_t boot_tvec_bases;
+struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/*
- * Note that all tvec_bases is 2 byte aligned and lower bit of
+ * Note that all tvec_bases are 2 byte aligned and lower bit of
* base in timer_list is guaranteed to be zero. Use the LSB for
* the new flag to indicate whether the timer is deferrable
*/
#define TBASE_DEFERRABLE_FLAG (0x1)
/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}
-static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
- return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+ return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}
static inline void timer_set_deferrable(struct timer_list *timer)
{
- timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+ timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
TBASE_DEFERRABLE_FLAG));
}
static inline void
-timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
- timer->base = (tvec_base_t *)((unsigned long)(new_base) |
+ timer->base = (struct tvec_base *)((unsigned long)(new_base) |
tbase_get_deferrable(timer->base));
}
@@ -246,7 +244,7 @@ unsigned long round_jiffies_relative(unsigned long j)
EXPORT_SYMBOL_GPL(round_jiffies_relative);
-static inline void set_running_timer(tvec_base_t *base,
+static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
{
#ifdef CONFIG_SMP
@@ -254,7 +252,7 @@ static inline void set_running_timer(tvec_base_t *base,
#endif
}
-static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
@@ -371,14 +369,14 @@ static inline void detach_timer(struct timer_list *timer,
* possible to set timer->base = NULL and drop the lock: the timer remains
* locked.
*/
-static tvec_base_t *lock_timer_base(struct timer_list *timer,
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
unsigned long *flags)
__acquires(timer->base->lock)
{
- tvec_base_t *base;
+ struct tvec_base *base;
for (;;) {
- tvec_base_t *prelock_base = timer->base;
+ struct tvec_base *prelock_base = timer->base;
base = tbase_get_base(prelock_base);
if (likely(base != NULL)) {
spin_lock_irqsave(&base->lock, *flags);
@@ -393,7 +391,7 @@ static tvec_base_t *lock_timer_base(struct timer_list *timer,
int __mod_timer(struct timer_list *timer, unsigned long expires)
{
- tvec_base_t *base, *new_base;
+ struct tvec_base *base, *new_base;
unsigned long flags;
int ret = 0;
@@ -445,7 +443,7 @@ EXPORT_SYMBOL(__mod_timer);
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- tvec_base_t *base = per_cpu(tvec_bases, cpu);
+ struct tvec_base *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
timer_stats_timer_set_start_info(timer);
@@ -508,7 +506,7 @@ EXPORT_SYMBOL(mod_timer);
*/
int del_timer(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = 0;
@@ -539,7 +537,7 @@ EXPORT_SYMBOL(del_timer);
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = -1;
@@ -591,7 +589,7 @@ int del_timer_sync(struct timer_list *timer)
EXPORT_SYMBOL(del_timer_sync);
#endif
-static int cascade(tvec_base_t *base, tvec_t *tv, int index)
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
/* cascade all the timers from tv up one level */
struct timer_list *timer, *tmp;
@@ -620,7 +618,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
* This function cascades all vectors and executes all expired timer
* vectors.
*/
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(struct tvec_base *base)
{
struct timer_list *timer;
@@ -657,7 +655,7 @@ static inline void __run_timers(tvec_base_t *base)
int preempt_count = preempt_count();
fn(data);
if (preempt_count != preempt_count()) {
- printk(KERN_WARNING "huh, entered %p "
+ printk(KERN_ERR "huh, entered %p "
"with preempt_count %08x, exited"
" with %08x?\n",
fn, preempt_count,
@@ -678,13 +676,13 @@ static inline void __run_timers(tvec_base_t *base)
* is used on S/390 to stop all activity when a cpus is idle.
* This functions needs to be called disabled.
*/
-static unsigned long __next_timer_interrupt(tvec_base_t *base)
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
unsigned long timer_jiffies = base->timer_jiffies;
unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
int index, slot, array, found = 0;
struct timer_list *nte;
- tvec_t *varray[4];
+ struct tvec *varray[4];
/* Look for timer events in tv1. */
index = slot = timer_jiffies & TVR_MASK;
@@ -716,7 +714,7 @@ cascade:
varray[3] = &base->tv5;
for (array = 0; array < 4; array++) {
- tvec_t *varp = varray[array];
+ struct tvec *varp = varray[array];
index = slot = timer_jiffies & TVN_MASK;
do {
@@ -795,7 +793,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
*/
unsigned long get_next_timer_interrupt(unsigned long now)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
unsigned long expires;
spin_lock(&base->lock);
@@ -894,7 +892,7 @@ static inline void calc_load(unsigned long ticks)
*/
static void run_timer_softirq(struct softirq_action *h)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
hrtimer_run_pending();
@@ -1223,7 +1221,7 @@ static struct lock_class_key base_lock_keys[NR_CPUS];
static int __cpuinit init_timers_cpu(int cpu)
{
int j;
- tvec_base_t *base;
+ struct tvec_base *base;
static char __cpuinitdata tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
@@ -1278,7 +1276,7 @@ static int __cpuinit init_timers_cpu(int cpu)
}
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
struct timer_list *timer;
@@ -1292,8 +1290,8 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
static void __cpuinit migrate_timers(int cpu)
{
- tvec_base_t *old_base;
- tvec_base_t *new_base;
+ struct tvec_base *old_base;
+ struct tvec_base *new_base;
int i;
BUG_ON(cpu_online(cpu));
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c4ecb2994ba..89f4035b526 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -494,6 +494,30 @@ config RCU_TORTURE_TEST
Say M if you want the RCU torture tests to build as a module.
Say N if you are unsure.
+config KPROBES_SANITY_TEST
+ bool "Kprobes sanity tests"
+ depends on DEBUG_KERNEL
+ depends on KPROBES
+ default n
+ help
+ This option provides for testing basic kprobes functionality on
+ boot. A sample kprobe, jprobe and kretprobe are inserted and
+ verified for functionality.
+
+ Say N if you are unsure.
+
+config BACKTRACE_SELF_TEST
+ tristate "Self test for the backtrace code"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel module that can be used to test
+ the kernel stack backtrace code. This option is not useful
+ for distributions or general kernels, but only for kernel
+ developers working on architecture code.
+
+ Say N if you are unsure.
+
config LKDTM
tristate "Linux Kernel Dump Test Tool Module"
depends on DEBUG_KERNEL
@@ -562,5 +586,33 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
+config PROVIDE_OHCI1394_DMA_INIT
+ bool "Provide code for enabling DMA over FireWire early on boot"
+ depends on PCI && X86
+ help
+ If you want to debug problems which hang or crash the kernel early
+ on boot and the crashing machine has a FireWire port, you can use
+ this feature to remotely access the memory of the crashed machine
+ over FireWire. This employs remote DMA as part of the OHCI1394
+ specification which is now the standard for FireWire controllers.
+
+ With remote DMA, you can monitor the printk buffer remotely using
+ firescope and access all memory below 4GB using fireproxy from gdb.
+ Even controlling a kernel debugger is possible using remote DMA.
+
+ Usage:
+
+ If ohci1394_dma=early is used as boot parameter, it will initialize
+ all OHCI1394 controllers which are found in the PCI config space.
+
+ As all changes to the FireWire bus such as enabling and disabling
+ devices cause a bus reset and thereby disable remote DMA for all
+ devices, be sure to have the cable plugged and FireWire enabled on
+ the debugging host before booting the debug target for debugging.
+
+ This code (~1k) is freed after boot. By then, the firewire stack
+ in charge of the OHCI-1394 controllers should be used instead.
+
+ See Documentation/debugging-via-ohci1394.txt for more information.
source "samples/Kconfig"
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 7d02700a4b0..3e3365e5665 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -187,7 +187,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/*
* wait for the read lock to be granted
*/
-struct rw_semaphore fastcall __sched *
+asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
@@ -201,7 +201,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
/*
* wait for the write lock to be granted
*/
-struct rw_semaphore fastcall __sched *
+asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
@@ -216,7 +216,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
+asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
@@ -236,7 +236,7 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
-struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
+asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
diff --git a/mm/memory.c b/mm/memory.c
index 4b0144b24c1..d902d0e25ed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -513,8 +513,7 @@ again:
if (progress >= 32) {
progress = 0;
if (need_resched() ||
- need_lockbreak(src_ptl) ||
- need_lockbreak(dst_ptl))
+ spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
break;
}
if (pte_none(*src_pte)) {
@@ -853,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
tlb_finish_mmu(*tlbp, tlb_start, start);
if (need_resched() ||
- (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+ (i_mmap_lock && spin_needbreak(i_mmap_lock))) {
if (i_mmap_lock) {
*tlbp = NULL;
goto out;
@@ -1768,8 +1767,7 @@ again:
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
- need_break = need_resched() ||
- need_lockbreak(details->i_mmap_lock);
+ need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
@@ -2756,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
return buf - old_buf;
}
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma && vma->vm_file) {
+ struct file *f = vma->vm_file;
+ char *buf = (char *)__get_free_page(GFP_KERNEL);
+ if (buf) {
+ char *p, *s;
+
+ p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+ if (IS_ERR(p))
+ p = "?";
+ s = strrchr(p, '/');
+ if (s)
+ p = s+1;
+ printk("%s%s[%lx+%lx]", prefix, p,
+ vma->vm_start,
+ vma->vm_end - vma->vm_start);
+ free_page((unsigned long)buf);
+ }
+ }
+ up_read(&current->mm->mmap_sem);
+}
diff --git a/mm/mmap.c b/mm/mmap.c
index bfa389fc6de..d2b6d44962b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -251,7 +251,8 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
* not page aligned -Ram Gupta
*/
rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
+ (mm->end_data - mm->start_data) > rlim)
goto out;
newbrk = PAGE_ALIGN(brk);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a4a6bf7deaa..4ad5fbbb18b 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/sched.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index b4a38a3d855..4bb97646a67 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -711,11 +711,13 @@ static void snd_intel8x0_setup_periods(struct intel8x0 *chip, struct ichdev *ich
static void fill_nocache(void *buf, int size, int nocache)
{
size = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- change_page_attr(virt_to_page(buf), size, nocache ? PAGE_KERNEL_NOCACHE : PAGE_KERNEL);
- global_flush_tlb();
+ if (nocache)
+ set_pages_uc(virt_to_page(buf), size);
+ else
+ set_pages_wb(virt_to_page(buf), size);
}
#else
-#define fill_nocache(buf,size,nocache)
+#define fill_nocache(buf, size, nocache) do { ; } while (0)
#endif
/*