diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
commit | c226951b93f7cd7c3a10b17384535b617bd43fd0 (patch) | |
tree | 07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e | |
parent | b0df3bd1e553e901ec7297267611a5db88240b38 (diff) | |
parent | e8216dee838c09776680a6f1a2e54d81f3cdfa14 (diff) |
Merge branch 'master' into upstream
497 files changed, 25494 insertions, 5470 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 71d05f48172..766abdab94e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1363,6 +1363,11 @@ running once the system is up. reserve= [KNL,BUGS] Force the kernel to ignore some iomem area + reservetop= [IA-32] + Format: nn[KMG] + Reserves a hole at the top of the kernel virtual + address space. + resume= [SWSUSP] Specify the partition device for software suspend diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index c45daabd3bf..74563b38ffd 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -1,7 +1,6 @@ DCCP protocol ============ -Last updated: 10 November 2005 Contents ======== @@ -42,8 +41,11 @@ Socket options DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for calculations. -DCCP_SOCKOPT_SERVICE sets the service. This is compulsory as per the -specification. If you don't set it you will get EPROTO. +DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of +service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, +the socket will fall back to 0 (which means that no meaningful service code +is present). Connecting sockets set at most one service option; for +listening sockets, multiple service codes can be specified. Notes ===== diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 4117802af0f..a66bec222b1 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -52,3 +52,18 @@ suspend image will be as small as possible. Reading from this file will display the current image size limit, which is set to 500 MB by default. + +/sys/power/pm_trace controls the code which saves the last PM event point in +the RTC across reboots, so that you can debug a machine that just hangs +during suspend (or more commonly, during resume). 
Namely, the RTC is only +used to save the last PM event point if this file contains '1'. Initially it +contains '0' which may be changed to '1' by writing a string representing a +nonzero integer into it. + +To use this debugging feature you should attempt to suspend the machine, then +reboot it and run + + dmesg -s 1000000 | grep 'hash matches' + +CAUTION: Using it will cause your machine's real-time (CMOS) clock to be +set to a random invalid time after a resume. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 7cee90223d3..20d0d797f53 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm: - drop-caches - zone_reclaim_mode - min_unmapped_ratio +- min_slab_ratio - panic_on_oom ============================================================== @@ -138,7 +139,6 @@ This is value ORed together of 1 = Zone reclaim on 2 = Zone reclaim writes dirty pages out 4 = Zone reclaim swaps pages -8 = Also do a global slab reclaim pass zone_reclaim_mode is set during bootup to 1 if it is determined that pages from remote zones will cause a measurable performance reduction. The @@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. -It may be advisable to allow slab reclaim if the system makes heavy -use of files and builds up large slab caches. However, the slab -shrink operation is global, may take a long time and free slabs -in all nodes of the system. - ============================================================= min_unmapped_ratio: This is available only on NUMA kernels. -A percentage of the file backed pages in each zone. Zone reclaim will only +A percentage of the total pages in each zone. Zone reclaim will only occur if more than this percentage of pages are file backed and unmapped. 
This is to insure that a minimal amount of local pages is still available for file I/O even if the node is overallocated. @@ -182,6 +177,24 @@ The default is 1 percent. ============================================================= +min_slab_ratio: + +This is available only on NUMA kernels. + +A percentage of the total pages in each zone. On Zone reclaim +(fallback from the local zone occurs) slabs will be reclaimed if more +than this percentage of pages in a zone are reclaimable slab pages. +This insures that the slab growth stays under control even in NUMA +systems that rarely perform global reclaim. + +The default is 5 percent. + +Note that slab reclaim is triggered in a per zone / node fashion. +The process of reclaiming slab memory is currently not node specific +and may not be fast. + +============================================================= + panic_on_oom This enables or disables panic on out-of-memory feature. If this is set to 1, diff --git a/MAINTAINERS b/MAINTAINERS index bd446e251d5..63673e6513b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -443,6 +443,23 @@ W: http://people.redhat.com/sgrubb/audit/ T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git S: Maintained +AVR32 ARCHITECTURE +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +W: http://www.atmel.com/products/AVR32/ +W: http://avr32linux.org/ +W: http://avrfreaks.net/ +S: Supported + +AVR32/AT32AP MACHINE SUPPORT +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +S: Supported + AX.25 NETWORK LAYER P: Ralf Baechle M: ralf@linux-mips.org @@ -2031,6 +2048,13 @@ L: netfilter@lists.netfilter.org L: netfilter-devel@lists.netfilter.org S: Supported +NETLABEL +P: Paul Moore +M: paul.moore@hp.com +W: http://netlabel.sf.net +L: netdev@vger.kernel.org +S: Supported + NETROM NETWORK LAYER P: Ralf Baechle M: ralf@linux-mips.org diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 
213c7850d5f..2b36afd8e96 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -381,7 +381,7 @@ config ALPHA_EV56 config ALPHA_EV56 prompt "EV56 CPU (speed >= 333MHz)?" - depends on ALPHA_NORITAKE && ALPHA_PRIMO + depends on ALPHA_NORITAKE || ALPHA_PRIMO config ALPHA_EV56 prompt "EV56 CPU (speed >= 400MHz)?" diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 917dad1b74c..550f4907d61 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -270,7 +270,7 @@ callback_init(void * kernel_end) void paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long dma_pfn, high_pfn; dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 88a999df0ab..591fc3187c7 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -177,7 +177,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Free the page table, if there was one. */ if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) - pte_free_kernel(pmd_page_kernel(pmd)); + pte_free_kernel(pmd_page_vaddr(pmd)); } addr += PGDIR_SIZE; diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig new file mode 100644 index 00000000000..5f1694eea84 --- /dev/null +++ b/arch/avr32/Kconfig @@ -0,0 +1,196 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux Kernel Configuration" + +config AVR32 + bool + default y + # With EMBEDDED=n, we get lots of stuff automatically selected + # that we usually don't need on AVR32. + select EMBEDDED + help + AVR32 is a high-performance 32-bit RISC microprocessor core, + designed for cost-sensitive embedded applications, with particular + emphasis on low power consumption and high code density. + + There is an AVR32 Linux project with a web page at + http://avr32linux.org/. 
+ +config UID16 + bool + +config GENERIC_HARDIRQS + bool + default y + +config HARDIRQS_SW_RESEND + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config GENERIC_TIME + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + +config GENERIC_BUST_SPINLOCK + bool + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +source "init/Kconfig" + +menu "System Type and features" + +config SUBARCH_AVR32B + bool +config MMU + bool +config PERFORMANCE_COUNTERS + bool + +config PLATFORM_AT32AP + bool + select SUBARCH_AVR32B + select MMU + select PERFORMANCE_COUNTERS + +choice + prompt "AVR32 CPU type" + default CPU_AT32AP7000 + +config CPU_AT32AP7000 + bool "AT32AP7000" + select PLATFORM_AT32AP +endchoice + +# +# CPU Daughterboards for ATSTK1000 +config BOARD_ATSTK1002 + bool + +choice + prompt "AVR32 board type" + default BOARD_ATSTK1000 + +config BOARD_ATSTK1000 + bool "ATSTK1000 evaluation board" + select BOARD_ATSTK1002 if CPU_AT32AP7000 +endchoice + +choice + prompt "Boot loader type" + default LOADER_U_BOOT + +config LOADER_U_BOOT + bool "U-Boot (or similar) bootloader" +endchoice + +config LOAD_ADDRESS + hex + default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config ENTRY_ADDRESS + hex + default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config PHYS_OFFSET + hex + default 0x10000000 if CPU_AT32AP7000=y + +source "kernel/Kconfig.preempt" + +config HAVE_ARCH_BOOTMEM_NODE + bool + default n + +config ARCH_HAVE_MEMORY_PRESENT + bool + default n + +config NEED_NODE_MEMMAP_SIZE + bool + default n + +config ARCH_FLATMEM_ENABLE + bool + default y + +config ARCH_DISCONTIGMEM_ENABLE + bool + default n + +config ARCH_SPARSEMEM_ENABLE + bool + default n + +source "mm/Kconfig" + +config OWNERSHIP_TRACE + bool "Ownership trace support" + default y + help + Say Y to generate an Ownership Trace message on every context switch, + enabling 
Nexus-compliant debuggers to keep track of the PID of the + currently executing task. + +# FPU emulation goes here + +source "kernel/Kconfig.hz" + +config CMDLINE + string "Default kernel command line" + default "" + help + If you don't have a boot loader capable of passing a command line string + to the kernel, you may specify one here. As a minimum, you should specify + the memory size and the root device (e.g., mem=8M, root=/dev/nfs). + +endmenu + +menu "Bus options" + +config PCI + bool + +source "drivers/pci/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +menu "Executable file formats" +source "fs/Kconfig.binfmt" +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/avr32/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug new file mode 100644 index 00000000000..64ace00fe6c --- /dev/null +++ b/arch/avr32/Kconfig.debug @@ -0,0 +1,19 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +source "lib/Kconfig.debug" + +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +endmenu diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile new file mode 100644 index 00000000000..cefc95a7398 --- /dev/null +++ b/arch/avr32/Makefile @@ -0,0 +1,84 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2004-2006 Atmel Corporation. 
+ +# Default target when executing plain make +.PHONY: all +all: uImage vmlinux.elf linux.lst + +KBUILD_DEFCONFIG := atstk1002_defconfig + +CFLAGS += -pipe -fno-builtin -mno-pic +AFLAGS += -mrelax -mno-pic +CFLAGS_MODULE += -mno-relax +LDFLAGS_vmlinux += --relax + +cpuflags-$(CONFIG_CPU_AP7000) += -mcpu=ap7000 + +CFLAGS += $(cpuflags-y) +AFLAGS += $(cpuflags-y) + +CHECKFLAGS += -D__avr32__ + +LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name) + +head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o +head-y += arch/avr32/kernel/head.o +core-$(CONFIG_PLATFORM_AT32AP) += arch/avr32/mach-at32ap/ +core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/ +core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/ +core-y += arch/avr32/kernel/ +core-y += arch/avr32/mm/ +libs-y += arch/avr32/lib/ #$(LIBGCC) + +archincdir-$(CONFIG_PLATFORM_AT32AP) := arch-at32ap + +include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf + @echo ' SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p include/asm-avr32 + $(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch +else + $(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch +endif + @touch $@ + +archprepare: include/asm-avr32/.arch + +BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec + +.PHONY: $(BOOT_TARGETS) install + +boot := arch/$(ARCH)/boot/images + + KBUILD_IMAGE := $(boot)/uImage +vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf +vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso +uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec +uImage: KBUILD_IMAGE := $(boot)/uImage + +quiet_cmd_listing = LST $@ + cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@ +quiet_cmd_disasm = DIS $@ + cmd_disasm = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@ + +vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +install: vmlinux + $(Q)$(MAKE) 
$(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ + +linux.s: vmlinux + $(call if_changed,disasm) + +linux.lst: vmlinux + $(call if_changed,listing) + +define archhelp + @echo '* vmlinux.elf - ELF image with load address 0' + @echo ' vmlinux.cso - PathFinder CSO image' + @echo ' uImage - Create a bootable image for U-Boot' +endef diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile new file mode 100644 index 00000000000..df949948053 --- /dev/null +++ b/arch/avr32/boards/atstk1000/Makefile @@ -0,0 +1,2 @@ +obj-y += setup.o spi.o flash.o +obj-$(CONFIG_BOARD_ATSTK1002) += atstk1002.o diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c new file mode 100644 index 00000000000..49164e9aadd --- /dev/null +++ b/arch/avr32/boards/atstk1000/atstk1002.c @@ -0,0 +1,37 @@ +/* + * ATSTK1002 daughterboard-specific init code + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/init.h> + +#include <asm/arch/board.h> + +struct eth_platform_data __initdata eth0_data = { + .valid = 1, + .mii_phy_addr = 0x10, + .is_rmii = 0, + .hw_addr = { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb }, +}; + +extern struct lcdc_platform_data atstk1000_fb0_data; + +static int __init atstk1002_init(void) +{ + at32_add_system_devices(); + + at32_add_device_usart(1); /* /dev/ttyS0 */ + at32_add_device_usart(2); /* /dev/ttyS1 */ + at32_add_device_usart(3); /* /dev/ttyS2 */ + + at32_add_device_eth(0, &eth0_data); + at32_add_device_spi(0); + at32_add_device_lcdc(0, &atstk1000_fb0_data); + + return 0; +} +postcore_initcall(atstk1002_init); diff --git a/arch/avr32/boards/atstk1000/flash.c b/arch/avr32/boards/atstk1000/flash.c new file mode 100644 index 00000000000..aac4300cca1 --- /dev/null +++ b/arch/avr32/boards/atstk1000/flash.c @@ -0,0 +1,95 @@ +/* + * ATSTK1000 board-specific flash initialization + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> + +#include <asm/arch/smc.h> + +static struct smc_config flash_config __initdata = { + .ncs_read_setup = 0, + .nrd_setup = 40, + .ncs_write_setup = 0, + .nwe_setup = 10, + + .ncs_read_pulse = 80, + .nrd_pulse = 40, + .ncs_write_pulse = 65, + .nwe_pulse = 55, + + .read_cycle = 120, + .write_cycle = 120, + + .bus_width = 2, + .nrd_controlled = 1, + .nwe_controlled = 1, + .byte_write = 1, +}; + +static struct mtd_partition flash_parts[] = { + { + .name = "u-boot", + .offset = 0x00000000, + .size = 0x00020000, /* 128 KiB */ + .mask_flags = MTD_WRITEABLE, + }, + { + .name = "root", + .offset = 0x00020000, + .size = 0x007d0000, + }, + { + .name = "env", + .offset = 0x007f0000, + .size = 0x00010000, + .mask_flags = MTD_WRITEABLE, + }, +}; + +static struct physmap_flash_data flash_data = { + .width = 2, + .nr_parts = ARRAY_SIZE(flash_parts), + .parts = flash_parts, +}; + +static struct resource flash_resource = { + .start = 0x00000000, + .end = 0x007fffff, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device flash_device = { + .name = "physmap-flash", + .id = 0, + .resource = &flash_resource, + .num_resources = 1, + .dev = { + .platform_data = &flash_data, + }, +}; + +/* This needs to be called after the SMC has been initialized */ +static int __init atstk1000_flash_init(void) +{ + int ret; + + ret = smc_set_configuration(0, &flash_config); + if (ret < 0) { + printk(KERN_ERR "atstk1000: failed to set NOR flash timing\n"); + return ret; + } + + platform_device_register(&flash_device); + + return 0; +} +device_initcall(atstk1000_flash_init); diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c new file mode 100644 index 00000000000..191ab85de9a --- /dev/null +++ b/arch/avr32/boards/atstk1000/setup.c @@ -0,0 +1,59 @@ +/* + * ATSTK1000 board-specific setup code. 
+ * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/linkage.h> + +#include <asm/setup.h> + +#include <asm/arch/board.h> + +/* Initialized by bootloader-specific startup code. */ +struct tag *bootloader_tags __initdata; + +struct lcdc_platform_data __initdata atstk1000_fb0_data; + +asmlinkage void __init board_early_init(void) +{ + extern void sdram_init(void); + +#ifdef CONFIG_LOADER_STANDALONE + sdram_init(); +#endif +} + +void __init board_setup_fbmem(unsigned long fbmem_start, + unsigned long fbmem_size) +{ + if (!fbmem_size) + return; + + if (!fbmem_start) { + void *fbmem; + + fbmem = alloc_bootmem_low_pages(fbmem_size); + fbmem_start = __pa(fbmem); + } else { + pg_data_t *pgdat; + + for_each_online_pgdat(pgdat) { + if (fbmem_start >= pgdat->bdata->node_boot_start + && fbmem_start <= pgdat->bdata->node_low_pfn) + reserve_bootmem_node(pgdat, fbmem_start, + fbmem_size); + } + } + + printk("%luKiB framebuffer memory at address 0x%08lx\n", + fbmem_size >> 10, fbmem_start); + atstk1000_fb0_data.fbmem_start = fbmem_start; + atstk1000_fb0_data.fbmem_size = fbmem_size; +} diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c new file mode 100644 index 00000000000..567726c82c6 --- /dev/null +++ b/arch/avr32/boards/atstk1000/spi.c @@ -0,0 +1,27 @@ +/* + * ATSTK1000 SPI devices + * + * Copyright (C) 2005 Atmel Norway + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/device.h> +#include <linux/spi/spi.h> + +static struct spi_board_info spi_board_info[] __initdata = { + { + .modalias = "ltv350qv", + .max_speed_hz = 16000000, + .bus_num = 0, + .chip_select = 1, + }, +}; + +static int board_init_spi(void) +{ + spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); + return 0; +} +arch_initcall(board_init_spi); diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile new file mode 100644 index 00000000000..ccd74eeecec --- /dev/null +++ b/arch/avr32/boot/images/Makefile @@ -0,0 +1,62 @@ +# +# Copyright (C) 2004-2006 Atmel Corporation +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# + +MKIMAGE := $(srctree)/scripts/mkuboot.sh + +extra-y := vmlinux.bin vmlinux.gz + +OBJCOPYFLAGS_vmlinux.bin := -O binary +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +quiet_cmd_uimage = UIMAGE $@ + cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel \ + -C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS) \ + -n 'Linux-$(KERNELRELEASE)' -d $< $@ + +targets += uImage uImage.srec +$(obj)/uImage: $(obj)/vmlinux.gz + $(call if_changed,uimage) + @echo ' Image $@ is ready' + +OBJCOPYFLAGS_uImage.srec := -I binary -O srec +$(obj)/uImage.srec: $(obj)/uImage + $(call if_changed,objcopy) + +OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \ + --change-section-lma __ex_table-0x80000000 \ + --change-section-lma .rodata-0x80000000 \ + --change-section-lma .data-0x80000000 \ + --change-section-lma .init-0x80000000 \ + --change-section-lma .bss-0x80000000 \ + --change-section-lma .initrd-0x80000000 \ + --change-section-lma __param-0x80000000 \ + --change-section-lma __ksymtab-0x80000000 \ + --change-section-lma __ksymtab_gpl-0x80000000 \ + 
--change-section-lma __kcrctab-0x80000000 \ + --change-section-lma __kcrctab_gpl-0x80000000 \ + --change-section-lma __ksymtab_strings-0x80000000 \ + --change-section-lma .got-0x80000000 \ + --set-start 0xa0000000 +$(obj)/vmlinux.elf: vmlinux FORCE + $(call if_changed,objcopy) + +quiet_cmd_sfdwarf = SFDWARF $@ + cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log + +$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE + $(call if_changed,sfdwarf) + +install: $(BOOTIMAGE) + sh $(srctree)/install-kernel.sh $< + +# Generated files to be removed upon make clean +clean-files := vmlinux* uImage uImage.srec diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile new file mode 100644 index 00000000000..125ddc96c27 --- /dev/null +++ b/arch/avr32/boot/u-boot/Makefile @@ -0,0 +1,3 @@ +extra-y := head.o + +obj-y := empty.o diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S new file mode 100644 index 00000000000..8ac91a5f12f --- /dev/null +++ b/arch/avr32/boot/u-boot/empty.S @@ -0,0 +1 @@ +/* Empty file */ diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S new file mode 100644 index 00000000000..4488fa27fe9 --- /dev/null +++ b/arch/avr32/boot/u-boot/head.S @@ -0,0 +1,60 @@ +/* + * Startup code for use with the u-boot bootloader. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/setup.h> + + /* + * The kernel is loaded where we want it to be and all caches + * have just been flushed. We get two parameters from u-boot: + * + * r12 contains a magic number (ATAG_MAGIC) + * r11 points to a tag table providing information about + * the system. 
+ */ + .section .init.text,"ax" + .global _start +_start: + /* Check if the boot loader actually provided a tag table */ + lddpc r0, magic_number + cp.w r12, r0 + brne no_tag_table + + /* Initialize .bss */ + lddpc r2, bss_start_addr + lddpc r3, end_addr + mov r0, 0 + mov r1, 0 +1: st.d r2++, r0 + cp r2, r3 + brlo 1b + + /* + * Save the tag table address for later use. This must be done + * _after_ .bss has been initialized... + */ + lddpc r0, tag_table_addr + st.w r0[0], r11 + + /* Jump to loader-independent setup code */ + rjmp kernel_entry + + .align 2 +magic_number: + .long ATAG_MAGIC +tag_table_addr: + .long bootloader_tags +bss_start_addr: + .long __bss_start +end_addr: + .long _end + +no_tag_table: + sub r12, pc, (. - 2f) + bral panic +2: .asciz "Boot loader didn't provide correct magic number\n" diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig new file mode 100644 index 00000000000..1d22255009f --- /dev/null +++ b/arch/avr32/configs/atstk1002_defconfig @@ -0,0 +1,754 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.18-rc1 +# Tue Jul 11 12:41:36 2006 +# +CONFIG_AVR32=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_IKCONFIG is not set +# CONFIG_RELAY is not set +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is 
not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +# CONFIG_BASE_FULL is not set +# CONFIG_FUTEX is not set +# CONFIG_EPOLL is not set +CONFIG_SHMEM=y +# CONFIG_SLAB is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=1 +CONFIG_SLOB=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set + +# +# Block layer +# +# CONFIG_BLK_DEV_IO_TRACE is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" + +# +# System Type and features +# +CONFIG_SUBARCH_AVR32B=y +CONFIG_MMU=y +CONFIG_PERFORMANCE_COUNTERS=y +CONFIG_PLATFORM_AT32AP=y +CONFIG_CPU_AT32AP7000=y +CONFIG_BOARD_ATSTK1002=y +CONFIG_BOARD_ATSTK1000=y +CONFIG_LOADER_U_BOOT=y +CONFIG_LOAD_ADDRESS=0x10000000 +CONFIG_ENTRY_ADDRESS=0x90000000 +CONFIG_PHYS_OFFSET=0x10000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set +# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set +# CONFIG_NEED_NODE_MEMMAP_SIZE is not set +CONFIG_ARCH_FLATMEM_ENABLE=y +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +# CONFIG_ARCH_SPARSEMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +# CONFIG_OWNERSHIP_TRACE is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_CMDLINE="" 
+ +# +# Bus options +# + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# 
CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# I2O device support +# + +# +# Network device support +# +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# PHY device support +# +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_MACB=y + +# +# Ethernet (1000 Mbit) +# + +# +# Ethernet (10000 Mbit) +# + +# +# Token Ring devices +# + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +# CONFIG_PPP_SYNC_TTY is not set +CONFIG_PPP_DEFLATE=m +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set 
+# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_AT91=y +CONFIG_SERIAL_AT91_CONSOLE=y +# CONFIG_SERIAL_AT91_TTYAT is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# SPI support +# +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +CONFIG_SPI_ATMEL=m +# CONFIG_SPI_BITBANG is not set + +# +# SPI Protocol Masters +# + +# +# Dallas's 1-wire bus +# + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FIRMWARE_EDID is not set +CONFIG_FB=m +CONFIG_FB_CFB_FILLRECT=m +CONFIG_FB_CFB_COPYAREA=m +CONFIG_FB_CFB_IMAGEBLIT=m +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is 
not set +# CONFIG_FB_TILEBLITTING is not set +CONFIG_FB_SIDSA=m +CONFIG_FB_SIDSA_DEFAULT_BPP=24 +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Logo configuration +# +# CONFIG_LOGO is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_LCD_CLASS_DEVICE=m +CONFIG_LCD_DEVICE=y +CONFIG_LCD_LTV350QV=m + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB_ARCH_HAS_HCD is not set +# CONFIG_USB_ARCH_HAS_OHCI is not set +# CONFIG_USB_ARCH_HAS_EHCI is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_DNOTIFY is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# 
CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=m +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not 
set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=m + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_PRINTK_TIME=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +CONFIG_FRAME_POINTER=y +# CONFIG_UNWIND_INFO is not set +CONFIG_FORCED_INLINING=y +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_KPROBES=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=m +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile new file mode 100644 index 00000000000..90e5afff54a --- /dev/null +++ b/arch/avr32/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the Linux/AVR32 kernel. 
+# + +extra-y := head.o vmlinux.lds + +obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o +obj-y += syscall_table.o syscall-stubs.o irq.o +obj-y += setup.o traps.o semaphore.o ptrace.o +obj-y += signal.o sys_avr32.o process.o time.o +obj-y += init_task.o switch_to.o cpu.o +obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o +obj-$(CONFIG_KPROBES) += kprobes.o + +USE_STANDARD_AS_RULE := true + +%.lds: %.lds.c FORCE + $(call if_changed_dep,cpp_lds_S) diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c new file mode 100644 index 00000000000..97d86586566 --- /dev/null +++ b/arch/avr32/kernel/asm-offsets.c @@ -0,0 +1,25 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#include <linux/thread_info.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(TI_task, thread_info, task); + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_restart_block, thread_info, restart_block); +} diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c new file mode 100644 index 00000000000..04f767a272b --- /dev/null +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -0,0 +1,55 @@ +/* + * Export AVR32-specific functions for loadable modules. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/module.h> + +#include <asm/checksum.h> +#include <asm/uaccess.h> +#include <asm/delay.h> + +/* + * GCC functions + */ +extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b); +EXPORT_SYMBOL(__avr32_lsl64); +EXPORT_SYMBOL(__avr32_lsr64); +EXPORT_SYMBOL(__avr32_asr64); + +/* + * String functions + */ +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); + +/* + * Userspace access stuff. + */ +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); + +/* Delay loops (lib/delay.S) */ +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__const_udelay); + +/* Bit operations (lib/findbit.S) */ +EXPORT_SYMBOL(find_first_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); +EXPORT_SYMBOL(find_first_bit); +EXPORT_SYMBOL(find_next_bit); +EXPORT_SYMBOL(generic_find_next_zero_le_bit); diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c new file mode 100644 index 00000000000..342452ba204 --- /dev/null +++ b/arch/avr32/kernel/cpu.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/init.h> +#include <linux/sysdev.h> +#include <linux/seq_file.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/param.h> +#include <linux/errno.h> + +#include <asm/setup.h> +#include <asm/sysreg.h> + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + +/* + * XXX: If/when a SMP-capable implementation of AVR32 will ever be + * made, we must make sure that the code executes on the correct CPU. + */ +static ssize_t show_pc0event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f); +} +static ssize_t store_pc0event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc0count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt0; + + pcnt0 = sysreg_read(PCNT0); + return sprintf(buf, "%lu\n", pcnt0); +} +static ssize_t store_pc0count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT0, val); + + return count; +} + +static ssize_t show_pc1event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f); +} +static ssize_t store_pc1event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc1count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt1; + + pcnt1 = 
sysreg_read(PCNT1); + return sprintf(buf, "%lu\n", pcnt1); +} +static ssize_t store_pc1count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT1, val); + + return count; +} + +static ssize_t show_pccycles(struct sys_device *dev, char *buf) +{ + unsigned long pccnt; + + pccnt = sysreg_read(PCCNT); + return sprintf(buf, "%lu\n", pccnt); +} +static ssize_t store_pccycles(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCCNT, val); + + return count; +} + +static ssize_t show_pcenable(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "%c\n", (pccr & 1)?'1':'0'); +} +static ssize_t store_pcenable(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long pccr, val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + if (val) + val = 1; + + pccr = sysreg_read(PCCR); + pccr = (pccr & ~1UL) | val; + sysreg_write(PCCR, pccr); + + return count; +} + +static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event); +static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count); +static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event); +static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count); +static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles); +static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable); + +#endif /* CONFIG_PERFORMANCE_COUNTERS */ + +static int __init topology_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + + register_cpu(c, cpu); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + sysdev_create_file(&c->sysdev, &attr_pc0event); + sysdev_create_file(&c->sysdev, 
&attr_pc0count); + sysdev_create_file(&c->sysdev, &attr_pc1event); + sysdev_create_file(&c->sysdev, &attr_pc1count); + sysdev_create_file(&c->sysdev, &attr_pccycles); + sysdev_create_file(&c->sysdev, &attr_pcenable); +#endif + } + + return 0; +} + +subsys_initcall(topology_init); + +static const char *cpu_names[] = { + "Morgan", + "AP7000", +}; +#define NR_CPU_NAMES ARRAY_SIZE(cpu_names) + +static const char *arch_names[] = { + "AVR32A", + "AVR32B", +}; +#define NR_ARCH_NAMES ARRAY_SIZE(arch_names) + +static const char *mmu_types[] = { + "No MMU", + "ITLB and DTLB", + "Shared TLB", + "MPU" +}; + +void __init setup_processor(void) +{ + unsigned long config0, config1; + unsigned cpu_id, cpu_rev, arch_id, arch_rev, mmu_type; + unsigned tmp; + + config0 = sysreg_read(CONFIG0); /* 0x0000013e; */ + config1 = sysreg_read(CONFIG1); /* 0x01f689a2; */ + cpu_id = config0 >> 24; + cpu_rev = (config0 >> 16) & 0xff; + arch_id = (config0 >> 13) & 0x07; + arch_rev = (config0 >> 10) & 0x07; + mmu_type = (config0 >> 7) & 0x03; + + boot_cpu_data.arch_type = arch_id; + boot_cpu_data.cpu_type = cpu_id; + boot_cpu_data.arch_revision = arch_rev; + boot_cpu_data.cpu_revision = cpu_rev; + boot_cpu_data.tlb_config = mmu_type; + + tmp = (config1 >> 13) & 0x07; + if (tmp) { + boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07); + boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f); + boot_cpu_data.icache.linesz = 1 << (tmp + 1); + } + tmp = (config1 >> 3) & 0x07; + if (tmp) { + boot_cpu_data.dcache.ways = 1 << (config1 & 0x07); + boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f); + boot_cpu_data.dcache.linesz = 1 << (tmp + 1); + } + + if ((cpu_id >= NR_CPU_NAMES) || (arch_id >= NR_ARCH_NAMES)) { + printk ("Unknown CPU configuration (ID %02x, arch %02x), " + "continuing anyway...\n", + cpu_id, arch_id); + return; + } + + printk ("CPU: %s [%02x] revision %d (%s revision %d)\n", + cpu_names[cpu_id], cpu_id, cpu_rev, + arch_names[arch_id], arch_rev); + printk ("CPU: MMU 
configuration: %s\n", mmu_types[mmu_type]); + printk ("CPU: features:"); + if (config0 & (1 << 6)) + printk(" fpu"); + if (config0 & (1 << 5)) + printk(" java"); + if (config0 & (1 << 4)) + printk(" perfctr"); + if (config0 & (1 << 3)) + printk(" ocd"); + printk("\n"); +} + +#ifdef CONFIG_PROC_FS +static int c_show(struct seq_file *m, void *v) +{ + unsigned int icache_size, dcache_size; + unsigned int cpu = smp_processor_id(); + + icache_size = boot_cpu_data.icache.ways * + boot_cpu_data.icache.sets * + boot_cpu_data.icache.linesz; + dcache_size = boot_cpu_data.dcache.ways * + boot_cpu_data.dcache.sets * + boot_cpu_data.dcache.linesz; + + seq_printf(m, "processor\t: %d\n", cpu); + + if (boot_cpu_data.arch_type < NR_ARCH_NAMES) + seq_printf(m, "cpu family\t: %s revision %d\n", + arch_names[boot_cpu_data.arch_type], + boot_cpu_data.arch_revision); + if (boot_cpu_data.cpu_type < NR_CPU_NAMES) + seq_printf(m, "cpu type\t: %s revision %d\n", + cpu_names[boot_cpu_data.cpu_type], + boot_cpu_data.cpu_revision); + + seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n", + icache_size >> 10, + boot_cpu_data.icache.ways, + boot_cpu_data.icache.sets, + boot_cpu_data.icache.linesz); + seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n", + dcache_size >> 10, + boot_cpu_data.dcache.ways, + boot_cpu_data.dcache.sets, + boot_cpu_data.dcache.linesz); + seq_printf(m, "bogomips\t: %lu.%02lu\n", + boot_cpu_data.loops_per_jiffy / (500000/HZ), + (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? 
(void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ + +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; +#endif /* CONFIG_PROC_FS */ diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S new file mode 100644 index 00000000000..eeb66792bc3 --- /dev/null +++ b/arch/avr32/kernel/entry-avr32b.S @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This file contains the low-level entry-points into the kernel, that is, + * exception handlers, debug trap handlers, interrupt handlers and the + * system call handler. + */ +#include <linux/errno.h> + +#include <asm/asm.h> +#include <asm/hardirq.h> +#include <asm/irq.h> +#include <asm/ocd.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +#ifdef CONFIG_PREEMPT +# define preempt_stop mask_interrupts +#else +# define preempt_stop +# define fault_resume_kernel fault_restore_all +#endif + +#define __MASK(x) ((1 << (x)) - 1) +#define IRQ_MASK ((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \ + (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)) + + .section .ex.text,"ax",@progbits + .align 2 +exception_vectors: + bral handle_critical + .align 2 + bral handle_critical + .align 2 + bral do_bus_error_write + .align 2 + bral do_bus_error_read + .align 2 + bral do_nmi_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_debug + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral 
do_fpe_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_protection_fault + .align 2 + bral do_dtlb_modified + + /* + * r0 : PGD/PT/PTE + * r1 : Offending address + * r2 : Scratch register + * r3 : Cause (5, 12 or 13) + */ +#define tlbmiss_save pushm r0-r3 +#define tlbmiss_restore popm r0-r3 + + .section .tlbx.ex.text,"ax",@progbits + .global itlb_miss +itlb_miss: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbr.ex.text,"ax",@progbits +dtlb_miss_read: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbw.ex.text,"ax",@progbits +dtlb_miss_write: + tlbmiss_save + + .global tlb_miss_common +tlb_miss_common: + mfsr r0, SYSREG_PTBR + mfsr r1, SYSREG_TLBEAR + + /* Is it the vmalloc space? */ + bld r1, 31 + brcs handle_vmalloc_miss + + /* First level lookup */ +pgtbl_lookup: + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + bld r0, _PAGE_BIT_PRESENT + brcc page_table_not_present + + /* TODO: Check access rights on page table if necessary */ + + /* Translate to virtual address in P1. 
*/ + andl r0, 0xf000 + sbr r0, 31 + + /* Second level lookup */ + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + add r2, r0, r1 << 2 + ld.w r1, r2[0] + bld r1, _PAGE_BIT_PRESENT + brcc page_not_present + + /* Mark the page as accessed */ + sbr r1, _PAGE_BIT_ACCESSED + st.w r2[0], r1 + + /* Drop software flags */ + andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff + mtsr SYSREG_TLBELO, r1 + + /* Figure out which entry we want to replace */ + mfsr r0, SYSREG_TLBARLO + clz r2, r0 + brcc 1f + mov r1, -1 /* All entries have been accessed, */ + mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */ + mov r2, 0 /* and start at 0 */ +1: mfsr r1, SYSREG_MMUCR + lsl r2, 14 + andl r1, 0x3fff, COH + or r1, r2 + mtsr SYSREG_MMUCR, r1 + + tlbw + + tlbmiss_restore + rete + +handle_vmalloc_miss: + /* Simply do the lookup in init's page table */ + mov r0, lo(swapper_pg_dir) + orh r0, hi(swapper_pg_dir) + rjmp pgtbl_lookup + + + /* --- System Call --- */ + + .section .scall.text,"ax",@progbits +system_call: + pushm r12 /* r12_orig */ + stmts --sp, r0-lr + zero_fp + mfsr r0, SYSREG_RAR_SUP + mfsr r1, SYSREG_RSR_SUP + stm --sp, r0-r1 + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + bld r1, TIF_SYSCALL_TRACE + brcs syscall_trace_enter + +syscall_trace_cont: + cp.w r8, NR_syscalls + brhs syscall_badsys + + lddpc lr, syscall_table_addr + ld.w lr, lr[r8 << 2] + mov r8, r5 /* 5th argument (6th is pushed by stub) */ + icall lr + + .global syscall_return +syscall_return: + get_thread_info r0 + mask_interrupts /* make sure we don't miss an interrupt + setting need_resched or sigpending + between sampling and the rets */ + + /* Store the return value so that the correct value is loaded below */ + stdsp sp[REG_R12], r12 + + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + +syscall_exit_cont: + popm r8-r9 + mtsr SYSREG_RAR_SUP, r8 + mtsr SYSREG_RSR_SUP, r9 + ldmts sp++, r0-lr + sub sp, -4 /* r12_orig */ + rets + + .align 
2 +syscall_table_addr: + .long sys_call_table + +syscall_badsys: + mov r12, -ENOSYS + rjmp syscall_return + + .global ret_from_fork +ret_from_fork: + rcall schedule_tail + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + rjmp syscall_exit_cont + +syscall_trace_enter: + pushm r8-r12 + rcall syscall_trace + popm r8-r12 + rjmp syscall_trace_cont + +syscall_exit_work: + bld r1, TIF_SYSCALL_TRACE + brcc 1f + unmask_interrupts + rcall syscall_trace + mask_interrupts + ld.w r1, r0[TI_flags] + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_BREAKPOINT + brcc syscall_exit_cont + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp syscall_exit_cont + + + /* The slow path of the TLB miss handler */ +page_table_not_present: +page_not_present: + tlbmiss_restore + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + /* This function expects to find offending PC in SYSREG_RAR_EX */ +save_full_context_ex: + mfsr r8, SYSREG_RSR_EX + mov r12, r8 + andh r8, (MODE_MASK >> 16), COH + mfsr r11, SYSREG_RAR_EX + brne 2f + +1: pushm r11, r12 /* PC and SR */ + unmask_exceptions + ret r12 + +2: sub r10, sp, -(FRAME_SIZE_FULL - REG_LR) + stdsp sp[4], r10 /* replace saved SP */ + rjmp 1b + + /* Low-level exception handlers */ +handle_critical: + pushm r12 + pushm r0-r12 + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_critical_exception + + /* We should never get here... 
*/ +bad_return: + sub r12, pc, (. - 1f) + bral panic + .align 2 +1: .asciz "Return from critical exception!" + + .align 1 +do_bus_error_write: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 1 + rjmp 1f + +do_bus_error_read: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 0 +1: mfsr r12, SYSREG_BEAR + mov r10, sp + rcall do_bus_error + rjmp ret_from_exception + + .align 1 +do_nmi_ll: + sub sp, 4 + stmts --sp, r0-lr + /* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */ + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_nmi + rjmp bad_return + +handle_address_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_address_exception + rjmp ret_from_exception + +handle_protection_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + .align 1 +do_illegal_opcode_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_illegal_opcode + rjmp ret_from_exception + +do_dtlb_modified: + pushm r0-r3 + mfsr r1, SYSREG_TLBEAR + mfsr r0, SYSREG_PTBR + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + + /* Translate to virtual address in P1 */ + andl r0, 0xf000 + sbr r0, 31 + add r2, r0, r1 << 2 + ld.w r3, r2[0] + sbr r3, _PAGE_BIT_DIRTY + mov r0, r3 + st.w r2[0], r3 + + /* The page table is up-to-date. Update the TLB entry as well */ + andl r0, lo(_PAGE_FLAGS_HARDWARE_MASK) + mtsr SYSREG_TLBELO, r0 + + /* MMUCR[DRP] is updated automatically, so let's go... 
*/ + tlbw + + popm r0-r3 + rete + +do_fpe_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + unmask_interrupts + mov r12, 26 + mov r11, sp + rcall do_fpe + rjmp ret_from_exception + +ret_from_exception: + mask_interrupts + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH + brne fault_resume_kernel + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne fault_exit_work + +fault_resume_user: + popm r8-r9 + mask_exceptions + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + ldmts sp++, r0-lr + sub sp, -4 + rete + +fault_resume_kernel: +#ifdef CONFIG_PREEMPT + get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1f + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1f + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1f + rcall preempt_schedule_irq +1: +#endif + + popm r8-r9 + mask_exceptions + mfsr r1, SYSREG_SR + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + popm lr + sub sp, -4 /* ignore SP */ + popm r0-r12 + sub sp, -4 /* ignore r12_orig */ + rete + +irq_exit_work: + /* Switch to exception mode so that we can share the same code. 
*/ + mfsr r8, SYSREG_SR + cbr r8, SYSREG_M0_OFFSET + orh r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2)) + mtsr SYSREG_SR, r8 + sub pc, -2 + get_thread_info r0 + ld.w r1, r0[TI_flags] + +fault_exit_work: + bld r1, TIF_NEED_RESCHED + brcc 1f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +1: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 2f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +2: bld r1, TIF_BREAKPOINT + brcc fault_resume_user + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp fault_resume_user + + /* If we get a debug trap from privileged context we end up here */ +handle_debug_priv: + /* Fix up LR and SP in regs. r11 contains the mode we came from */ + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + stdsp sp[REG_LR], lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub r10, sp, -FRAME_SIZE_FULL + stdsp sp[REG_SP], r10 + mov r12, sp + rcall do_debug_priv + + /* Now, put everything back */ + ssrf SR_EM_BIT + popm r10, r11 + mtsr SYSREG_RAR_DBG, r10 + mtsr SYSREG_RSR_DBG, r11 + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + andh r11, hi(MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + popm lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub sp, -4 /* skip SP */ + popm r0-r12 + sub sp, -4 + retd + + /* + * At this point, everything is masked, that is, interrupts, + * exceptions and debugging traps. We might get called from + * interrupt or exception context in some rare cases, but this + * will be taken care of by do_debug(), so we're not going to + * do a 100% correct context save here. 
+ */ +handle_debug: + sub sp, 4 /* r12_orig */ + stmts --sp, r0-lr + mfsr r10, SYSREG_RAR_DBG + mfsr r11, SYSREG_RSR_DBG + unmask_exceptions + pushm r10,r11 + andh r11, (MODE_MASK >> 16), COH + brne handle_debug_priv + + mov r12, sp + rcall do_debug + + lddsp r10, sp[REG_SR] + andh r10, (MODE_MASK >> 16), COH + breq debug_resume_user + +debug_restore_all: + popm r10,r11 + mask_exceptions + mtsr SYSREG_RSR_DBG, r11 + mtsr SYSREG_RAR_DBG, r10 + ldmts sp++, r0-lr + sub sp, -4 + retd + +debug_resume_user: + get_thread_info r0 + mask_interrupts + + ld.w r1, r0[TI_flags] + andl r1, _TIF_DBGWORK_MASK, COH + breq debug_restore_all + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_SINGLE_STEP + brcc debug_restore_all + mfdr r2, DBGREG_DC + sbr r2, DC_SS_BIT + mtdr DBGREG_DC, r2 + rjmp debug_restore_all + + .set rsr_int0, SYSREG_RSR_INT0 + .set rsr_int1, SYSREG_RSR_INT1 + .set rsr_int2, SYSREG_RSR_INT2 + .set rsr_int3, SYSREG_RSR_INT3 + .set rar_int0, SYSREG_RAR_INT0 + .set rar_int1, SYSREG_RAR_INT1 + .set rar_int2, SYSREG_RAR_INT2 + .set rar_int3, SYSREG_RAR_INT3 + + .macro IRQ_LEVEL level + .type irq_level\level, @function +irq_level\level: + sub sp, 4 /* r12_orig */ + stmts --sp,r0-lr + mfsr r8, rar_int\level + mfsr r9, rsr_int\level + pushm r8-r9 + + mov r11, sp + mov r12, \level + + rcall do_IRQ + + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH +#ifdef CONFIG_PREEMPT + brne 2f +#else + brne 1f +#endif + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne irq_exit_work + +1: popm r8-r9 + mtsr rar_int\level, r8 + mtsr rsr_int\level, r9 + ldmts sp++,r0-lr + sub sp, -4 /* ignore r12_orig */ + rete + +#ifdef CONFIG_PREEMPT +2: + 
get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1b + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1b + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1b + rcall preempt_schedule_irq + rjmp 1b +#endif + .endm + + .section .irq.text,"ax",@progbits + + .global irq_level0 + .global irq_level1 + .global irq_level2 + .global irq_level3 + IRQ_LEVEL 0 + IRQ_LEVEL 1 + IRQ_LEVEL 2 + IRQ_LEVEL 3 diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S new file mode 100644 index 00000000000..773b7ad87be --- /dev/null +++ b/arch/avr32/kernel/head.S @@ -0,0 +1,45 @@ +/* + * Non-board-specific low-level startup code + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> + +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/sysreg.h> + + .section .init.text,"ax" + .global kernel_entry +kernel_entry: + /* Initialize status register */ + lddpc r0, init_sr + mtsr SYSREG_SR, r0 + + /* Set initial stack pointer */ + lddpc sp, stack_addr + sub sp, -THREAD_SIZE + +#ifdef CONFIG_FRAME_POINTER + /* Mark last stack frame */ + mov lr, 0 + mov r7, 0 +#endif + + /* Set up the PIO, SDRAM controller, early printk, etc. 
*/ + rcall board_early_init + + /* Start the show */ + lddpc pc, kernel_start_addr + + .align 2 +init_sr: + .long 0x007f0000 /* Supervisor mode, everything masked */ +stack_addr: + .long init_thread_union +kernel_start_addr: + .long start_kernel diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c new file mode 100644 index 00000000000..effcacf9d1a --- /dev/null +++ b/arch/avr32/kernel/init_task.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/pgtable.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. Must be aligned on an 8192-byte boundary. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. 
+ * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c new file mode 100644 index 00000000000..856f3548e66 --- /dev/null +++ b/arch/avr32/kernel/irq.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on arch/i386/kernel/irq.c + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * IRQ's are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/sysdev.h> + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. 
+ */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ %u\n", irq); +} + +#ifdef CONFIG_PROC_FS +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *)v, cpu; + struct irqaction *action; + unsigned long flags; + + if (i == 0) { + seq_puts(p, " "); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto unlock; + + seq_printf(p, "%3d: ", i); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); + unlock: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } + + return 0; +} +#endif diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c new file mode 100644 index 00000000000..6caf9e8d808 --- /dev/null +++ b/arch/avr32/kernel/kprobes.c @@ -0,0 +1,270 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * Based on arch/ppc64/kernel/kprobes.c + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kprobes.h> +#include <linux/ptrace.h> + +#include <asm/cacheflush.h> +#include <asm/kdebug.h> +#include <asm/ocd.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +static unsigned long kprobe_status; +static struct pt_regs jprobe_saved_regs; + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + + if ((unsigned long)p->addr & 0x01) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } + + /* XXX: Might be a good idea to check if p->addr is a valid + * kernel address as well... 
*/ + + if (!ret) { + pr_debug("copy kprobe at %p\n", p->addr); + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + } + + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + pr_debug("arming kprobe at %p\n", p->addr); + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + pr_debug("disarming kprobe at %p\n", p->addr); + *p->addr = p->opcode; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long dc; + + pr_debug("preparing to singlestep over %p (PC=%08lx)\n", + p->addr, regs->pc); + + BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D))); + + dc = __mfdr(DBGREG_DC); + dc |= DC_SS; + __mtdr(DBGREG_DC, dc); + + /* + * We must run the instruction from its original location + * since it may actually reference PC. + * + * TODO: Do the instruction replacement directly in icache. 
+ */ + *p->addr = p->opcode; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long dc; + + pr_debug("resuming execution at PC=%08lx\n", regs->pc); + + dc = __mfdr(DBGREG_DC); + dc &= ~DC_SS; + __mtdr(DBGREG_DC, dc); + + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __get_cpu_var(current_kprobe) = p; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + void *addr = (void *)regs->pc; + int ret = 0; + + pr_debug("kprobe_handler: kprobe_running=%d\n", + kprobe_running()); + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + + /* Check that we're not recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + printk("FIXME: kprobe hit while single-stepping!\n"); + goto no_kprobe; + } + + printk("FIXME: kprobe hit while handling another kprobe\n"); + goto no_kprobe; + } else { + p = kprobe_running(); + if (p->break_handler && p->break_handler(p, regs)) + goto ss_probe; + } + /* If it's not ours, can't be delete race, (we hold lock). 
*/ + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) + goto no_kprobe; + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + prepare_singlestep(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + return ret; +} + +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + pr_debug("post_kprobe_handler, cur=%p\n", cur); + + if (!cur) + return 0; + + if (cur->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + reset_current_kprobe(); + preempt_enable_no_resched(); + + return 1; +} + +static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + + pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. 
+ */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n", + val, data); + + switch (val) { + case DIE_BREAKPOINT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_FAULT: + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + + memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* + * TODO: We should probably save some of the stack here as + * well, since gcc may pass arguments on the stack for certain + * functions (lots of arguments, large aggregates, varargs) + */ + + /* setup return addr to the jprobe handler routine */ + regs->pc = (unsigned long)jp->entry; + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile("breakpoint" ::: "memory"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... 
+ */ + memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + return 1; +} + +int __init arch_init_kprobes(void) +{ + printk("KPROBES: Enabling monitor mode (MM|DBE)...\n"); + __mtdr(DBGREG_DC, DC_MM | DC_DBE); + + /* TODO: Register kretprobe trampoline */ + return 0; +} diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c new file mode 100644 index 00000000000..dfc32f2817b --- /dev/null +++ b/arch/avr32/kernel/module.c @@ -0,0 +1,324 @@ +/* + * AVR32-specific kernel module loader + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * GOT initialization parts are based on the s390 version + * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/moduleloader.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc(size); +} + +void module_free(struct module *mod, void *module_region) +{ + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + + vfree(module_region); + /* FIXME: if module_region == mod->init_region, trim exception + * table entries. 
*/ +} + +static inline int check_rela(Elf32_Rela *rela, struct module *module, + char *strings, Elf32_Sym *symbols) +{ + struct mod_arch_syminfo *info; + + info = module->arch.syminfo + ELF32_R_SYM(rela->r_info); + switch (ELF32_R_TYPE(rela->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: /* mcall */ + case R_AVR32_GOT16S: /* ld.w */ + if (rela->r_addend != 0) { + printk(KERN_ERR + "GOT relocation against %s at offset %u with addend\n", + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name, + rela->r_offset); + return -ENOEXEC; + } + if (info->got_offset == -1UL) { + info->got_offset = module->arch.got_size; + module->arch.got_size += sizeof(void *); + } + pr_debug("GOT[%3lu] %s\n", info->got_offset, + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name); + break; + } + + return 0; +} + +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *module) +{ + Elf32_Shdr *symtab; + Elf32_Sym *symbols; + Elf32_Rela *rela; + char *strings; + int nrela, i, j; + int ret; + + /* Find the symbol table */ + symtab = NULL; + for (i = 0; i < hdr->e_shnum; i++) + switch (sechdrs[i].sh_type) { + case SHT_SYMTAB: + symtab = &sechdrs[i]; + break; + } + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", module->name); + return -ENOEXEC; + } + + /* Allocate room for one syminfo structure per symbol. */ + module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); + module->arch.syminfo = vmalloc(module->arch.nsyms + * sizeof(struct mod_arch_syminfo)); + if (!module->arch.syminfo) + return -ENOMEM; + + symbols = (void *)hdr + symtab->sh_offset; + strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset; + for (i = 0; i < module->arch.nsyms; i++) { + if (symbols[i].st_shndx == SHN_UNDEF && + strcmp(strings + symbols[i].st_name, + "_GLOBAL_OFFSET_TABLE_") == 0) + /* "Define" it as absolute. 
*/ + symbols[i].st_shndx = SHN_ABS; + module->arch.syminfo[i].got_offset = -1UL; + module->arch.syminfo[i].got_initialized = 0; + } + + /* Allocate GOT entries for symbols that need it. */ + module->arch.got_size = 0; + for (i = 0; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_RELA) + continue; + nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela); + rela = (void *)hdr + sechdrs[i].sh_offset; + for (j = 0; j < nrela; j++) { + ret = check_rela(rela + j, module, + strings, symbols); + if (ret) + goto out_free_syminfo; + } + } + + /* + * Increase core size to make room for GOT and set start + * offset for GOT. + */ + module->core_size = ALIGN(module->core_size, 4); + module->arch.got_offset = module->core_size; + module->core_size += module->arch.got_size; + + return 0; + +out_free_syminfo: + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return ret; +} + +static inline int reloc_overflow(struct module *module, const char *reloc_name, + Elf32_Addr relocation) +{ + printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n", + module->name, (unsigned long)relocation, reloc_name); + return -ENOEXEC; +} + +#define get_u16(loc) (*((uint16_t *)loc)) +#define put_u16(loc, val) (*((uint16_t *)loc) = (val)) + +int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + Elf32_Shdr *symsec = sechdrs + symindex; + Elf32_Shdr *relsec = sechdrs + relindex; + Elf32_Shdr *dstsec = sechdrs + relsec->sh_info; + Elf32_Rela *rel = (void *)relsec->sh_addr; + unsigned int i; + int ret = 0; + + for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) { + struct mod_arch_syminfo *info; + Elf32_Sym *sym; + Elf32_Addr relocation; + uint32_t *location; + uint32_t value; + + location = (void *)dstsec->sh_addr + rel->r_offset; + sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info); + relocation = sym->st_value + rel->r_addend; + + info = module->arch.syminfo + 
ELF32_R_SYM(rel->r_info); + + /* Initialize GOT entry if necessary */ + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: + case R_AVR32_GOT16S: + if (!info->got_initialized) { + Elf32_Addr *gotent; + + gotent = (module->module_core + + module->arch.got_offset + + info->got_offset); + *gotent = relocation; + info->got_initialized = 1; + } + + relocation = info->got_offset; + break; + } + + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_32: + case R_AVR32_32_CPENT: + *location = relocation; + break; + case R_AVR32_22H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffe00001) != 0 + && (relocation & 0xffc00001) != 0xffc00000) + return reloc_overflow(module, + "R_AVR32_22H_PCREL", + relocation); + relocation >>= 1; + + value = *location; + value = ((value & 0xe1ef0000) + | (relocation & 0xffff) + | ((relocation & 0x10000) << 4) + | ((relocation & 0x1e0000) << 8)); + *location = value; + break; + case R_AVR32_11H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xfffffc01) != 0 + && (relocation & 0xfffff801) != 0xfffff800) + return reloc_overflow(module, + "R_AVR32_11H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00c) + | ((relocation & 0x1fe) << 3) + | ((relocation & 0x600) >> 9)); + put_u16(location, value); + break; + case R_AVR32_9H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffffff01) != 0 + && (relocation & 0xfffffe01) != 0xfffffe00) + return reloc_overflow(module, + "R_AVR32_9H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00f) + | ((relocation & 0x1fe) << 3)); + put_u16(location, value); + break; + case R_AVR32_9UW_PCREL: + relocation -= ((Elf32_Addr)location) & 0xfffffffc; + if ((relocation & 0xfffffc03) != 0) + return reloc_overflow(module, + "R_AVR32_9UW_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf80f) + | 
((relocation & 0x1fc) << 2)); + put_u16(location, value); + break; + case R_AVR32_GOTPC: + /* + * R6 = PC - (PC - GOT) + * + * At this point, relocation contains the + * value of PC. Just subtract the value of + * GOT, and we're done. + */ + pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n", + relocation, module->arch.got_offset, + module->module_core); + relocation -= ((unsigned long)module->module_core + + module->arch.got_offset); + *location = relocation; + break; + case R_AVR32_GOT18SW: + if ((relocation & 0xfffe0003) != 0 + && (relocation & 0xfffc0003) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT18SW", + relocation); + relocation >>= 2; + /* fall through */ + case R_AVR32_GOT16S: + if ((relocation & 0xffff8000) != 0 + && (relocation & 0xffff0000) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT16S", + relocation); + pr_debug("GOT reloc @ 0x%lx -> %lu\n", + rel->r_offset, relocation); + value = *location; + value = ((value & 0xffff0000) + | (relocation & 0xffff)); + *location = value; + break; + + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + module->name, ELF32_R_TYPE(rel->r_info)); + return -ENOEXEC; + } + } + + return ret; +} + +int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + printk(KERN_ERR "module %s: REL relocations are not supported\n", + module->name); + return -ENOEXEC; +} + +int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *module) +{ + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return 0; +} + +void module_arch_cleanup(struct module *module) +{ + +} diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c new file mode 100644 index 00000000000..317dc50945f --- /dev/null +++ b/arch/avr32/kernel/process.c @@ -0,0 +1,276 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/fs.h> +#include <linux/ptrace.h> +#include <linux/reboot.h> +#include <linux/unistd.h> + +#include <asm/sysreg.h> +#include <asm/ocd.h> + +void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); + +/* + * This file handles the architecture-dependent parts of process handling. + */ + +void cpu_idle(void) +{ + /* endless idle loop with no priority at all */ + while (1) { + /* TODO: Enter sleep mode */ + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + +void machine_halt(void) +{ +} + +void machine_power_off(void) +{ +} + +void machine_restart(char *cmd) +{ + __mtdr(DBGREG_DC, DC_DBE); + __mtdr(DBGREG_DC, DC_RES); + while (1) ; +} + +/* + * PC is actually discarded when returning from a system call -- the + * return address must be stored in LR. This function will make sure + * LR points to do_exit before starting the thread. + * + * Also, when returning from fork(), r12 is 0, so we must copy the + * argument as well. + * + * r0 : The argument to the main thread function (copied to r12) + * r1 : The address of the main thread function (jumped to via PC) + * r2 : The address of do_exit (loaded into LR) + */ +asmlinkage extern void kernel_thread_helper(void); +__asm__(" .type kernel_thread_helper, @function\n" + "kernel_thread_helper:\n" + " mov r12, r0\n" + " mov lr, r2\n" + " mov pc, r1\n" + " .size kernel_thread_helper, . 
- kernel_thread_helper"); + +/* + * Create a kernel thread that runs fn(arg). + * + * Builds a zeroed register frame that enters kernel_thread_helper in + * supervisor mode with arg in r0, fn in r1 and do_exit in r2, then forks + * with CLONE_VM | CLONE_UNTRACED added to the caller's flags. Returns + * whatever do_fork() returns. + */ +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(&regs, 0, sizeof(regs)); + + regs.r0 = (unsigned long)arg; + regs.r1 = (unsigned long)fn; + regs.r2 = (unsigned long)do_exit; + regs.lr = (unsigned long)kernel_thread_helper; + regs.pc = (unsigned long)kernel_thread_helper; + regs.sr = MODE_SUPERVISOR; + + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, &regs, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* + * Free current thread data structures etc + */ +void exit_thread(void) +{ + /* nothing to do */ +} + +void flush_thread(void) +{ + /* nothing to do */ +} + +void release_thread(struct task_struct *dead_task) +{ + /* do nothing */ +} + +static const char *cpu_modes[] = { + "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", + "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" +}; + +void show_regs(struct pt_regs *regs) +{ + unsigned long sp = regs->sp; + unsigned long lr = regs->lr; + unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + + if (!user_mode(regs)) + sp = (unsigned long)regs + FRAME_SIZE_FULL; + + print_symbol("PC is at %s\n", instruction_pointer(regs)); + print_symbol("LR is at %s\n", lr); + printk("pc : [<%08lx>] lr : [<%08lx>] %s\n" + "sp : %08lx r12: %08lx r11: %08lx\n", + instruction_pointer(regs), + lr, print_tainted(), sp, regs->r12, regs->r11); + printk("r10: %08lx r9 : %08lx r8 : %08lx\n", + regs->r10, regs->r9, regs->r8); + printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n", + regs->r7, regs->r6, regs->r5, regs->r4); + printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n", + regs->r3, regs->r2, regs->r1, regs->r0); + printk("Flags: %c%c%c%c%c\n", + regs->sr & SR_Q ? 'Q' : 'q', + regs->sr & SR_V ? 'V' : 'v', + regs->sr & SR_N ? 'N' : 'n', + regs->sr & SR_Z ? 'Z' : 'z', + regs->sr & SR_C ? 'C' : 'c'); + printk("Mode bits: %c%c%c%c%c%c%c%c%c\n", + regs->sr & SR_H ? 'H' : 'h', + regs->sr & SR_R ? 
'R' : 'r', + regs->sr & SR_J ? 'J' : 'j', + regs->sr & SR_EM ? 'E' : 'e', + regs->sr & SR_I3M ? '3' : '.', + regs->sr & SR_I2M ? '2' : '.', + regs->sr & SR_I1M ? '1' : '.', + regs->sr & SR_I0M ? '0' : '.', + regs->sr & SR_GM ? 'G' : 'g'); + printk("CPU Mode: %s\n", cpu_modes[mode]); + + show_trace(NULL, (unsigned long *)sp, regs); +} +EXPORT_SYMBOL(show_regs); + +/* Fill in the fpu structure for a core dump. This is easy -- we don't have any */ +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) +{ + /* Not valid */ + return 0; +} + +asmlinkage void ret_from_fork(void); + +int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs; + + childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1; + *childregs = *regs; + + if (user_mode(regs)) + childregs->sp = usp; + else + childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE; + + childregs->r12 = 0; /* Set return value for child */ + + p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM; + p->thread.cpu_context.ksp = (unsigned long)childregs; + p->thread.cpu_context.pc = (unsigned long)ret_from_fork; + + return 0; +} + +/* r12-r8 are dummy parameters to force the compiler to use the stack */ +asmlinkage int sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +} + +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, + (int __user *)parent_tidptr, + (int __user *)child_tidptr); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, + 0, NULL, NULL); +} + +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs) 
+{ + int error; + char *filename; + + filename = getname(ufilename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + + error = do_execve(filename, uargv, uenvp, regs); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + +out: + return error; +} + + +/* + * This function is supposed to answer the question "who called + * schedule()?" + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long pc; + unsigned long stack_page; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + stack_page = (unsigned long)p->thread_info; + BUG_ON(!stack_page); + + /* + * The stored value of PC is either the address right after + * the call to __switch_to() or ret_from_fork. + */ + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER + unsigned long fp = p->thread.cpu_context.r7; + BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)fp; +#else + /* + * We depend on the frame size of schedule here, which + * is actually quite ugly. It might be possible to + * determine the frame size automatically at build + * time by doing this: + * - compile sched.c + * - disassemble the resulting sched.o + * - look for 'sub sp,??' shortly after '<schedule>:' + */ + unsigned long sp = p->thread.cpu_context.ksp + 16; + BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)sp; +#endif + } + + return pc; +} diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c new file mode 100644 index 00000000000..3c89e59029a --- /dev/null +++ b/arch/avr32/kernel/ptrace.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#undef DEBUG +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/unistd.h> +#include <linux/notifier.h> + +#include <asm/traps.h> +#include <asm/uaccess.h> +#include <asm/ocd.h> +#include <asm/mmu_context.h> +#include <asm/kdebug.h> + +static struct pt_regs *get_user_regs(struct task_struct *tsk) +{ + return (struct pt_regs *)((unsigned long) tsk->thread_info + + THREAD_SIZE - sizeof(struct pt_regs)); +} + +static void ptrace_single_step(struct task_struct *tsk) +{ + pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n", + tsk->pid, tsk->thread.cpu_context.sr); + if (!(tsk->thread.cpu_context.sr & SR_D)) { + /* + * Set a breakpoint at the current pc to force the + * process into debug mode. The syscall/exception + * exit code will set a breakpoint at the return + * address when this flag is set. + */ + pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n"); + set_tsk_thread_flag(tsk, TIF_BREAKPOINT); + } + + /* The monitor code will do the actual step for us */ + set_tsk_thread_flag(tsk, TIF_SINGLE_STEP); +} + +/* + * Called by kernel/ptrace.c when detaching + * + * Make sure any single step bits, etc. are not set + */ +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLE_STEP); +} + +/* + * Handle hitting a breakpoint + */ +static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) +{ + siginfo_t info; + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *)instruction_pointer(regs); + + pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n", + tsk->pid, info.si_addr); + force_sig_info(SIGTRAP, &info, tsk); +} + +/* + * Read the word at offset "offset" into the task's "struct user". We + * actually access the pt_regs struct stored on the kernel stack. 
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long offset,
+			    unsigned long __user *data)
+{
+	unsigned long *regs;
+	unsigned long value;
+
+	pr_debug("ptrace_read_user(%p, %#lx, %p)\n",
+		 tsk, offset, data);
+
+	if (offset & 3 || offset >= sizeof(struct user)) {
+		printk("ptrace_read_user: invalid offset 0x%08lx\n", offset);
+		return -EIO;
+	}
+
+	regs = (unsigned long *)get_user_regs(tsk);
+
+	/* Anything in struct user beyond the pt_regs part reads as zero */
+	value = 0;
+	if (offset < sizeof(struct pt_regs))
+		value = regs[offset / sizeof(regs[0])];
+
+	return put_user(value, data);
+}
+
+/*
+ * Write the word "value" to offset "offset" into the task's "struct
+ * user". We actually access the pt_regs struct stored on the kernel
+ * stack.
+ *
+ * Offsets beyond the pt_regs part of struct user are accepted and
+ * silently ignored. The write is applied to a copy first and only
+ * committed if the result passes valid_user_regs(), just like
+ * ptrace_setregs() does, so that a tracer can't poke an illegal
+ * status register into the tracee one word at a time.
+ *
+ * Returns 0 on success, -EIO on a bad offset or if the resulting
+ * register set is not acceptable.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long offset,
+			     unsigned long value)
+{
+	struct pt_regs *regs;
+	struct pt_regs newregs;
+	unsigned long *words = (unsigned long *)&newregs;
+
+	if (offset & 3 || offset >= sizeof(struct user)) {
+		printk("ptrace_write_user: invalid offset 0x%08lx\n", offset);
+		return -EIO;
+	}
+
+	if (offset >= sizeof(struct pt_regs))
+		return 0;
+
+	regs = get_user_regs(tsk);
+	newregs = *regs;
+	words[offset / sizeof(words[0])] = value;
+	if (!valid_user_regs(&newregs))
+		return -EIO;
+
+	*regs = newregs;
+	return 0;
+}
+
+/*
+ * Copy the tracee's saved register set (struct pt_regs) to the user
+ * buffer "uregs". Returns 0, or -EFAULT if the copy faults.
+ */
+static int ptrace_getregs(struct task_struct *tsk, void __user *uregs)
+{
+	struct pt_regs *regs = get_user_regs(tsk);
+
+	return copy_to_user(uregs, regs, sizeof(*regs)) ? -EFAULT : 0;
+}
+
+/*
+ * Replace the tracee's saved register set with the one in the user
+ * buffer "uregs". Returns 0 on success, -EFAULT if the buffer can't
+ * be read and -EINVAL if the new registers fail valid_user_regs().
+ */
+static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs)
+{
+	struct pt_regs newregs;
+	int ret;
+
+	ret = -EFAULT;
+	if (copy_from_user(&newregs, uregs, sizeof(newregs)) == 0) {
+		struct pt_regs *regs = get_user_regs(tsk);
+
+		ret = -EINVAL;
+		if (valid_user_regs(&newregs)) {
+			*regs = newregs;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Architecture-specific part of ptrace(): handles the requests listed
+ * in the switch below and hands everything else to the generic
+ * ptrace_request(). The requests handled here return 0 or a negative
+ * errno.
+ */
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	int ret;
+
+	/* child->pid is an int-sized pid_t, hence %d rather than %ld */
+	pr_debug("arch_ptrace(%ld, %d, %#lx, %#lx)\n",
+		 request, child->pid, addr, data);
+
+	pr_debug("ptrace: Enabling monitor mode...\n");
+	__mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE);
+
+	switch (request) {
+	/* Read the word at location addr in the child process */
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		if (ret == sizeof(tmp))
+			ret = put_user(tmp, (unsigned long __user *)data);
+		else
+			ret = -EIO;
+		break;
+
+	case PTRACE_PEEKUSR:
+		ret = ptrace_read_user(child, addr,
+				       (unsigned long __user *)data);
+		break;
+
+	/* Write the word in data at location addr */
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		if (ret == sizeof(data))
+			ret = 0;
+		else
+			ret = -EIO;
+		break;
+
+	case PTRACE_POKEUSR:
+		ret = ptrace_write_user(child, addr, data);
+		break;
+
+	/* continue and stop at next (return from) syscall */
+	case PTRACE_SYSCALL:
+	/* restart after signal */
+	case PTRACE_CONT:
+		ret = -EIO;
+		if (!valid_signal(data))
+			break;
+		if (request == PTRACE_SYSCALL)
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		else
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		child->exit_code = data;
+		/* XXX: Are we sure no breakpoints are active here? */
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+	/*
+	 * Make the child exit. Best I can do is send it a
+	 * SIGKILL. Perhaps it should be put in the status that it
+	 * wants to exit.
+	 */
+	case PTRACE_KILL:
+		ret = 0;
+		if (child->exit_state == EXIT_ZOMBIE)
+			break;
+		child->exit_code = SIGKILL;
+		wake_up_process(child);
+		break;
+
+	/*
+	 * execute single instruction.
+	 */
+	case PTRACE_SINGLESTEP:
+		ret = -EIO;
+		if (!valid_signal(data))
+			break;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		ptrace_single_step(child);
+		child->exit_code = data;
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+	/* Detach a process that was attached */
+	case PTRACE_DETACH:
+		ret = ptrace_detach(child, data);
+		break;
+
+	case PTRACE_GETREGS:
+		ret = ptrace_getregs(child, (void __user *)data);
+		break;
+
+	case PTRACE_SETREGS:
+		ret = ptrace_setregs(child, (const void __user *)data);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC));
+	return ret;
+}
+
+/*
+ * Report a syscall stop to the tracer. Does nothing unless the
+ * current task has TIF_SYSCALL_TRACE set and is being ptraced. After
+ * the stop, a signal number left in current->exit_code is sent to
+ * the task itself.
+ */
+asmlinkage void syscall_trace(void)
+{
+	pr_debug("syscall_trace called\n");
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	pr_debug("syscall_trace: notifying parent\n");
+	/* The 0x80 provides a way for the tracing parent to
+	 * distinguish between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it
+	 * will do for normal use. strace only continues with a
+	 * signal if the stopping signal is not SIGTRAP. -brl
+	 */
+	if (current->exit_code) {
+		pr_debug("syscall_trace: sending signal %d to PID %u\n",
+			 current->exit_code, current->pid);
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+/*
+ * Debug trap handler. The notify_die() chain gets the first shot at
+ * the event. For a single-step trap (DS_SSS set) hardware single
+ * stepping is switched off and the thread is flagged so that
+ * do_debug() can finish the job later. Any other debug trap nobody
+ * claimed is fatal.
+ */
+asmlinkage void do_debug_priv(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+	unsigned long die_val;
+
+	ds = __mfdr(DBGREG_DS);
+
+	pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	if (ds & DS_SSS)
+		die_val = DIE_SSTEP;
+	else
+		die_val = DIE_BREAKPOINT;
+
+	if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (likely(ds & DS_SSS)) {
+		extern void itlb_miss(void);
+		extern void tlb_miss_common(void);
+		struct thread_info *ti;
+
+		dc = __mfdr(DBGREG_DC);
+		dc &= ~DC_SS;
+		__mtdr(DBGREG_DC, dc);
+
+		ti = current_thread_info();
+		ti->flags |= _TIF_BREAKPOINT;
+
+		/* The TLB miss handlers don't check thread flags */
+		if ((regs->pc >= (unsigned long)&itlb_miss)
+		    && (regs->pc <= (unsigned long)&tlb_miss_common)) {
+			__mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX));
+			__mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1));
+		}
+
+		/*
+		 * If we're running in supervisor mode, the breakpoint
+		 * will take us where we want directly, no need to
+		 * single step.
+		 *
+		 * ti->flags is a bit mask, so the _TIF_ mask has to be
+		 * used here; OR-ing in the TIF_SINGLE_STEP bit number
+		 * would set unrelated flags instead of the one that
+		 * do_debug() tests.
+		 */
+		if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
+			ti->flags |= _TIF_SINGLE_STEP;
+	} else {
+		panic("Unable to handle debug trap at pc = %08lx\n",
+		      regs->pc);
+	}
+}
+
+/*
+ * Handle breakpoints, single steps and other debuggy things. To keep
+ * things simple initially, we run with interrupts and exceptions
+ * disabled all the time.
+ */
+asmlinkage void do_debug(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+
+	/* Snapshot the debug status register before touching anything */
+	ds = __mfdr(DBGREG_DS);
+	pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	/*
+	 * TIF_BREAKPOINT is set by do_debug_priv(), which may also have
+	 * programmed BWC2A; acknowledge the flag and write 0 back to
+	 * that register.
+	 */
+	if (test_thread_flag(TIF_BREAKPOINT)) {
+		pr_debug("TIF_BREAKPOINT set\n");
+		/* We're taking care of it */
+		clear_thread_flag(TIF_BREAKPOINT);
+		__mtdr(DBGREG_BWC2A, 0);
+	}
+
+	if (test_thread_flag(TIF_SINGLE_STEP)) {
+		pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
+		/*
+		 * Only report the step once the status register says a
+		 * single step (DS_SSS) actually happened; then clear
+		 * DC_SS and stop in the tracer.
+		 */
+		if (ds & DS_SSS) {
+			dc = __mfdr(DBGREG_DC);
+			dc &= ~DC_SS;
+			__mtdr(DBGREG_DC, dc);
+
+			clear_thread_flag(TIF_SINGLE_STEP);
+			ptrace_break(current, regs);
+		}
+	} else {
+		/* regular breakpoint */
+		ptrace_break(current, regs);
+	}
+}
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
new file mode 100644
index 00000000000..1e2705a0501
--- /dev/null
+++ b/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 semaphore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ * Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+/*
+ * Contended release: wake up tasks waiting on the semaphore's
+ * wait queue.
+ */
+void __up(struct semaphore *sem)
+{
+	wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+/*
+ * Contended acquire, uninterruptible. Queues the current task as an
+ * exclusive waiter and sleeps until folding the sleeper count back
+ * into sem->count leaves it non-negative. On the way out the task
+ * dequeues itself and passes a wakeup on to the wait queue.
+ */
+void __sched __down(struct semaphore *sem)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_UNINTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * the wait_queue_head.
+		 */
+		if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1; /* us - see -1 above */
+		/* Drop the lock while sleeping; retake it before re-checking */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_UNINTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+	tsk->state = TASK_RUNNING;
+}
+EXPORT_SYMBOL(__down);
+
+/*
+ * Contended acquire, interruptible. Same loop as __down(), except
+ * that a pending signal aborts the wait.
+ *
+ * Returns 0 once the semaphore has been acquired, or -EINTR if a
+ * signal was pending (the count is corrected before returning).
+ */
+int __sched __down_interruptible(struct semaphore *sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	unsigned long flags;
+
+	tsk->state = TASK_INTERRUPTIBLE;
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * With signals pending, this turns into the trylock
+		 * failure case - we won't be sleeping, and we can't
+		 * get the lock as it has contention. Just correct the
+		 * count and exit.
+		 */
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			sem->sleepers = 0;
+			atomic_add(sleepers, &sem->count);
+			break;
+		}
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock in
+		 * the wait_queue_head.
+		 */
+		if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1; /* us - see -1 above */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		schedule();
+
+		spin_lock_irqsave(&sem->wait.lock, flags);
+		tsk->state = TASK_INTERRUPTIBLE;
+	}
+	remove_wait_queue_locked(&sem->wait, &wait);
+	wake_up_locked(&sem->wait);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	tsk->state = TASK_RUNNING;
+	return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
new file mode 100644
index 00000000000..5d68f3c6990
--- /dev/null
+++ b/arch/avr32/kernel/setup.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/ioport.h>
+#include <linux/bootmem.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/sysreg.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/init.h>
+
+extern int root_mountflags;
+
+/*
+ * Bootloader-provided information about physical memory
+ */
+struct tag_mem_range *mem_phys;
+struct tag_mem_range *mem_reserved;
+struct tag_mem_range *mem_ramdisk;
+
+/*
+ * Initialize loops_per_jiffy as 5000000 (500MIPS).
+ * Better make it too large than too small...
+ */
+struct avr32_cpuinfo boot_cpu_data = {
+	.loops_per_jiffy = 5000000
+};
+EXPORT_SYMBOL(boot_cpu_data);
+
+/* Working copy of the command line, handed out by setup_arch() */
+static char command_line[COMMAND_LINE_SIZE];
+
+/*
+ * Should be more than enough, but if you have a _really_ complex
+ * setup, you might need to increase the size of this...
+ */
+static struct tag_mem_range __initdata mem_range_cache[32];
+/* Index of the next unused slot in mem_range_cache[] */
+static unsigned mem_range_next_free;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{
+		.name = "Kernel code",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	},
+	{
+		.name = "Kernel data",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+/* Shorthands for the two mem_res[] entries; filled in by resource_init() */
+#define kernel_code mem_res[0]
+#define kernel_data mem_res[1]
+
+/*
+ * Early framebuffer allocation. Works as follows:
+ *   - If fbmem_size is zero, nothing will be allocated or reserved.
+ *   - If fbmem_start is zero when setup_bootmem() is called,
+ *     fbmem_size bytes will be allocated from the bootmem allocator.
+ *   - If fbmem_start is nonzero, an area of size fbmem_size will be
+ *     reserved at the physical address fbmem_start if necessary. If
+ *     the area isn't in a memory region known to the kernel, it will
+ *     be left alone.
+ *
+ * Board-specific code may use these variables to set up platform data
+ * for the framebuffer driver if fbmem_size is nonzero.
+ */
+static unsigned long __initdata fbmem_start;
+static unsigned long __initdata fbmem_size;
+
+/*
+ * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for
+ * use as framebuffer.
+ *
+ * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and
+ * starting at yyy to be reserved for use as framebuffer.
+ *
+ * The kernel won't verify that the memory region starting at yyy
+ * actually contains usable RAM.
+ */
+static int __init early_parse_fbmem(char *p)
+{
+	fbmem_size = memparse(p, &p);
+	/*
+	 * memparse() leaves p on the first character it did not
+	 * consume. Step over the '@' before parsing the start
+	 * address; parsing from the '@' itself always yields 0.
+	 */
+	if (*p == '@')
+		fbmem_start = memparse(p + 1, &p);
+	return 0;
+}
+early_param("fbmem", early_parse_fbmem);
+
+/*
+ * Register a "System RAM" resource for every non-empty region in
+ * mem_phys, and hang the kernel code/data resources below the region
+ * that contains them.
+ */
+static inline void __init resource_init(void)
+{
+	struct tag_mem_range *region;
+
+	kernel_code.start = __pa(init_mm.start_code);
+	kernel_code.end = __pa(init_mm.end_code - 1);
+	kernel_data.start = __pa(init_mm.end_code);
+	kernel_data.end = __pa(init_mm.brk - 1);
+
+	for (region = mem_phys; region; region = region->next) {
+		struct resource *res;
+		unsigned long phys_start, phys_end;
+
+		if (region->size == 0)
+			continue;
+
+		phys_start = region->addr;
+		phys_end = phys_start + region->size - 1;
+
+		res = alloc_bootmem_low(sizeof(*res));
+		res->name = "System RAM";
+		res->start = phys_start;
+		res->end = phys_end;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource (&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource (res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource (res, &kernel_data);
+	}
+}
+
+/*
+ * ATAG_CORE: if the tag carries a payload (hdr.size > 2), bit 0 of
+ * the flags being clear drops MS_RDONLY from the root mount flags,
+ * and the root device is taken from the tag.
+ */
+static int __init parse_tag_core(struct tag *tag)
+{
+	if (tag->hdr.size > 2) {
+		if ((tag->u.core.flags & 1) == 0)
+			root_mountflags &= ~MS_RDONLY;
+		ROOT_DEV = new_decode_dev(tag->u.core.rootdev);
+	}
+	return 0;
+}
+__tagtable(ATAG_CORE, parse_tag_core);
+
+/*
+ * Append a copy of the tag's memory range to the end of the list
+ * headed by *root. The copy lives in mem_range_cache[]; running out
+ * of slots there is fatal. Always returns 0.
+ */
+static int __init parse_tag_mem_range(struct tag *tag,
+				      struct tag_mem_range **root)
+{
+	struct tag_mem_range *cur, **pprev;
+	struct tag_mem_range *new;
+
+	/*
+	 * Ignore zero-sized entries. If we're running standalone, the
+	 * SDRAM code may emit such entries if something goes
+	 * wrong...
+	 */
+	if (tag->u.mem_range.size == 0)
+		return 0;
+
+	/*
+	 * Copy the data so the bootmem init code doesn't need to care
+	 * about it.
+	 */
+	if (mem_range_next_free >=
+	    (sizeof(mem_range_cache) / sizeof(mem_range_cache[0])))
+		panic("Physical memory map too complex!\n");
+
+	new = &mem_range_cache[mem_range_next_free++];
+	*new = tag->u.mem_range;
+
+	/* Walk to the tail so that the list keeps the tag order */
+	pprev = root;
+	cur = *root;
+	while (cur) {
+		pprev = &cur->next;
+		cur = cur->next;
+	}
+
+	*pprev = new;
+	new->next = NULL;
+
+	return 0;
+}
+
+/* ATAG_MEM: add a range to the physical memory list */
+static int __init parse_tag_mem(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_phys);
+}
+__tagtable(ATAG_MEM, parse_tag_mem);
+
+/* ATAG_CMDLINE: take over the boot loader's command line */
+static int __init parse_tag_cmdline(struct tag *tag)
+{
+	strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
+	return 0;
+}
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+
+/* ATAG_RDIMG: add a range to the ramdisk list */
+static int __init parse_tag_rdimg(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_ramdisk);
+}
+__tagtable(ATAG_RDIMG, parse_tag_rdimg);
+
+static int __init parse_tag_clock(struct tag *tag)
+{
+	/*
+	 * We'll figure out the clocks by peeking at the system
+	 * manager regs directly.
+	 */
+	return 0;
+}
+__tagtable(ATAG_CLOCK, parse_tag_clock);
+
+/* ATAG_RSVD_MEM: add a range to the reserved memory list */
+static int __init parse_tag_rsvd_mem(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_reserved);
+}
+__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem);
+
+/* ATAG_ETHERNET: accepted but ignored for now, the body is compiled out */
+static int __init parse_tag_ethernet(struct tag *tag)
+{
+#if 0
+	const struct platform_device *pdev;
+
+	/*
+	 * We really need a bus type that supports "classes"...this
+	 * will do for now (until we must handle other kinds of
+	 * ethernet controllers)
+	 */
+	pdev = platform_get_device("macb", tag->u.ethernet.mac_index);
+	if (pdev && pdev->dev.platform_data) {
+		struct eth_platform_data *data = pdev->dev.platform_data;
+
+		data->valid = 1;
+		data->mii_phy_addr = tag->u.ethernet.mii_phy_addr;
+		memcpy(data->hw_addr, tag->u.ethernet.hw_address,
+		       sizeof(data->hw_addr));
+	}
+#endif
+	return 0;
+}
+__tagtable(ATAG_ETHERNET, parse_tag_ethernet);
+
+/*
+ * Scan the tag table for this tag, and call its parse function.
The
+ * tag table is built by the linker from all the __tagtable
+ * declarations.
+ *
+ * Returns non-zero if a handler for the tag was found, 0 otherwise.
+ * The handler's own return value is not looked at.
+ */
+static int __init parse_tag(struct tag *tag)
+{
+	extern struct tagtable __tagtable_begin, __tagtable_end;
+	struct tagtable *t;
+
+	for (t = &__tagtable_begin; t < &__tagtable_end; t++)
+		if (tag->hdr.tag == t->tag) {
+			t->parse(tag);
+			break;
+		}
+
+	/* t only stops short of the end if we broke out on a match */
+	return t < &__tagtable_end;
+}
+
+/*
+ * Parse all tags in the list we got from the boot loader
+ */
+static void __init parse_tags(struct tag *t)
+{
+	for (; t->hdr.tag != ATAG_NONE; t = tag_next(t))
+		if (!parse_tag(t))
+			printk(KERN_WARNING
+				"Ignoring unrecognised tag 0x%08x\n",
+				t->hdr.tag);
+}
+
+/*
+ * Architecture setup at boot: parse the boot loader tags, set up the
+ * processor and platform, record the CPU clock, describe the kernel
+ * image in init_mm, hand the command line back through cmdline_p and
+ * bring up bootmem, the framebuffer area, paging and the resource
+ * tree, in that order.
+ */
+void __init setup_arch (char **cmdline_p)
+{
+	struct clk *cpu_clk;
+
+	parse_tags(bootloader_tags);
+
+	setup_processor();
+	setup_platform();
+
+	cpu_clk = clk_get(NULL, "cpu");
+	if (IS_ERR(cpu_clk)) {
+		/* Not fatal: boot_cpu_data keeps its static default */
+		printk(KERN_WARNING "Warning: Unable to get CPU clock\n");
+	} else {
+		unsigned long cpu_hz = clk_get_rate(cpu_clk);
+
+		/*
+		 * Well, duh, but it's probably a good idea to
+		 * increment the use count.
+		 */
+		clk_enable(cpu_clk);
+
+		boot_cpu_data.clk = cpu_clk;
+		boot_cpu_data.loops_per_jiffy = cpu_hz * 4;
+		/* Rounded to the nearest kHz, printed as MHz.kHz */
+		printk("CPU: Running at %lu.%03lu MHz\n",
+		       ((cpu_hz + 500) / 1000) / 1000,
+		       ((cpu_hz + 500) / 1000) % 1000);
+	}
+
+	init_mm.start_code = (unsigned long) &_text;
+	init_mm.end_code = (unsigned long) &_etext;
+	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.brk = (unsigned long) &_end;
+
+	strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = command_line;
+	parse_early_param();
+
+	setup_bootmem();
+
+	board_setup_fbmem(fbmem_start, fbmem_size);
+
+#ifdef CONFIG_VT
+	conswitchp = &dummy_con;
+#endif
+
+	paging_init();
+
+	resource_init();
+}
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
new file mode 100644
index 00000000000..33096651c24
--- /dev/null
+++ b/arch/avr32/kernel/signal.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/sh/kernel/signal.c
+ * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/*
+ * sigaltstack() entry point; "regs" is only needed for the caller's
+ * stack pointer, which do_sigaltstack() wants.
+ */
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+			       struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+
+/*
+ * Signal frame on the user stack: written by setup_rt_frame() and
+ * read back by sys_rt_sigreturn().
+ */
+struct rt_sigframe
+{
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned long retcode;
+};
+
+/*
+ * Load every register in "regs" from the user-space sigcontext.
+ * Returns non-zero if any read faulted or if the restored registers
+ * fail valid_user_regs().
+ */
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	int err = 0;
+
+#define COPY(x) err |= __get_user(regs->x, &sc->x)
+	COPY(sr);
+	COPY(pc);
+	COPY(lr);
+	COPY(sp);
+	COPY(r12);
+	COPY(r11);
+	COPY(r10);
+	COPY(r9);
+	COPY(r8);
+	COPY(r7);
+	COPY(r6);
+	COPY(r5);
+	COPY(r4);
+	COPY(r3);
+	COPY(r2);
+	COPY(r1);
+	COPY(r0);
+#undef COPY
+
+	/*
+	 * Don't allow anyone to pretend they're running in supervisor
+	 * mode or something...
+	 */
+	err |= !valid_user_regs(regs);
+
+	return err;
+}
+
+
+/*
+ * Return from a signal handler: restore the blocked-signal mask and
+ * the registers from the rt_sigframe at the user stack pointer. A
+ * frame that can't be read or restored gets the task a SIGSEGV.
+ * Returns the restored r12 on success.
+ */
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)regs->sp;
+	pr_debug("SIG return: frame = %p\n", frame);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	/* SIGKILL and SIGSTOP can never be blocked */
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n",
+		 regs->pc, regs->lr, regs->sp);
+
+	return regs->r12;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Store every register in "regs" into the user-space sigcontext.
+ * Returns non-zero if any write faulted.
+ */
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
+{
+	int err = 0;
+
+#define COPY(x) err |= __put_user(regs->x, &sc->x)
+	COPY(sr);
+	COPY(pc);
+	COPY(lr);
+	COPY(sp);
+	COPY(r12);
+	COPY(r11);
+	COPY(r10);
+	COPY(r9);
+	COPY(r8);
+	COPY(r7);
+	COPY(r6);
+	COPY(r5);
+	COPY(r4);
+	COPY(r3);
+	COPY(r2);
+	COPY(r1);
+	COPY(r0);
+#undef COPY
+
+	return err;
+}
+
+/*
+ * Work out where the signal frame goes: "framesize" bytes below the
+ * current stack pointer, or below the top of the alternate stack if
+ * the handler asked for SA_ONSTACK and sas_ss_flags() reports nothing
+ * for the current sp. The result is rounded down to a multiple of 4.
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
+{
+	unsigned long sp = regs->sp;
+
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	return (void __user *)((sp - framesize) & ~3);
+}
+
+/*
+ * Build the rt_sigframe on the user stack and point "regs" at the
+ * handler: r12/r11/r10 carry the signal number, the siginfo and the
+ * ucontext, lr the return path and pc the handler itself.
+ *
+ * Returns 0 on success and non-zero if the frame could not be
+ * written, in which case "regs" is left untouched.
+ */
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+	err = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		goto out;
+
+	/*
+	 * Set up the return code:
+	 *
+	 *	mov	r8, __NR_rt_sigreturn
+	 *	scall
+	 *
+	 * Note: This will blow up since we're using a non-executable
+	 * stack. Better use SA_RESTORER.
+	 */
+#if __NR_rt_sigreturn > 127
+# error __NR_rt_sigreturn must be <= 127 to fit in a short mov
+#endif
+	err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20),
+			 &frame->retcode);
+
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Set up the ucontext */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(NULL, &frame->uc.uc_link);
+	err |= __put_user((void __user *)current->sas_ss_sp,
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size,
+			  &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	if (err)
+		goto out;
+
+	regs->r12 = sig;
+	regs->r11 = (unsigned long) &frame->info;
+	regs->r10 = (unsigned long) &frame->uc;
+	regs->sp = (unsigned long) frame;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		regs->lr = (unsigned long)ka->sa.sa_restorer;
+	else {
+		/* Fall back to the trampoline stored in the frame */
+		printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n",
+		       current->comm, current->pid);
+		regs->lr = (unsigned long) &frame->retcode;
+	}
+
+	pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n",
+		 current->comm, current->pid, sig, regs->sp,
+		 regs->pc, ka->sa.sa_handler, regs->lr);
+
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+
+out:
+	return err;
+}
+
+/*
+ * Arrange for the interrupted system call to be issued again: either
+ * through restart_syscall() for -ERESTART_RESTARTBLOCK, or with the
+ * original r12 put back, and with pc moved back by 2 in both cases.
+ * NOTE(review): the 2 is presumably the size of the scall
+ * instruction -- confirm against the instruction set manual.
+ */
+static inline void restart_syscall(struct pt_regs *regs)
+{
+	if (regs->r12 == -ERESTART_RESTARTBLOCK)
+		regs->r8 = __NR_restart_syscall;
+	else
+		regs->r12 = regs->r12_orig;
+	regs->pc -= 2;
+}
+
+/*
+ * Deliver one signal: set up the frame, update the blocked mask and,
+ * if the frame could not be set up, force a SIGSEGV instead.
+ */
+static inline void
+handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
+	      sigset_t *oldset, struct pt_regs *regs, int syscall)
+{
+	int ret;
+
+	/*
+	 * Set up the stack frame
+	 */
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+	/*
+	 * Check that the resulting registers are sane
+	 */
+	ret |= !valid_user_regs(regs);
+
+	/*
+	 * Block the signal if we were unsuccessful.
+	 */
+	if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked, &current->blocked,
+			  &ka->sa.sa_mask);
+		sigaddset(&current->blocked, sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
+	if (ret == 0)
+		return;
+
+	force_sigsegv(sig, current);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it
+ * doesn't want to handle. Thus you cannot kill init even with a
+ * SIGKILL even by mistake.
+ *
+ * Returns 1 if a signal was handed to handle_signal(), 0 otherwise.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
+{
+	siginfo_t info;
+	int signr;
+	struct k_sigaction ka;
+
+	/*
+	 * We want the common case to go fast, which is why we may in
+	 * certain cases get here from kernel mode. Just return
+	 * without doing anything if so.
+	 */
+	if (!user_mode(regs))
+		return 0;
+
+	if (try_to_freeze()) {
+		signr = 0;
+		if (!signal_pending(current))
+			goto no_signal;
+	}
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else if (!oldset)
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+no_signal:
+	/* "ka" is only looked at below when signr > 0, i.e. when it was filled in */
+	if (syscall) {
+		switch (regs->r12) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			if (signr > 0) {
+				regs->r12 = -EINTR;
+				break;
+			}
+			/* fall through */
+		case -ERESTARTSYS:
+			if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) {
+				regs->r12 = -EINTR;
+				break;
+			}
+			/* fall through */
+		case -ERESTARTNOINTR:
+			restart_syscall(regs);
+		}
+	}
+
+	if (signr == 0) {
+		/* No signal to deliver -- put the saved sigmask back */
+		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+			clear_thread_flag(TIF_RESTORE_SIGMASK);
+			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+		}
+		return 0;
+	}
+
+	handle_signal(signr, &ka, &info, oldset, regs, syscall);
+	return 1;
+}
+
+/*
+ * Called with the thread flags in ti->flags; runs do_signal() when a
+ * signal is pending or a saved signal mask has to be restored.
+ * "syscall" is set when the status register reports supervisor mode.
+ */
+asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
+{
+	int syscall = 0;
+
+	if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
+		syscall = 1;
+
+	if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+		do_signal(regs, &current->blocked, syscall);
+}
diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S
new file mode 100644
index 00000000000..a48d046723c
--- /dev/null
+++ b/arch/avr32/kernel/switch_to.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/sysreg.h>
+
+	.text
+	.global __switch_to
+	.type __switch_to, @function
+
+	/* Switch thread context from "prev" to "next", returning "last"
+	 * r12 : prev
+	 * r11 : &prev->thread + 1
+	 * r10 : &next->thread
+	 */
+__switch_to:
+	stm --r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr
+	mfsr r9, SYSREG_SR
+	st.w --r11, r9
+	ld.w r8, r10++
+	/*
+	 * schedule() may have been called from a mode with a different
+	 * set of registers. Make sure we don't lose anything here.
+	 */
+	pushm r10,r12
+	mtsr SYSREG_SR, r8
+	frs /* flush the return stack */
+	sub pc, -2 /* flush the pipeline */
+	popm r10,r12
+	ldm r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc
+	.size __switch_to, . - __switch_to
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
new file mode 100644
index 00000000000..6ec5693da44
--- /dev/null
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+
+#include <asm/mman.h>
+#include <asm/uaccess.h>
+
+/*
+ * pipe(): create a pipe and store its two descriptors at "filedes".
+ * Returns 0 on success, the error from do_pipe(), or -EFAULT if the
+ * descriptors can't be written to user space.
+ */
+asmlinkage int sys_pipe(unsigned long __user *filedes)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe(fd);
+	if (!error) {
+		if (copy_to_user(filedes, fd, sizeof(fd)))
+			error = -EFAULT;
+	}
+	return error;
+}
+
+/*
+ * mmap2(): map "len" bytes of the file behind "fd" (or anonymous
+ * memory if MAP_ANONYMOUS is set); "offset" is passed straight on to
+ * do_mmap_pgoff(). MAP_EXECUTABLE and MAP_DENYWRITE are ignored.
+ *
+ * Returns whatever do_mmap_pgoff() returns, or -EBADF if "fd" does
+ * not refer to an open file.
+ */
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, off_t offset)
+{
+	/* long, not int: this carries the mapping result back to the caller */
+	long error = -EBADF;
+	struct file *file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			return error;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, offset);
+	up_write(&current->mm->mmap_sem);
+
+	/* Drop the reference taken by fget() above */
+	if (file)
+		fput(file);
+	return error;
+}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
new file mode 100644
index 00000000000..7589a9b426c
--- /dev/null
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Stubs for syscalls that require access to pt_regs or that take more
+ * than five parameters.
+ */
+
+#define ARG6 r3
+
+	.text
+	/*
+	 * pt_regs stubs: copy sp into one more argument register and
+	 * jump (rjmp, not a call) to the C implementation.
+	 * NOTE(review): this presumably relies on sp pointing at the
+	 * saved struct pt_regs on syscall entry and on arguments being
+	 * passed in r12, r11, r10, r9, r8 in that order -- confirm
+	 * against the syscall entry code.
+	 */
+	.global __sys_rt_sigsuspend
+	.type __sys_rt_sigsuspend,@function
+__sys_rt_sigsuspend:
+	mov r10, sp
+	rjmp sys_rt_sigsuspend
+
+	.global __sys_sigaltstack
+	.type __sys_sigaltstack,@function
+__sys_sigaltstack:
+	mov r10, sp
+	rjmp sys_sigaltstack
+
+	.global __sys_rt_sigreturn
+	.type __sys_rt_sigreturn,@function
+__sys_rt_sigreturn:
+	mov r12, sp
+	rjmp sys_rt_sigreturn
+
+	.global __sys_fork
+	.type __sys_fork,@function
+__sys_fork:
+	mov r12, sp
+	rjmp sys_fork
+
+	.global __sys_clone
+	.type __sys_clone,@function
+__sys_clone:
+	mov r8, sp
+	rjmp sys_clone
+
+	.global __sys_vfork
+	.type __sys_vfork,@function
+__sys_vfork:
+	mov r12, sp
+	rjmp sys_vfork
+
+	.global __sys_execve
+	.type __sys_execve,@function
+__sys_execve:
+	mov r9, sp
+	rjmp sys_execve
+
+	/*
+	 * Six-argument stubs: save lr, store ARG6 (r3) on the stack,
+	 * call the C implementation, drop the stack slot again
+	 * (sub sp, -4) and return by popping the saved lr into pc.
+	 * NOTE(review): presumably the C calling convention expects
+	 * the sixth argument on the stack -- confirm against the ABI.
+	 */
+	.global __sys_mmap2
+	.type __sys_mmap2,@function
+__sys_mmap2:
+	pushm lr
+	st.w --sp, ARG6
+	rcall sys_mmap2
+	sub sp, -4
+	popm pc
+
+	.global __sys_sendto
+	.type __sys_sendto,@function
+__sys_sendto:
+	pushm lr
+	st.w --sp, ARG6
+	rcall sys_sendto
+	sub sp, -4
+	popm pc
+
+	.global __sys_recvfrom
+	.type __sys_recvfrom,@function
+__sys_recvfrom:
+	pushm lr
+	st.w --sp, ARG6
+	rcall sys_recvfrom
+	sub sp, -4
+	popm pc
+
+	.global __sys_pselect6
+	.type __sys_pselect6,@function
+__sys_pselect6:
+	pushm lr
+	st.w --sp, ARG6
+	rcall sys_pselect6
+	sub sp, -4
+	popm pc
+
+	.global __sys_splice
+	.type __sys_splice,@function
+__sys_splice:
+	pushm lr
+	st.w --sp, ARG6
+	rcall sys_splice
+	sub sp, -4
+	popm pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
new file mode 100644
index 00000000000..63b206965d0
--- /dev/null
+++ b/arch/avr32/kernel/syscall_table.S
@@ -0,0 +1,289 @@
+/*
+ * AVR32 system call table
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software
Foundation. + */ + +#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE) +#define sys_nfsservctl sys_ni_syscall +#endif + +#if !defined(CONFIG_SYSV_IPC) +# define sys_ipc sys_ni_syscall +#endif + + .section .rodata,"a",@progbits + .type sys_call_table,@object + .global sys_call_table + .align 2 +sys_call_table: + .long sys_restart_syscall + .long sys_exit + .long __sys_fork + .long sys_read + .long sys_write + .long sys_open /* 5 */ + .long sys_close + .long sys_umask + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ + .long __sys_execve + .long sys_chdir + .long sys_time + .long sys_mknod + .long sys_chmod /* 15 */ + .long sys_chown + .long sys_lchown + .long sys_lseek + .long sys_llseek + .long sys_getpid /* 20 */ + .long sys_mount + .long sys_umount + .long sys_setuid + .long sys_getuid + .long sys_stime /* 25 */ + .long sys_ptrace + .long sys_alarm + .long sys_pause + .long sys_utime + .long sys_newstat /* 30 */ + .long sys_newfstat + .long sys_newlstat + .long sys_access + .long sys_chroot + .long sys_sync /* 35 */ + .long sys_fsync + .long sys_kill + .long sys_rename + .long sys_mkdir + .long sys_rmdir /* 40 */ + .long sys_dup + .long sys_pipe + .long sys_times + .long __sys_clone + .long sys_brk /* 45 */ + .long sys_setgid + .long sys_getgid + .long sys_getcwd + .long sys_geteuid + .long sys_getegid /* 50 */ + .long sys_acct + .long sys_setfsuid + .long sys_setfsgid + .long sys_ioctl + .long sys_fcntl /* 55 */ + .long sys_setpgid + .long sys_mremap + .long sys_setresuid + .long sys_getresuid + .long sys_setreuid /* 60 */ + .long sys_setregid + .long sys_ustat + .long sys_dup2 + .long sys_getppid + .long sys_getpgrp /* 65 */ + .long sys_setsid + .long sys_rt_sigaction + .long __sys_rt_sigreturn + .long sys_rt_sigprocmask + .long sys_rt_sigpending /* 70 */ + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long __sys_rt_sigsuspend + .long sys_sethostname + .long sys_setrlimit /* 75 */ + .long sys_getrlimit + .long sys_getrusage + 
.long sys_gettimeofday + .long sys_settimeofday + .long sys_getgroups /* 80 */ + .long sys_setgroups + .long sys_select + .long sys_symlink + .long sys_fchdir + .long sys_readlink /* 85 */ + .long sys_pread64 + .long sys_pwrite64 + .long sys_swapon + .long sys_reboot + .long __sys_mmap2 /* 90 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod + .long sys_fchown /* 95 */ + .long sys_getpriority + .long sys_setpriority + .long sys_wait4 + .long sys_statfs + .long sys_fstatfs /* 100 */ + .long sys_vhangup + .long __sys_sigaltstack + .long sys_syslog + .long sys_setitimer + .long sys_getitimer /* 105 */ + .long sys_swapoff + .long sys_sysinfo + .long sys_ipc + .long sys_sendfile + .long sys_setdomainname /* 110 */ + .long sys_newuname + .long sys_adjtimex + .long sys_mprotect + .long __sys_vfork + .long sys_init_module /* 115 */ + .long sys_delete_module + .long sys_quotactl + .long sys_getpgid + .long sys_bdflush + .long sys_sysfs /* 120 */ + .long sys_personality + .long sys_ni_syscall /* reserved for afs_syscall */ + .long sys_getdents + .long sys_flock + .long sys_msync /* 125 */ + .long sys_readv + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl /* 130 */ + .long sys_mlock + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam /* 135 */ + .long sys_sched_getparam + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max /* 140 */ + .long sys_sched_get_priority_min + .long sys_sched_rr_get_interval + .long sys_nanosleep + .long sys_poll + .long sys_nfsservctl /* 145 */ + .long sys_setresgid + .long sys_getresgid + .long sys_prctl + .long sys_socket + .long sys_bind /* 150 */ + .long sys_connect + .long sys_listen + .long sys_accept + .long sys_getsockname + .long sys_getpeername /* 155 */ + .long sys_socketpair + .long sys_send + .long sys_recv + .long __sys_sendto + .long __sys_recvfrom /* 160 */ + .long 
sys_shutdown + .long sys_setsockopt + .long sys_getsockopt + .long sys_sendmsg + .long sys_recvmsg /* 165 */ + .long sys_truncate64 + .long sys_ftruncate64 + .long sys_stat64 + .long sys_lstat64 + .long sys_fstat64 /* 170 */ + .long sys_pivot_root + .long sys_mincore + .long sys_madvise + .long sys_getdents64 + .long sys_fcntl64 /* 175 */ + .long sys_gettid + .long sys_readahead + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr /* 180 */ + .long sys_getxattr + .long sys_lgetxattr + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr /* 185 */ + .long sys_flistxattr + .long sys_removexattr + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill /* 190 */ + .long sys_sendfile64 + .long sys_futex + .long sys_sched_setaffinity + .long sys_sched_getaffinity + .long sys_capget /* 195 */ + .long sys_capset + .long sys_io_setup + .long sys_io_destroy + .long sys_io_getevents + .long sys_io_submit /* 200 */ + .long sys_io_cancel + .long sys_fadvise64 + .long sys_exit_group + .long sys_lookup_dcookie + .long sys_epoll_create /* 205 */ + .long sys_epoll_ctl + .long sys_epoll_wait + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_timer_create /* 210 */ + .long sys_timer_settime + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_settime /* 215 */ + .long sys_clock_gettime + .long sys_clock_getres + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 /* 220 */ + .long sys_tgkill + .long sys_ni_syscall /* reserved for TUX */ + .long sys_utimes + .long sys_fadvise64_64 + .long sys_cacheflush /* 225 */ + .long sys_ni_syscall /* sys_vserver */ + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend + .long sys_mq_timedreceive /* 230 */ + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_kexec_load + .long sys_waitid + .long sys_add_key /* 235 */ + .long sys_request_key + .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long 
sys_inotify_init /* 240 */ + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_openat + .long sys_mkdirat + .long sys_mknodat /* 245 */ + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 250 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat /* 255 */ + .long __sys_pselect6 + .long sys_ppoll + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 260 */ + .long __sys_splice + .long sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c new file mode 100644 index 00000000000..b0e6b5855a3 --- /dev/null +++ b/arch/avr32/kernel/time.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on MIPS implementation arch/mips/kernel/time.c + * Copyright 2001 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/clk.h> +#include <linux/clocksource.h> +#include <linux/time.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/profile.h> +#include <linux/sysdev.h> + +#include <asm/div64.h> +#include <asm/sysreg.h> +#include <asm/io.h> +#include <asm/sections.h> + +static cycle_t read_cycle_count(void) +{ + return (cycle_t)sysreg_read(COUNT); +} + +static struct clocksource clocksource_avr32 = { + .name = "avr32", + .rating = 350, + .read = read_cycle_count, + .mask = CLOCKSOURCE_MASK(32), + .shift = 16, + .is_continuous = 1, +}; + +/* + * By default we provide the null RTC ops + */ +static unsigned long null_rtc_get_time(void) +{ + return mktime(2004, 1, 1, 0, 0, 0); +} + +static int null_rtc_set_time(unsigned long sec) +{ + return 0; +} + +static unsigned long (*rtc_get_time)(void) = null_rtc_get_time; +static int (*rtc_set_time)(unsigned long) = null_rtc_set_time; + +/* how many counter cycles in a jiffy? */ +static unsigned long cycles_per_jiffy; + +/* cycle counter value at the previous timer interrupt */ +static unsigned int timerhi, timerlo; + +/* the count value for the next timer interrupt */ +static unsigned int expirelo; + +static void avr32_timer_ack(void) +{ + unsigned int count; + + /* Ack this timer interrupt and set the next one */ + expirelo += cycles_per_jiffy; + if (expirelo == 0) { + printk(KERN_DEBUG "expirelo == 0\n"); + sysreg_write(COMPARE, expirelo + 1); + } else { + sysreg_write(COMPARE, expirelo); + } + + /* Check to see if we have missed any timer interrupts */ + count = sysreg_read(COUNT); + if ((count - expirelo) < 0x7fffffff) { + expirelo = count + cycles_per_jiffy; + sysreg_write(COMPARE, expirelo); + } +} + +static unsigned int avr32_hpt_read(void) +{ + return sysreg_read(COUNT); +} + +/* + * Taken from MIPS c0_hpt_timer_init(). + * + * Why is it so complicated, and what is "count"? 
My assumption is + * that `count' specifies the "reference cycle", i.e. the cycle since + * reset that should mean "zero". The reason COUNT is written twice is + * probably to make sure we don't get any timer interrupts while we + * are messing with the counter. + */ +static void avr32_hpt_init(unsigned int count) +{ + count = sysreg_read(COUNT) - count; + expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy; + sysreg_write(COUNT, expirelo - cycles_per_jiffy); + sysreg_write(COMPARE, expirelo); + sysreg_write(COUNT, count); +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + /* There must be better ways...? */ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + +/* + * local_timer_interrupt() does profiling and process accounting on a + * per-CPU basis. + * + * In UP mode, it is invoked from the (global) timer_interrupt. + */ +static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + if (current->pid) + profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); +} + +static irqreturn_t +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned int count; + + /* ack timer interrupt and try to set next interrupt */ + count = avr32_hpt_read(); + avr32_timer_ack(); + + /* Update timerhi/timerlo for intra-jiffy calibration */ + timerhi += count < timerlo; /* Wrap around */ + timerlo = count; + + /* + * Call the generic timer interrupt handler + */ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); + + /* + * In UP mode, we call local_timer_interrupt() to do profiling + * and process accounting. + * + * SMP is not supported yet. 
+ */ + local_timer_interrupt(irq, dev_id, regs); + + return IRQ_HANDLED; +} + +static struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED, + .name = "timer", +}; + +void __init time_init(void) +{ + unsigned long mult, shift, count_hz; + int ret; + + xtime.tv_sec = rtc_get_time(); + xtime.tv_nsec = 0; + + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + printk("Before time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + count_hz = clk_get_rate(boot_cpu_data.clk); + shift = clocksource_avr32.shift; + mult = clocksource_hz2mult(count_hz, shift); + clocksource_avr32.mult = mult; + + printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift); + + { + u64 tmp; + + tmp = TICK_NSEC; + tmp <<= shift; + tmp += mult / 2; + do_div(tmp, mult); + + cycles_per_jiffy = tmp; + } + + /* This sets up the high precision timer for the first interrupt. */ + avr32_hpt_init(avr32_hpt_read()); + + printk("After time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + ret = clocksource_register(&clocksource_avr32); + if (ret) + printk(KERN_ERR + "timer: could not register clocksource: %d\n", ret); + + ret = setup_irq(0, &timer_irqaction); + if (ret) + printk("timer: could not request IRQ 0: %d\n", ret); +} + +static struct sysdev_class timer_class = { + set_kset_name("timer"), +}; + +static struct sys_device timer_device = { + .id = 0, + .cls = &timer_class, +}; + +static int __init init_timer_sysfs(void) +{ + int err = sysdev_class_register(&timer_class); + if (!err) + err = sysdev_register(&timer_device); + return err; +} + +device_initcall(init_timer_sysfs); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c new file mode 100644 index 00000000000..7e803f4d7a1 --- /dev/null +++ b/arch/avr32/kernel/traps.c @@ -0,0 +1,425 @@ +/* + * Copyright (C) 2004-2006 Atmel 
Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/notifier.h> + +#include <asm/traps.h> +#include <asm/sysreg.h> +#include <asm/addrspace.h> +#include <asm/ocd.h> +#include <asm/mmu_context.h> +#include <asm/uaccess.h> + +static void dump_mem(const char *str, unsigned long bottom, unsigned long top) +{ + unsigned long p; + int i; + + printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top); + + for (p = bottom & ~31; p < top; ) { + printk("%04lx: ", p & 0xffff); + + for (i = 0; i < 8; i++, p += 4) { + unsigned int val; + + if (p < bottom || p >= top) + printk(" "); + else { + if (__get_user(val, (unsigned int __user *)p)) { + printk("\n"); + goto out; + } + printk("%08x ", val); + } + } + printk("\n"); + } + +out: + return; +} + +#ifdef CONFIG_FRAME_POINTER +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long __user *fp; + unsigned long __user *last_fp = NULL; + + if (regs) { + fp = (unsigned long __user *)regs->r7; + } else if (tsk == current) { + register unsigned long __user *real_fp __asm__("r7"); + fp = real_fp; + } else { + fp = (unsigned long __user *)tsk->thread.cpu_context.r7; + } + + /* + * Walk the stack until (a) we get an exception, (b) the frame + * pointer becomes zero, or (c) the frame pointer gets stuck + * at the same value. 
+ */ + while (fp && fp != last_fp) { + unsigned long lr, new_fp = 0; + + last_fp = fp; + if (__get_user(lr, fp)) + break; + if (fp && __get_user(new_fp, fp + 1)) + break; + fp = (unsigned long __user *)new_fp; + + printk(" [<%08lx>] ", lr); + print_symbol("%s\n", lr); + } + printk("\n"); +} +#else +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long addr; + + while (!kstack_end(sp)) { + addr = *sp++; + if (kernel_text_address(addr)) { + printk(" [<%08lx>] ", addr); + print_symbol("%s\n", addr); + } + } +} +#endif + +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + if (regs && + (((regs->sr & MODE_MASK) == MODE_EXCEPTION) || + ((regs->sr & MODE_MASK) == MODE_USER))) + return; + + printk ("Call trace:"); +#ifdef CONFIG_KALLSYMS + printk("\n"); +#endif + + __show_trace(tsk, sp, regs); + printk("\n"); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + unsigned long stack; + + if (!tsk) + tsk = current; + if (sp == 0) { + if (tsk == current) { + register unsigned long *real_sp __asm__("sp"); + sp = real_sp; + } else { + sp = (unsigned long *)tsk->thread.cpu_context.ksp; + } + } + + stack = (unsigned long)sp; + dump_mem("Stack: ", stack, + THREAD_SIZE + (unsigned long)tsk->thread_info); + show_trace(tsk, sp, NULL); +} + +void dump_stack(void) +{ + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); + +ATOMIC_NOTIFIER_HEAD(avr32_die_chain); + +int register_die_notifier(struct notifier_block *nb) +{ + pr_debug("register_die_notifier: %p\n", nb); + + return atomic_notifier_chain_register(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + +static DEFINE_SPINLOCK(die_lock); + +void __die(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, 
const char *func, unsigned long line) +{ + struct task_struct *tsk = current; + static int die_counter; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + + printk(KERN_ALERT "%s", str); + if (file && func) + printk(" in %s:%s, line %ld", file, func, line); + printk("[#%d]:\n", ++die_counter); + print_modules(); + show_regs(regs); + printk("Process %s (pid: %d, stack limit = 0x%p)\n", + tsk->comm, tsk->pid, tsk->thread_info + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem("Stack: ", regs->sp, + THREAD_SIZE + (unsigned long)tsk->thread_info); + } + + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, const char *func, unsigned long line) +{ + if (!user_mode(regs)) + __die(str, regs, err, file, func, line); +} + +asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs) +{ +#ifdef CONFIG_SUBARCH_AVR32B + /* + * The exception entry always saves RSR_EX. 
For NMI, this is + * wrong; it should be RSR_NMI + */ + regs->sr = sysreg_read(RSR_NMI); +#endif + + printk("NMI taken!!!!\n"); + die("NMI", regs, ecr); + BUG(); +} + +asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs) +{ + printk("Unable to handle critical exception %lu at pc = %08lx!\n", + ecr, regs->pc); + die("Oops", regs, ecr); + BUG(); +} + +asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + die_if_kernel("Oops: Address exception in kernel mode", regs, ecr); + +#ifdef DEBUG + if (ecr == ECR_ADDR_ALIGN_X) + pr_debug("Instruction Address Exception at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_R) + pr_debug("Data Address Exception (Read) at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_W) + pr_debug("Data Address Exception (Write) at pc = %08lx\n", + regs->pc); + else + BUG(); + + show_regs(regs); +#endif + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)regs->pc; + + force_sig_info(SIGBUS, &info, current); +} + +/* This way of handling undefined instructions is stolen from ARM */ +static LIST_HEAD(undef_hook); +static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED; + +void register_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_add(&hook->node, &undef_hook); + spin_unlock_irq(&undef_lock); +} + +void unregister_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_del(&hook->node); + spin_unlock_irq(&undef_lock); +} + +static int do_cop_absent(u32 insn) +{ + int cop_nr; + u32 cpucr; + if ( (insn & 0xfdf00000) == 0xf1900000 ) + /* LDC0 */ + cop_nr = 0; + else + cop_nr = (insn >> 13) & 0x7; + + /* Try enabling the coprocessor */ + cpucr = sysreg_read(CPUCR); + cpucr |= (1 << (24 + cop_nr)); + sysreg_write(CPUCR, cpucr); + + cpucr = sysreg_read(CPUCR); + if ( !(cpucr & (1 << (24 + cop_nr))) ){ + printk("Coprocessor #%i not found!\n", cop_nr); + 
return -1; + } + + return 0; +} + +#ifdef CONFIG_BUG +#ifdef CONFIG_DEBUG_BUGVERBOSE +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + char *file; + u16 line; + char c; + + if (__get_user(line, (u16 __user *)(regs->pc + 2))) + return; + if (__get_user(file, (char * __user *)(regs->pc + 4)) + || (unsigned long)file < PAGE_OFFSET + || __get_user(c, file)) + file = "<bad filename>"; + + printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); +} +#else +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + +} +#endif +#endif + +asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs) +{ + u32 insn; + struct undef_hook *hook; + siginfo_t info; + void __user *pc; + + if (!user_mode(regs)) + goto kernel_trap; + + local_irq_enable(); + + pc = (void __user *)instruction_pointer(regs); + if (__get_user(insn, (u32 __user *)pc)) + goto invalid_area; + + if (ecr == ECR_COPROC_ABSENT) { + if (do_cop_absent(insn) == 0) + return; + } + + spin_lock_irq(&undef_lock); + list_for_each_entry(hook, &undef_hook, node) { + if ((insn & hook->insn_mask) == hook->insn_val) { + if (hook->fn(regs, insn) == 0) { + spin_unlock_irq(&undef_lock); + return; + } + } + } + spin_unlock_irq(&undef_lock); + +invalid_area: + +#ifdef DEBUG + printk("Illegal instruction at pc = %08lx\n", regs->pc); + if (regs->pc < TASK_SIZE) { + unsigned long ptbr, pgd, pte, *p; + + ptbr = sysreg_read(PTBR); + p = (unsigned long *)ptbr; + pgd = p[regs->pc >> 22]; + p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000); + pte = p[(regs->pc >> 12) & 0x3ff]; + printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte); + } +#endif + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + switch (ecr) { + case ECR_ILLEGAL_OPCODE: + case ECR_UNIMPL_INSTRUCTION: + info.si_code = ILL_ILLOPC; + break; + case ECR_PRIVILEGE_VIOLATION: + info.si_code = ILL_PRVOPC; + break; + case ECR_COPROC_ABSENT: + info.si_code = ILL_COPROC; + 
break; + default: + BUG(); + } + + force_sig_info(SIGILL, &info, current); + return; + +kernel_trap: +#ifdef CONFIG_BUG + if (__kernel_text_address(instruction_pointer(regs))) { + insn = *(u16 *)instruction_pointer(regs); + if (insn == AVR32_BUG_OPCODE) { + do_bug_verbose(regs, insn); + die("Kernel BUG", regs, 0); + return; + } + } +#endif + + die("Oops: Illegal instruction in kernel code", regs, ecr); +} + +asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + printk("Floating-point exception at pc = %08lx\n", regs->pc); + + /* We have no FPU... */ + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + info.si_code = ILL_COPROC; + + force_sig_info(SIGILL, &info, current); +} + + +void __init trap_init(void) +{ + +} diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c new file mode 100644 index 00000000000..cdd627c6b7d --- /dev/null +++ b/arch/avr32/kernel/vmlinux.lds.c @@ -0,0 +1,139 @@ +/* + * AVR32 linker script for the Linux kernel + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LOAD_OFFSET 0x00000000 +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32") +OUTPUT_ARCH(avr32) +ENTRY(_start) + +/* Big endian */ +jiffies = jiffies_64 + 4; + +SECTIONS +{ + . = CONFIG_ENTRY_ADDRESS; + .init : AT(ADDR(.init) - LOAD_OFFSET) { + _stext = .; + __init_begin = .; + _sinittext = .; + *(.text.reset) + *(.init.text) + _einittext = .; + . = ALIGN(4); + __tagtable_begin = .; + *(.taglist) + __tagtable_end = .; + *(.init.data) + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + . 
= ALIGN(4); + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + __security_initcall_start = .; + *(.security_initcall.init) + __security_initcall_end = .; + . = ALIGN(32); + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + . = ALIGN(4096); + __init_end = .; + } + + . = ALIGN(8192); + .text : AT(ADDR(.text) - LOAD_OFFSET) { + _evba = .; + _text = .; + *(.ex.text) + . = 0x50; + *(.tlbx.ex.text) + . = 0x60; + *(.tlbr.ex.text) + . = 0x70; + *(.tlbw.ex.text) + . = 0x100; + *(.scall.text) + *(.irq.text) + *(.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + _etext = .; + } = 0xd703d703 + + . = ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + RODATA + + . = ALIGN(8192); + + .data : AT(ADDR(.data) - LOAD_OFFSET) { + _data = .; + _sdata = .; + /* + * First, the init task union, aligned to an 8K boundary. + */ + *(.data.init_task) + + /* Then, the cacheline aligned data */ + . = ALIGN(32); + *(.data.cacheline_aligned) + + /* And the rest... */ + *(.data.rel*) + *(.data) + CONSTRUCTORS + + _edata = .; + } + + + . = ALIGN(8); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss) + *(COMMON) + . = ALIGN(8); + __bss_stop = .; + _end = .; + } + + /* When something in the kernel is NOT compiled as a module, the module + * cleanup code and data are put into these segments. Both can then be + * thrown away, as cleanup code is never called unless it's a module. 
+ */ + /DISCARD/ : { + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + } + + DWARF_DEBUG +} diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile new file mode 100644 index 00000000000..09ac43e4052 --- /dev/null +++ b/arch/avr32/lib/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for AVR32-specific library files +# + +lib-y := copy_user.o clear_user.o +lib-y += strncpy_from_user.o strnlen_user.o +lib-y += delay.o memset.o memcpy.o findbit.o +lib-y += csum_partial.o csum_partial_copy_generic.o +lib-y += io-readsw.o io-readsl.o io-writesw.o io-writesl.o +lib-y += __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S new file mode 100644 index 00000000000..368b6bca4c7 --- /dev/null +++ b/arch/avr32/lib/__avr32_asr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_asr64(DWtype u, word_type b) + */ + .text + .global __avr32_asr64 + .type __avr32_asr64,@function +__avr32_asr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r10, r10, r12 + asr r11, r11, r12 + or r10, r8 + retal r12 + +1: neg r9 + asr r10, r11, r9 + asr r11, 31 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S new file mode 100644 index 00000000000..f1dbc2b3625 --- /dev/null +++ b/arch/avr32/lib/__avr32_lsl64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + + /* + * DWtype __avr32_lsl64(DWtype u, word_type b) + */ + .text + .global __avr32_lsl64 + .type __avr32_lsl64,@function +__avr32_lsl64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsr r8, r10, r9 + lsl r10, r10, r12 + lsl r11, r11, r12 + or r11, r8 + retal r12 + +1: neg r9 + lsl r11, r10, r9 + mov r10, 0 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S new file mode 100644 index 00000000000..e65bb7f0d24 --- /dev/null +++ b/arch/avr32/lib/__avr32_lsr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_lsr64(DWtype u, word_type b) + */ + .text + .global __avr32_lsr64 + .type __avr32_lsr64,@function +__avr32_lsr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r11, r11, r12 + lsr r10, r10, r12 + or r10, r8 + retal r12 + +1: neg r9 + lsr r10, r11, r9 + mov r11, 0 + retal r12 diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S new file mode 100644 index 00000000000..d8991b6f8eb --- /dev/null +++ b/arch/avr32/lib/clear_user.S @@ -0,0 +1,76 @@ +/* + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + .text + .align 1 + .global clear_user + .type clear_user, "function" +clear_user: + branch_if_kernel r8, __clear_user + ret_if_privileged r8, r12, r11, r11 + + .global __clear_user + .type __clear_user, "function" +__clear_user: + mov r9, r12 + mov r8, 0 + andl r9, 3, COH + brne 5f + +1: sub r11, 4 + brlt 2f + +10: st.w r12++, r8 + sub r11, 4 + brge 10b + +2: sub r11, -4 + reteq 0 + + /* Unaligned count or address */ + bld r11, 1 + brcc 12f +11: st.h r12++, r8 + sub r11, 2 + reteq 0 +12: st.b r12++, r8 + retal 0 + + /* Unaligned address */ +5: cp.w r11, 4 + brlt 2b + + lsl r9, 2 + add pc, pc, r9 +13: st.b r12++, r8 + sub r11, 1 +14: st.b r12++, r8 + sub r11, 1 +15: st.b r12++, r8 + sub r11, 1 + rjmp 1b + + .size clear_user, . - clear_user + .size __clear_user, . - __clear_user + + .section .fixup, "ax" + .align 1 +18: sub r11, -4 +19: retal r11 + + .section __ex_table, "a" + .align 2 + .long 10b, 18b + .long 11b, 19b + .long 12b, 19b + .long 13b, 19b + .long 14b, 19b + .long 15b, 19b diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S new file mode 100644 index 00000000000..ea59c04b07d --- /dev/null +++ b/arch/avr32/lib/copy_user.S @@ -0,0 +1,119 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + /* + * __kernel_size_t + * __copy_user(void *to, const void *from, __kernel_size_t n) + * + * Returns the number of bytes not copied. Might be off by + * max 3 bytes if we get a fault in the main loop. + * + * The address-space checking functions simply fall through to + * the non-checking version. 
+ */ + .text + .align 1 + .global copy_from_user + .type copy_from_user, @function +copy_from_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r11, r10, r10 + rjmp __copy_user + .size copy_from_user, . - copy_from_user + + .global copy_to_user + .type copy_to_user, @function +copy_to_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r12, r10, r10 + .size copy_to_user, . - copy_to_user + + .global __copy_user + .type __copy_user, @function +__copy_user: + mov r9, r11 + andl r9, 3, COH + brne 6f + + /* At this point, from is word-aligned */ +1: sub r10, 4 + brlt 3f + +2: +10: ld.w r8, r11++ +11: st.w r12++, r8 + sub r10, 4 + brge 2b + +3: sub r10, -4 + reteq 0 + + /* + * Handle unaligned count. Need to be careful with r10 here so + * that we return the correct value even if we get a fault + */ +4: +20: ld.ub r8, r11++ +21: st.b r12++, r8 + sub r10, 1 + reteq 0 +22: ld.ub r8, r11++ +23: st.b r12++, r8 + sub r10, 1 + reteq 0 +24: ld.ub r8, r11++ +25: st.b r12++, r8 + retal 0 + + /* Handle unaligned from-pointer */ +6: cp.w r10, 4 + brlt 4b + rsub r9, r9, 4 + +30: ld.ub r8, r11++ +31: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +32: ld.ub r8, r11++ +33: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +34: ld.ub r8, r11++ +35: st.b r12++, r8 + sub r10, 1 + rjmp 1b + .size __copy_user, . 
- __copy_user + + .section .fixup,"ax" + .align 1 +19: sub r10, -4 +29: retal r10 + + .section __ex_table,"a" + .align 2 + .long 10b, 19b + .long 11b, 19b + .long 20b, 29b + .long 21b, 29b + .long 22b, 29b + .long 23b, 29b + .long 24b, 29b + .long 25b, 29b + .long 30b, 29b + .long 31b, 29b + .long 32b, 29b + .long 33b, 29b + .long 34b, 29b + .long 35b, 29b diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S new file mode 100644 index 00000000000..6a262b528eb --- /dev/null +++ b/arch/avr32/lib/csum_partial.S @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * unsigned int csum_partial(const unsigned char *buff, + * int len, unsigned int sum) + */ + .text + .global csum_partial + .type csum_partial,"function" + .align 1 +csum_partial: + /* checksum complete words, aligned or not */ +3: sub r11, 4 + brlt 5f +4: ld.w r9, r12++ + add r10, r9 + acr r10 + sub r11, 4 + brge 4b + + /* return if we had a whole number of words */ +5: sub r11, -4 + reteq r10 + + /* checksum any remaining bytes at the end */ + mov r9, 0 + mov r8, 0 + cp r11, 2 + brlt 6f + ld.uh r9, r12++ + sub r11, 2 + breq 7f + lsl r9, 16 +6: ld.ub r8, r12++ + lsl r8, 8 +7: or r9, r8 + add r10, r9 + acr r10 + + retal r10 + .size csum_partial, . - csum_partial diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S new file mode 100644 index 00000000000..a3a0f9b8929 --- /dev/null +++ b/arch/avr32/lib/csum_partial_copy_generic.S @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <asm/errno.h> +#include <asm/asm.h> + + /* + * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len + * int sum, int *src_err_ptr, + * int *dst_err_ptr) + * + * Copy src to dst while checksumming, otherwise like csum_partial. + */ + + .macro ld_src size, reg, ptr +9999: ld.\size \reg, \ptr + .section __ex_table, "a" + .long 9999b, fixup_ld_src + .previous + .endm + + .macro st_dst size, ptr, reg +9999: st.\size \ptr, \reg + .section __ex_table, "a" + .long 9999b, fixup_st_dst + .previous + .endm + + .text + .global csum_partial_copy_generic + .type csum_partial_copy_generic,"function" + .align 1 +csum_partial_copy_generic: + pushm r4-r7,lr + + /* The inner loop */ +1: sub r10, 4 + brlt 5f +2: ld_src w, r5, r12++ + st_dst w, r11++, r5 + add r9, r5 + acr r9 + sub r10, 4 + brge 2b + + /* return if we had a whole number of words */ +5: sub r10, -4 + brne 7f + +6: mov r12, r9 + popm r4-r7,pc + + /* handle additional bytes at the tail */ +7: mov r5, 0 + mov r4, 32 +8: ld_src ub, r6, r12++ + st_dst b, r11++, r6 + lsl r5, 8 + sub r4, 8 + bfins r5, r6, 0, 8 + sub r10, 1 + brne 8b + + lsl r5, r5, r4 + add r9, r5 + acr r9 + rjmp 6b + + /* Exception handler */ + .section .fixup,"ax" + .align 1 +fixup_ld_src: + mov r9, -EFAULT + cp.w r8, 0 + breq 1f + st.w r8[0], r9 + +1: /* + * TODO: zero the complete destination - computing the rest + * is too much work + */ + + mov r9, 0 + rjmp 6b + +fixup_st_dst: + mov r9, -EFAULT + lddsp r8, sp[20] + cp.w r8, 0 + breq 1f + st.w r8[0], r9 +1: mov r9, 0 + rjmp 6b + + .previous diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c new file mode 100644 index 00000000000..462c8307b68 --- /dev/null +++ b/arch/avr32/lib/delay.c @@ -0,0 +1,55 @@ +/* + * Precise Delay Loops for avr32 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/types.h> + +#include <asm/delay.h> +#include <asm/processor.h> +#include <asm/sysreg.h> + +int read_current_timer(unsigned long *timer_value) +{ + *timer_value = sysreg_read(COUNT); + return 0; +} + +void __delay(unsigned long loops) +{ + unsigned bclock, now; + + bclock = sysreg_read(COUNT); + do { + now = sysreg_read(COUNT); + } while ((now - bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + unsigned long long loops; + + asm("mulu.d %0, %1, %2" + : "=r"(loops) + : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops)); + __delay(loops >> 32); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S new file mode 100644 index 00000000000..2b4856f4bf7 --- /dev/null +++ b/arch/avr32/lib/findbit.S @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
	/*
	 * unsigned long find_next_zero_bit(const unsigned long *addr,
	 *				    unsigned long size,
	 *				    unsigned long offset)
	 *
	 * Register usage, as read from the code below:
	 *   r12 = addr, advanced one word at a time
	 *   r11 = size, returned unchanged when no zero bit is found
	 *   r10 = offset, reduced to (offset % 32) before the first load
	 *   r9  = number of bits left to examine, counted from the start
	 *         of the word currently being examined
	 *   r8  = complemented word being examined (zero bits become ones)
	 */
ENTRY(find_next_zero_bit)
	lsr	r8, r10, 5		/* r8 = index of the word holding "offset" */
	sub	r9, r11, r10		/* r9 = size - offset */
	retle	r11			/* nothing to search: return size */

	lsl	r8, 2			/* word index -> byte offset */
	add	r12, r8
	andl	r10, 31, COH
	breq	1f

	/* offset is not word-aligned. Handle the first (32 - r10) bits */
	ld.w	r8, r12[0]
	com	r8
	sub	r12, -4
	lsr	r8, r8, r10		/* drop the bits below "offset" */
	brne	.L_found

	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
	add	r9, r10
	sub	r9, 32
	retle	r11

	/* Main loop. offset must be word-aligned */
1:	ld.w	r8, r12[0]
	com	r8			/* non-zero iff the word contains a zero bit */
	brne	.L_found
	sub	r12, -4
	sub	r9, 32
	brgt	1b
	retal	r11
offset must be word-aligned */ +1: ld.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + +ENTRY(generic_find_next_zero_le_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ldswp.w r8, r12[0] + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ldswp.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S new file mode 100644 index 00000000000..b103511ed6c --- /dev/null +++ b/arch/avr32/lib/io-readsl.S @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .global __raw_readsl + .type __raw_readsl,@function +__raw_readsl: + cp.w r10, 0 + reteq r12 + + /* + * If r11 isn't properly aligned, we might get an exception on + * some implementations. But there's not much we can do about it. + */ +1: ld.w r8, r12[0] + sub r10, 1 + st.w r11++, r8 + brne 1b + + retal r12 diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S new file mode 100644 index 00000000000..456be990902 --- /dev/null +++ b/arch/avr32/lib/io-readsw.S @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +.Lnot_word_aligned: + /* + * Bad alignment will cause a hardware exception, which is as + * good as anything. No need for us to check for proper alignment. 
	/*
	 * __raw_writesl: write a buffer of 32-bit words to a single
	 * I/O location.
	 *
	 * Register usage, as read from the code below:
	 *   r12 = destination address; every word is stored to r12[0]
	 *   r11 = source buffer, post-incremented by each load
	 *   r10 = number of words to write
	 */
	.global	__raw_writesl
	.type	__raw_writesl,@function
__raw_writesl:
	cp.w	r10, 0
	reteq	r12			/* nothing to do for a zero count */

1:	ld.w	r8, r11++
	sub	r10, 1
	st.w	r12[0], r8
	/* The branch tests the "sub" above; the store sits in between */
	brne	1b

	retal	r12
	/*
	 * Entered from __raw_writesw when the source buffer is not
	 * word-aligned: write a single halfword, then fall through and
	 * re-run the entry checks with the updated pointer and count.
	 */
.Lnot_word_aligned:
	ld.uh	r8, r11++
	sub	r10, 1
	st.h	r12[0], r8

	/*
	 * __raw_writesw: write a buffer of 16-bit halfwords to a single
	 * I/O location.
	 *
	 * Register usage, as read from the code below:
	 *   r12 = destination address; every halfword is stored to r12[0]
	 *   r11 = source buffer, post-incremented by each load
	 *   r10 = number of halfwords to write
	 *   r8, r9 = scratch
	 */
	.global	__raw_writesw
	.type	__raw_writesw,@function
__raw_writesw:
	cp.w	r10, 0
	mov	r9, 3
	reteq	r12			/* zero count: nothing to do */
	tst	r11, r9			/* low two address bits set? */
	brne	.Lnot_word_aligned

	sub	r10, 2
	brlt	2f

	/*
	 * Main loop: fetch two halfwords with one word load and write
	 * bits 31:16 first, then bits 15:0.
	 */
1:	ld.w	r8, r11++
	bfextu	r9, r8, 16, 16
	st.h	r12[0], r9
	st.h	r12[0], r8
	sub	r10, 2
	brge	1b

2:	sub	r10, -2			/* undo the last "sub r10, 2": r10 = 0 or 1 */
	reteq	r12

	/* One halfword left over */
	ld.uh	r8, r11++
	st.h	r12[0], r8
	retal	r12
/* Borrowed from gcc-3.4.3 */

/*
 * Helpers for splitting a machine word (UWtype, W_TYPE_SIZE bits wide)
 * into two half-words.  __ll_B is the half-word base, 2^(W_TYPE_SIZE/2).
 */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* count = number of leading zero bits in x (x must be non-zero). */
#define count_leading_zeros(count, x) ((count) = __builtin_clz(x))

/*
 * Divide the two-word number (n1, n0) by d, producing a one-word
 * quotient q and remainder r.  The quotient is built one half-word at
 * a time: each half-word estimate is obtained by dividing by the high
 * half of d and is then corrected (at most twice) against the low half.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __dh = __ll_highpart (d);					\
    UWtype __dl = __ll_lowpart (d);					\
    UWtype __qh, __ql, __rh, __rl, __prod;				\
									\
    /* Upper half-word of the quotient. */				\
    __qh = (n1) / __dh;							\
    __rh = (n1) % __dh;							\
    __prod = (UWtype) __qh * __dl;					\
    __rh = __rh * __ll_B | __ll_highpart (n0);				\
    if (__rh < __prod)							\
      {									\
	__qh--;								\
	__rh += (d);							\
	/* Correct again only if that addition did not carry. */	\
	if (__rh >= (d) && __rh < __prod)				\
	  {								\
	    __qh--;							\
	    __rh += (d);						\
	  }								\
      }									\
    __rh -= __prod;							\
									\
    /* Lower half-word of the quotient. */				\
    __ql = __rh / __dh;							\
    __rl = __rh % __dh;							\
    __prod = (UWtype) __ql * __dl;					\
    __rl = __rl * __ll_B | __ll_lowpart (n0);				\
    if (__rl < __prod)							\
      {									\
	__ql--;								\
	__rl += (d);							\
	if (__rl >= (d) && __rl < __prod)				\
	  {								\
	    __ql--;							\
	    __rl += (d);						\
	  }								\
      }									\
    __rl -= __prod;							\
									\
    (q) = (UWtype) __qh * __ll_B | __ql;				\
    (r) = __rl;								\
  } while (0)

#define udiv_qrnnd __udiv_qrnnd_c

/*
 * Two-word subtraction: (sh, sl) = (ah, al) - (bh, bl).  The borrow out
 * of the low word is detected by the low difference wrapping above al.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __lo_diff = (al) - (bl);					\
    (sh) = (ah) - (bh) - (__lo_diff > (al));				\
    (sl) = __lo_diff;							\
  } while (0)

/*
 * Full-width multiplication: (w1, w0) = u * v, computed from the four
 * half-word partial products.
 */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UHWtype __u_lo = __ll_lowpart (u);					\
    UHWtype __u_hi = __ll_highpart (u);					\
    UHWtype __v_lo = __ll_lowpart (v);					\
    UHWtype __v_hi = __ll_highpart (v);					\
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;				\
    UWtype __p_lh = (UWtype) __u_lo * __v_hi;				\
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;				\
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;				\
									\
    __p_lh += __ll_highpart (__p_ll);	/* this can't give carry */	\
    __p_lh += __p_hl;			/* but this indeed can */	\
    if (__p_lh < __p_hl)		/* did we get it? */		\
      __p_hh += __ll_B;		/* yes, add it in the proper pos. */	\
									\
    (w1) = __p_hh + __ll_highpart (__p_lh);				\
    (w0) = __ll_lowpart (__p_lh) * __ll_B + __ll_lowpart (__p_ll);	\
  } while (0)
+ * + * Hopefully, in most cases, both "to" and "from" will be + * word-aligned to begin with. + */ + .text + .global memcpy + .type memcpy, @function +memcpy: + mov r9, r11 + andl r9, 3, COH + brne 1f + + /* At this point, "from" is word-aligned */ +2: sub r10, 4 + mov r9, r12 + brlt 4f + +3: ld.w r8, r11++ + sub r10, 4 + st.w r12++, r8 + brge 3b + +4: neg r10 + reteq r9 + + /* Handle unaligned count */ + lsl r10, 2 + add pc, pc, r10 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + retal r9 + + /* Handle unaligned "from" pointer */ +1: sub r10, 4 + brlt 4b + add r10, r9 + lsl r9, 2 + add pc, pc, r9 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + rjmp 2b diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S new file mode 100644 index 00000000000..40da32c0480 --- /dev/null +++ b/arch/avr32/lib/memset.S @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/arm/lib/memset.S + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <asm/asm.h> + + /* + * r12: void *b + * r11: int c + * r10: size_t len + * + * Returns b in r12 + */ + .text + .global memset + .type memset, @function + .align 5 +memset: + mov r9, r12 + mov r8, r12 + or r11, r11, r11 << 8 + andl r9, 3, COH + brne 1f + +2: or r11, r11, r11 << 16 + sub r10, 4 + brlt 5f + + /* Let's do some real work */ +4: st.w r8++, r11 + sub r10, 4 + brge 4b + + /* + * When we get here, we've got less than 4 bytes to set. r10 + * might be negative. 
+ */ +5: sub r10, -4 + reteq r12 + + /* Fastpath ends here, exactly 32 bytes from memset */ + + /* Handle unaligned count or pointer */ + bld r10, 1 + brcc 6f + st.b r8++, r11 + st.b r8++, r11 + bld r10, 0 + retcc r12 +6: st.b r8++, r11 + retal r12 + + /* Handle unaligned pointer */ +1: sub r10, 4 + brlt 5b + add r10, r9 + lsl r9, 1 + add pc, r9 + st.b r8++, r11 + st.b r8++, r11 + st.b r8++, r11 + rjmp 2b + + .size memset, . - memset diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S new file mode 100644 index 00000000000..72bd50599ec --- /dev/null +++ b/arch/avr32/lib/strncpy_from_user.S @@ -0,0 +1,60 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + /* + * long strncpy_from_user(char *dst, const char *src, long count) + * + * On success, returns the length of the string, not including + * the terminating NUL. 
	/*
	 * long strncpy_from_user(char *dst, const char *src, long count)
	 *
	 * On success, returns the length of the string, not including
	 * the terminating NUL.
	 *
	 * If the string is longer than count, returns count
	 *
	 * If userspace access fails, returns -EFAULT
	 *
	 * Register usage, as read from the code below:
	 *   r12 = dst, r11 = src, r10 = count
	 *   r9  = bytes still allowed to be copied, r8 = current byte
	 */
	.text
	.align	1
	.global	strncpy_from_user
	.type	strncpy_from_user, "function"
strncpy_from_user:
	/*
	 * NOTE(review): branch_if_kernel / ret_if_privileged are macros
	 * defined elsewhere; presumably they skip the check for kernel
	 * callers and return r9 (-EFAULT) when the source range is not a
	 * valid user range - confirm against asm/asm.h.
	 */
	mov	r9, -EFAULT
	branch_if_kernel	r8, __strncpy_from_user
	ret_if_privileged	r8, r11, r10, r9

	/* Unchecked variant: no address-range validation */
	.global	__strncpy_from_user
	.type	__strncpy_from_user, "function"
__strncpy_from_user:
	cp.w	r10, 0
	reteq	0			/* count == 0: nothing copied */

	mov	r9, r10

	/* Copy bytes until a NUL has been stored or count bytes are done */
1:	ld.ub	r8, r11++
	st.b	r12++, r8
	cp.w	r8, 0
	breq	2f
	sub	r9, 1
	brne	1b

	/* count - remaining = number of non-NUL bytes copied */
2:	sub	r10, r9
	retal	r10

	/* Reached through the exception table when the load at 1: faults */
	.section .fixup, "ax"
	.align	1
3:	mov	r12, -EFAULT
	retal	r12

	.section __ex_table, "a"
	.align	2
	.long	1b, 3b
	/*
	 * strnlen_user: length of a user-space string, with range checking.
	 *
	 * Register usage, as read from the code below:
	 *   r12 = string address on entry, result on return
	 *   r11 = maximum number of bytes to examine
	 *
	 * Returns the string length INCLUDING the terminating NUL, a value
	 * one larger than the limit if no NUL was found within it, or 0 if
	 * the address range is rejected or the access faults.
	 */
	.text
	.align	1
	.global	strnlen_user
	.type	strnlen_user, "function"
strnlen_user:
	/* NOTE(review): branch_if_kernel is defined elsewhere - confirm semantics */
	branch_if_kernel r8, __strnlen_user
	sub	r8, r11, 1
	add	r8, r12			/* r8 = address of the last byte allowed */
	retcs	0			/* address arithmetic wrapped around */
	brmi	adjust_length		/* do a closer inspection */

	/* Unchecked variant; strnlen_user falls through into it */
	.global	__strnlen_user
	.type	__strnlen_user, "function"
__strnlen_user:
	mov	r10, r12		/* remember the start of the string */

10:	ld.ub	r8, r12++
	cp.w	r8, 0
	breq	2f
	sub	r11, 1
	brne	10b

	/* Limit exhausted without a NUL: make the result limit + 1 */
	sub	r12, -1
2:	sub	r12, r10
	retal	r12


	/*
	 * The requested range ends at an address with the top bit set.
	 * Clamp the limit to (TASK_SIZE - addr), rerun the scan, and
	 * return 0 if the string was not terminated within that limit.
	 */
	.type	adjust_length, "function"
adjust_length:
	cp.w	r12, 0		/* addr must always be < TASK_SIZE */
	retmi	0

	pushm	lr
	lddpc	lr, _task_size
	sub	r11, lr, r12
	mov	r9, r11			/* keep the clamped limit for the check below */
	rcall	__strnlen_user
	cp.w	r12, r9
	brgt	1f
	popm	pc
1:	popm	pc, r12=0

	.align	2
_task_size:
	.long	TASK_SIZE

	/* Reached through the exception table when the load at 10: faults */
	.section .fixup, "ax"
	.align	1
19:	retal	0

	.section __ex_table, "a"
	.align	2
	.long	10b, 19b
+ */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/init.h> +#include <asm/arch/sm.h> + +struct at32_sm system_manager; + +static int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +void __init setup_platform(void) +{ + at32_sm_init(); + at32_clock_init(); + at32_portmux_init(); + + /* FIXME: This doesn't belong here */ + at32_setup_serial_console(1); +} + +static int __init pdc_probe(struct platform_device *pdev) +{ + struct clk *pclk, *hclk; + + pclk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) { + dev_err(&pdev->dev, "no pclk defined\n"); + return PTR_ERR(pclk); + } + hclk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(hclk)) { + dev_err(&pdev->dev, "no hclk defined\n"); + clk_put(pclk); + return PTR_ERR(hclk); + } + + clk_enable(pclk); + clk_enable(hclk); + + dev_info(&pdev->dev, "Atmel Peripheral DMA Controller enabled\n"); + return 0; +} + +static struct platform_driver pdc_driver = { + .probe = pdc_probe, + .driver = { + .name = "pdc", + }, +}; + +static int __init pdc_init(void) +{ + return platform_driver_register(&pdc_driver); +} +arch_initcall(pdc_init); diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c new file mode 100644 index 00000000000..37982b60398 --- /dev/null +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -0,0 +1,876 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/board.h> +#include <asm/arch/portmux.h> +#include <asm/arch/sm.h> + +#include "clock.h" +#include "pio.h" +#include "sm.h" + +#define PBMEM(base) \ + { \ + .start = base, \ + .end = base + 0x3ff, \ + .flags = IORESOURCE_MEM, \ + } +#define IRQ(num) \ + { \ + .start = num, \ + .end = num, \ + .flags = IORESOURCE_IRQ, \ + } +#define NAMED_IRQ(num, _name) \ + { \ + .start = num, \ + .end = num, \ + .name = _name, \ + .flags = IORESOURCE_IRQ, \ + } + +#define DEFINE_DEV(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} +#define DEFINE_DEV_DATA(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .dev = { \ + .platform_data = &_name##_id##_data, \ + }, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} + +#define DEV_CLK(_name, devname, bus, _index) \ +static struct clk devname##_##_name = { \ + .name = #_name, \ + .dev = &devname##_device.dev, \ + .parent = &bus##_clk, \ + .mode = bus##_clk_mode, \ + .get_rate = bus##_clk_get_rate, \ + .index = _index, \ +} + +enum { + PIOA, + PIOB, + PIOC, + PIOD, +}; + +enum { + FUNC_A, + FUNC_B, +}; + +unsigned long at32ap7000_osc_rates[3] = { + [0] = 32768, + /* FIXME: these are ATSTK1002-specific */ + [1] = 20000000, + [2] = 12000000, +}; + +static unsigned long osc_get_rate(struct clk *clk) +{ + return at32ap7000_osc_rates[clk->index]; +} + +static unsigned long pll_get_rate(struct clk *clk, unsigned long control) +{ + unsigned long div, mul, rate; + + if (!(control & SM_BIT(PLLEN))) + return 0; + + div = SM_BFEXT(PLLDIV, control) + 1; + mul = 
SM_BFEXT(PLLMUL, control) + 1; + + rate = clk->parent->get_rate(clk->parent); + rate = (rate + div / 2) / div; + rate *= mul; + + return rate; +} + +static unsigned long pll0_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL0); + + return pll_get_rate(clk, control); +} + +static unsigned long pll1_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL1); + + return pll_get_rate(clk, control); +} + +/* + * The AT32AP7000 has five primary clock sources: One 32kHz + * oscillator, two crystal oscillators and two PLLs. + */ +static struct clk osc32k = { + .name = "osc32k", + .get_rate = osc_get_rate, + .users = 1, + .index = 0, +}; +static struct clk osc0 = { + .name = "osc0", + .get_rate = osc_get_rate, + .users = 1, + .index = 1, +}; +static struct clk osc1 = { + .name = "osc1", + .get_rate = osc_get_rate, + .index = 2, +}; +static struct clk pll0 = { + .name = "pll0", + .get_rate = pll0_get_rate, + .parent = &osc0, +}; +static struct clk pll1 = { + .name = "pll1", + .get_rate = pll1_get_rate, + .parent = &osc0, +}; + +/* + * The main clock can be either osc0 or pll0. The boot loader may + * have chosen one for us, so we don't really know which one until we + * have a look at the SM. + */ +static struct clk *main_clock; + +/* + * Synchronous clocks are generated from the main clock. The clocks + * must satisfy the constraint + * fCPU >= fHSB >= fPB + * i.e. each clock must not be faster than its parent. 
+ */ +static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift) +{ + return main_clock->get_rate(main_clock) >> shift; +}; + +static void cpu_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_CPU_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_CPU_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long cpu_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(CPUDIV)) + shift = SM_BFEXT(CPUSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void hsb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_HSB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_HSB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long hsb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(HSBDIV)) + shift = SM_BFEXT(HSBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void pba_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBA_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBA_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pba_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBADIV)) + shift = SM_BFEXT(PBASEL, cksel) + 1; + + 
return bus_clk_get_rate(clk, shift); +} + +static void pbb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pbb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBBDIV)) + shift = SM_BFEXT(PBBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static struct clk cpu_clk = { + .name = "cpu", + .get_rate = cpu_clk_get_rate, + .users = 1, +}; +static struct clk hsb_clk = { + .name = "hsb", + .parent = &cpu_clk, + .get_rate = hsb_clk_get_rate, +}; +static struct clk pba_clk = { + .name = "pba", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pba_clk_get_rate, + .index = 1, +}; +static struct clk pbb_clk = { + .name = "pbb", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pbb_clk_get_rate, + .users = 1, + .index = 2, +}; + +/* -------------------------------------------------------------------- + * Generic Clock operations + * -------------------------------------------------------------------- */ + +static void genclk_mode(struct clk *clk, int enabled) +{ + u32 control; + + BUG_ON(clk->index > 7); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (enabled) + control |= SM_BIT(CEN); + else + control &= ~SM_BIT(CEN); + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); +} + +static unsigned long genclk_get_rate(struct clk *clk) +{ + u32 control; + unsigned long div = 1; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (control & SM_BIT(DIVEN)) + div = 2 * (SM_BFEXT(DIV, control) + 1); + + return 
clk->parent->get_rate(clk->parent) / div; +} + +static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply) +{ + u32 control; + unsigned long parent_rate, actual_rate, div; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + parent_rate = clk->parent->get_rate(clk->parent); + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (rate > 3 * parent_rate / 4) { + actual_rate = parent_rate; + control &= ~SM_BIT(DIVEN); + } else { + div = (parent_rate + rate) / (2 * rate) - 1; + control = SM_BFINS(DIV, div, control) | SM_BIT(DIVEN); + actual_rate = parent_rate / (2 * (div + 1)); + } + + printk("clk %s: new rate %lu (actual rate %lu)\n", + clk->name, rate, actual_rate); + + if (apply) + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, + control); + + return actual_rate; +} + +int genclk_set_parent(struct clk *clk, struct clk *parent) +{ + u32 control; + + BUG_ON(clk->index > 7); + + printk("clk %s: new parent %s (was %s)\n", + clk->name, parent->name, + clk->parent ? 
clk->parent->name : "(null)"); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (parent == &osc1 || parent == &pll1) + control |= SM_BIT(OSCSEL); + else if (parent == &osc0 || parent == &pll0) + control &= ~SM_BIT(OSCSEL); + else + return -EINVAL; + + if (parent == &pll0 || parent == &pll1) + control |= SM_BIT(PLLSEL); + else + control &= ~SM_BIT(PLLSEL); + + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); + clk->parent = parent; + + return 0; +} + +/* -------------------------------------------------------------------- + * System peripherals + * -------------------------------------------------------------------- */ +static struct resource sm_resource[] = { + PBMEM(0xfff00000), + NAMED_IRQ(19, "eim"), + NAMED_IRQ(20, "pm"), + NAMED_IRQ(21, "rtc"), +}; +struct platform_device at32_sm_device = { + .name = "sm", + .id = 0, + .resource = sm_resource, + .num_resources = ARRAY_SIZE(sm_resource), +}; +DEV_CLK(pclk, at32_sm, pbb, 0); + +static struct resource intc0_resource[] = { + PBMEM(0xfff00400), +}; +struct platform_device at32_intc0_device = { + .name = "intc", + .id = 0, + .resource = intc0_resource, + .num_resources = ARRAY_SIZE(intc0_resource), +}; +DEV_CLK(pclk, at32_intc0, pbb, 1); + +static struct clk ebi_clk = { + .name = "ebi", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; +static struct clk hramc_clk = { + .name = "hramc", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; + +static struct resource smc0_resource[] = { + PBMEM(0xfff03400), +}; +DEFINE_DEV(smc, 0); +DEV_CLK(pclk, smc0, pbb, 13); +DEV_CLK(mck, smc0, hsb, 0); + +static struct platform_device pdc_device = { + .name = "pdc", + .id = 0, +}; +DEV_CLK(hclk, pdc, hsb, 4); +DEV_CLK(pclk, pdc, pba, 16); + +static struct clk pico_clk = { + .name = "pico", + .parent = &cpu_clk, + .mode = cpu_clk_mode, + .get_rate = cpu_clk_get_rate, + .users = 1, +}; + +/* 
-------------------------------------------------------------------- + * PIO + * -------------------------------------------------------------------- */ + +static struct resource pio0_resource[] = { + PBMEM(0xffe02800), + IRQ(13), +}; +DEFINE_DEV(pio, 0); +DEV_CLK(mck, pio0, pba, 10); + +static struct resource pio1_resource[] = { + PBMEM(0xffe02c00), + IRQ(14), +}; +DEFINE_DEV(pio, 1); +DEV_CLK(mck, pio1, pba, 11); + +static struct resource pio2_resource[] = { + PBMEM(0xffe03000), + IRQ(15), +}; +DEFINE_DEV(pio, 2); +DEV_CLK(mck, pio2, pba, 12); + +static struct resource pio3_resource[] = { + PBMEM(0xffe03400), + IRQ(16), +}; +DEFINE_DEV(pio, 3); +DEV_CLK(mck, pio3, pba, 13); + +void __init at32_add_system_devices(void) +{ + system_manager.eim_first_irq = NR_INTERNAL_IRQS; + + platform_device_register(&at32_sm_device); + platform_device_register(&at32_intc0_device); + platform_device_register(&smc0_device); + platform_device_register(&pdc_device); + + platform_device_register(&pio0_device); + platform_device_register(&pio1_device); + platform_device_register(&pio2_device); + platform_device_register(&pio3_device); +} + +/* -------------------------------------------------------------------- + * USART + * -------------------------------------------------------------------- */ + +static struct resource usart0_resource[] = { + PBMEM(0xffe00c00), + IRQ(7), +}; +DEFINE_DEV(usart, 0); +DEV_CLK(usart, usart0, pba, 4); + +static struct resource usart1_resource[] = { + PBMEM(0xffe01000), + IRQ(7), +}; +DEFINE_DEV(usart, 1); +DEV_CLK(usart, usart1, pba, 4); + +static struct resource usart2_resource[] = { + PBMEM(0xffe01400), + IRQ(8), +}; +DEFINE_DEV(usart, 2); +DEV_CLK(usart, usart2, pba, 5); + +static struct resource usart3_resource[] = { + PBMEM(0xffe01800), + IRQ(9), +}; +DEFINE_DEV(usart, 3); +DEV_CLK(usart, usart3, pba, 6); + +static inline void configure_usart0_pins(void) +{ + portmux_set_func(PIOA, 8, FUNC_B); /* RXD */ + portmux_set_func(PIOA, 9, FUNC_B); /* TXD */ 
+} + +static inline void configure_usart1_pins(void) +{ + portmux_set_func(PIOA, 17, FUNC_A); /* RXD */ + portmux_set_func(PIOA, 18, FUNC_A); /* TXD */ +} + +static inline void configure_usart2_pins(void) +{ + portmux_set_func(PIOB, 26, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 27, FUNC_B); /* TXD */ +} + +static inline void configure_usart3_pins(void) +{ + portmux_set_func(PIOB, 18, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 17, FUNC_B); /* TXD */ +} + +static struct platform_device *setup_usart(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &usart0_device; + configure_usart0_pins(); + break; + case 1: + pdev = &usart1_device; + configure_usart1_pins(); + break; + case 2: + pdev = &usart2_device; + configure_usart2_pins(); + break; + case 3: + pdev = &usart3_device; + configure_usart3_pins(); + break; + default: + pdev = NULL; + break; + } + + return pdev; +} + +struct platform_device *__init at32_add_device_usart(unsigned int id) +{ + struct platform_device *pdev; + + pdev = setup_usart(id); + if (pdev) + platform_device_register(pdev); + + return pdev; +} + +struct platform_device *at91_default_console_device; + +void __init at32_setup_serial_console(unsigned int usart_id) +{ + at91_default_console_device = setup_usart(usart_id); +} + +/* -------------------------------------------------------------------- + * Ethernet + * -------------------------------------------------------------------- */ + +static struct eth_platform_data macb0_data; +static struct resource macb0_resource[] = { + PBMEM(0xfff01800), + IRQ(25), +}; +DEFINE_DEV_DATA(macb, 0); +DEV_CLK(hclk, macb0, hsb, 8); +DEV_CLK(pclk, macb0, pbb, 6); + +struct platform_device *__init +at32_add_device_eth(unsigned int id, struct eth_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &macb0_device; + + portmux_set_func(PIOC, 3, FUNC_A); /* TXD0 */ + portmux_set_func(PIOC, 4, FUNC_A); /* TXD1 */ + portmux_set_func(PIOC, 7, 
FUNC_A); /* TXEN */ + portmux_set_func(PIOC, 8, FUNC_A); /* TXCK */ + portmux_set_func(PIOC, 9, FUNC_A); /* RXD0 */ + portmux_set_func(PIOC, 10, FUNC_A); /* RXD1 */ + portmux_set_func(PIOC, 13, FUNC_A); /* RXER */ + portmux_set_func(PIOC, 15, FUNC_A); /* RXDV */ + portmux_set_func(PIOC, 16, FUNC_A); /* MDC */ + portmux_set_func(PIOC, 17, FUNC_A); /* MDIO */ + + if (!data->is_rmii) { + portmux_set_func(PIOC, 0, FUNC_A); /* COL */ + portmux_set_func(PIOC, 1, FUNC_A); /* CRS */ + portmux_set_func(PIOC, 2, FUNC_A); /* TXER */ + portmux_set_func(PIOC, 5, FUNC_A); /* TXD2 */ + portmux_set_func(PIOC, 6, FUNC_A); /* TXD3 */ + portmux_set_func(PIOC, 11, FUNC_A); /* RXD2 */ + portmux_set_func(PIOC, 12, FUNC_A); /* RXD3 */ + portmux_set_func(PIOC, 14, FUNC_A); /* RXCK */ + portmux_set_func(PIOC, 18, FUNC_A); /* SPD */ + } + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data)); + platform_device_register(pdev); + + return pdev; +} + +/* -------------------------------------------------------------------- + * SPI + * -------------------------------------------------------------------- */ +static struct resource spi0_resource[] = { + PBMEM(0xffe00000), + IRQ(3), +}; +DEFINE_DEV(spi, 0); +DEV_CLK(mck, spi0, pba, 0); + +struct platform_device *__init at32_add_device_spi(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &spi0_device; + portmux_set_func(PIOA, 0, FUNC_A); /* MISO */ + portmux_set_func(PIOA, 1, FUNC_A); /* MOSI */ + portmux_set_func(PIOA, 2, FUNC_A); /* SCK */ + portmux_set_func(PIOA, 3, FUNC_A); /* NPCS0 */ + portmux_set_func(PIOA, 4, FUNC_A); /* NPCS1 */ + portmux_set_func(PIOA, 5, FUNC_A); /* NPCS2 */ + break; + + default: + return NULL; + } + + platform_device_register(pdev); + return pdev; +} + +/* -------------------------------------------------------------------- + * LCDC + * -------------------------------------------------------------------- */ +static struct 
lcdc_platform_data lcdc0_data; +static struct resource lcdc0_resource[] = { + { + .start = 0xff000000, + .end = 0xff000fff, + .flags = IORESOURCE_MEM, + }, + IRQ(1), +}; +DEFINE_DEV_DATA(lcdc, 0); +DEV_CLK(hclk, lcdc0, hsb, 7); +static struct clk lcdc0_pixclk = { + .name = "pixclk", + .dev = &lcdc0_device.dev, + .mode = genclk_mode, + .get_rate = genclk_get_rate, + .set_rate = genclk_set_rate, + .set_parent = genclk_set_parent, + .index = 7, +}; + +struct platform_device *__init +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &lcdc0_device; + portmux_set_func(PIOC, 19, FUNC_A); /* CC */ + portmux_set_func(PIOC, 20, FUNC_A); /* HSYNC */ + portmux_set_func(PIOC, 21, FUNC_A); /* PCLK */ + portmux_set_func(PIOC, 22, FUNC_A); /* VSYNC */ + portmux_set_func(PIOC, 23, FUNC_A); /* DVAL */ + portmux_set_func(PIOC, 24, FUNC_A); /* MODE */ + portmux_set_func(PIOC, 25, FUNC_A); /* PWR */ + portmux_set_func(PIOC, 26, FUNC_A); /* DATA0 */ + portmux_set_func(PIOC, 27, FUNC_A); /* DATA1 */ + portmux_set_func(PIOC, 28, FUNC_A); /* DATA2 */ + portmux_set_func(PIOC, 29, FUNC_A); /* DATA3 */ + portmux_set_func(PIOC, 30, FUNC_A); /* DATA4 */ + portmux_set_func(PIOC, 31, FUNC_A); /* DATA5 */ + portmux_set_func(PIOD, 0, FUNC_A); /* DATA6 */ + portmux_set_func(PIOD, 1, FUNC_A); /* DATA7 */ + portmux_set_func(PIOD, 2, FUNC_A); /* DATA8 */ + portmux_set_func(PIOD, 3, FUNC_A); /* DATA9 */ + portmux_set_func(PIOD, 4, FUNC_A); /* DATA10 */ + portmux_set_func(PIOD, 5, FUNC_A); /* DATA11 */ + portmux_set_func(PIOD, 6, FUNC_A); /* DATA12 */ + portmux_set_func(PIOD, 7, FUNC_A); /* DATA13 */ + portmux_set_func(PIOD, 8, FUNC_A); /* DATA14 */ + portmux_set_func(PIOD, 9, FUNC_A); /* DATA15 */ + portmux_set_func(PIOD, 10, FUNC_A); /* DATA16 */ + portmux_set_func(PIOD, 11, FUNC_A); /* DATA17 */ + portmux_set_func(PIOD, 12, FUNC_A); /* DATA18 */ + portmux_set_func(PIOD, 13, FUNC_A); /* DATA19 */ + 
portmux_set_func(PIOD, 14, FUNC_A); /* DATA20 */ + portmux_set_func(PIOD, 15, FUNC_A); /* DATA21 */ + portmux_set_func(PIOD, 16, FUNC_A); /* DATA22 */ + portmux_set_func(PIOD, 17, FUNC_A); /* DATA23 */ + + clk_set_parent(&lcdc0_pixclk, &pll0); + clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0)); + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, + sizeof(struct lcdc_platform_data)); + + platform_device_register(pdev); + return pdev; +} + +struct clk *at32_clock_list[] = { + &osc32k, + &osc0, + &osc1, + &pll0, + &pll1, + &cpu_clk, + &hsb_clk, + &pba_clk, + &pbb_clk, + &at32_sm_pclk, + &at32_intc0_pclk, + &ebi_clk, + &hramc_clk, + &smc0_pclk, + &smc0_mck, + &pdc_hclk, + &pdc_pclk, + &pico_clk, + &pio0_mck, + &pio1_mck, + &pio2_mck, + &pio3_mck, + &usart0_usart, + &usart1_usart, + &usart2_usart, + &usart3_usart, + &macb0_hclk, + &macb0_pclk, + &spi0_mck, + &lcdc0_hclk, + &lcdc0_pixclk, +}; +unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list); + +void __init at32_portmux_init(void) +{ + at32_init_pio(&pio0_device); + at32_init_pio(&pio1_device); + at32_init_pio(&pio2_device); + at32_init_pio(&pio3_device); +} + +void __init at32_clock_init(void) +{ + struct at32_sm *sm = &system_manager; + u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0; + int i; + + if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL)) + main_clock = &pll0; + else + main_clock = &osc0; + + if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC)) + pll0.parent = &osc1; + if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC)) + pll1.parent = &osc1; + + /* + * Turn on all clocks that have at least one user already, and + * turn off everything else. We only do this for module + * clocks, and even though it isn't particularly pretty to + * check the address of the mode function, it should do the + * trick... 
+ */ + for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->mode == &cpu_clk_mode) + cpu_mask |= 1 << clk->index; + else if (clk->mode == &hsb_clk_mode) + hsb_mask |= 1 << clk->index; + else if (clk->mode == &pba_clk_mode) + pba_mask |= 1 << clk->index; + else if (clk->mode == &pbb_clk_mode) + pbb_mask |= 1 << clk->index; + } + + sm_writel(sm, PM_CPU_MASK, cpu_mask); + sm_writel(sm, PM_HSB_MASK, hsb_mask); + sm_writel(sm, PM_PBA_MASK, pba_mask); + sm_writel(sm, PM_PBB_MASK, pbb_mask); +} diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c new file mode 100644 index 00000000000..3d0d1097389 --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.c @@ -0,0 +1,148 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/string.h> + +#include "clock.h" + +static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED; + +struct clk *clk_get(struct device *dev, const char *id) +{ + int i; + + for (i = 0; i < at32_nr_clocks; i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->dev == dev && strcmp(id, clk->name) == 0) + return clk; + } + + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + /* clocks are static for now, we can't free them */ +} +EXPORT_SYMBOL(clk_put); + +static void __clk_enable(struct clk *clk) +{ + if (clk->parent) + __clk_enable(clk->parent); + if (clk->users++ == 0 && clk->mode) + clk->mode(clk, 1); +} + +int clk_enable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_enable(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return 0; +} +EXPORT_SYMBOL(clk_enable); + +static void __clk_disable(struct clk *clk) +{ + BUG_ON(clk->users == 0); + + if (--clk->users == 0 && clk->mode) + clk->mode(clk, 0); + if (clk->parent) + __clk_disable(clk->parent); +} + +void clk_disable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_disable(clk); + spin_unlock_irqrestore(&clk_lock, flags); +} +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + unsigned long flags; + unsigned long rate; + + spin_lock_irqsave(&clk_lock, flags); + rate = clk->get_rate(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return rate; +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + unsigned long flags, actual_rate; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + actual_rate = clk->set_rate(clk, rate, 0); + spin_unlock_irqrestore(&clk_lock, flags); + + return actual_rate; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + 
unsigned long flags; + long ret; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_rate(clk, rate, 1); + spin_unlock_irqrestore(&clk_lock, flags); + + return (ret < 0) ? ret : 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + unsigned long flags; + int ret; + + if (!clk->set_parent) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_parent(clk, parent); + spin_unlock_irqrestore(&clk_lock, flags); + + return ret; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h new file mode 100644 index 00000000000..f953f044ba4 --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.h @@ -0,0 +1,30 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/clk.h> + +struct clk { + const char *name; /* Clock name/function */ + struct device *dev; /* Device the clock is used by */ + struct clk *parent; /* Parent clock, if any */ + void (*mode)(struct clk *clk, int enabled); + unsigned long (*get_rate)(struct clk *clk); + long (*set_rate)(struct clk *clk, unsigned long rate, + int apply); + int (*set_parent)(struct clk *clk, struct clk *parent); + u16 users; /* Enabled if non-zero */ + u16 index; /* Sibling index */ +}; + +extern struct clk *at32_clock_list[]; +extern unsigned int at32_nr_clocks; diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c new file mode 100644 index 00000000000..7da9c5f7a0e --- /dev/null +++ b/arch/avr32/mach-at32ap/extint.c @@ -0,0 +1,171 @@ +/* + * External interrupt handling for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/platform_device.h> +#include <linux/random.h> + +#include <asm/io.h> + +#include <asm/arch/sm.h> + +#include "sm.h" + +static void eim_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_unmask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IER, 1 << (irq - sm->eim_first_irq)); +} + +static int eim_set_irq_type(unsigned int irq, unsigned int flow_type) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + unsigned int i = irq - sm->eim_first_irq; + u32 mode, edge, level; + unsigned long flags; + int ret = 0; + + flow_type &= IRQ_TYPE_SENSE_MASK; + + spin_lock_irqsave(&sm->lock, flags); + + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + + switch (flow_type) { + case IRQ_TYPE_LEVEL_LOW: + mode |= 1 << i; + level &= ~(1 << i); + break; + case IRQ_TYPE_LEVEL_HIGH: + mode |= 1 << i; + level |= 1 << i; + break; + case IRQ_TYPE_EDGE_RISING: + mode &= ~(1 << i); + edge |= 1 << i; + break; + case IRQ_TYPE_EDGE_FALLING: + mode &= ~(1 << i); + edge &= ~(1 << i); + break; + default: + ret = -EINVAL; + break; + } + + sm_writel(sm, EIM_MODE, mode); + sm_writel(sm, EIM_EDGE, edge); + sm_writel(sm, EIM_LEVEL, level); + + spin_unlock_irqrestore(&sm->lock, flags); + + return ret; +} + +struct irq_chip eim_chip = { + .name = "eim", + .ack = eim_ack_irq, + .mask = eim_mask_irq, + .mask_ack = 
eim_mask_ack_irq, + .unmask = eim_unmask_irq, + .set_type = eim_set_irq_type, +}; + +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + struct at32_sm *sm = desc->handler_data; + struct irq_desc *ext_desc; + unsigned long status, pending; + unsigned int i, ext_irq; + + spin_lock(&sm->lock); + + status = sm_readl(sm, EIM_ISR); + pending = status & sm_readl(sm, EIM_IMR); + + while (pending) { + i = fls(pending) - 1; + pending &= ~(1 << i); + + ext_irq = i + sm->eim_first_irq; + ext_desc = irq_desc + ext_irq; + ext_desc->handle_irq(ext_irq, ext_desc, regs); + } + + spin_unlock(&sm->lock); +} + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + unsigned int nr_irqs; + unsigned int int_irq; + u32 pattern; + + /* + * The EIM is really the same module as SM, so register + * mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. 
+ */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + nr_irqs = fls(pattern); + + sm->eim_chip = &eim_chip; + + for (i = 0; i < nr_irqs; i++) { + set_irq_chip(sm->eim_first_irq + i, &eim_chip); + set_irq_chip_data(sm->eim_first_irq + i, sm); + } + + int_irq = platform_get_irq_byname(sm->pdev, "eim"); + + set_irq_chained_handler(int_irq, demux_eim_irq); + set_irq_data(int_irq, sm); + + printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n", + sm->regs, int_irq); + printk("EIM: Handling %u external IRQs, starting with IRQ %u\n", + nr_irqs, sm->eim_first_irq); + + return 0; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/hsmc.c b/arch/avr32/mach-at32ap/hsmc.c new file mode 100644 index 00000000000..7691721928a --- /dev/null +++ b/arch/avr32/mach-at32ap/hsmc.c @@ -0,0 +1,164 @@ +/* + * Static Memory Controller for AT32 chips + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#define DEBUG +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <asm/io.h> +#include <asm/arch/smc.h> + +#include "hsmc.h" + +#define NR_CHIP_SELECTS 6 + +struct hsmc { + void __iomem *regs; + struct clk *pclk; + struct clk *mck; +}; + +static struct hsmc *hsmc; + +int smc_set_configuration(int cs, const struct smc_config *config) +{ + unsigned long mul; + unsigned long offset; + u32 setup, pulse, cycle, mode; + + if (!hsmc) + return -ENODEV; + if (cs >= NR_CHIP_SELECTS) + return -EINVAL; + + /* + * cycles = x / T = x * f + * = ((x * 1000000000) * ((f * 65536) / 1000000000)) / 65536 + * = ((x * 1000000000) * (((f / 10000) * 65536) / 100000)) / 65536 + */ + mul = (clk_get_rate(hsmc->mck) / 10000) << 16; + mul /= 100000; + +#define ns2cyc(x) ((((x) * mul) + 65535) >> 16) + + setup = (HSMC_BF(NWE_SETUP, ns2cyc(config->nwe_setup)) + | HSMC_BF(NCS_WR_SETUP, ns2cyc(config->ncs_write_setup)) + | HSMC_BF(NRD_SETUP, ns2cyc(config->nrd_setup)) + | HSMC_BF(NCS_RD_SETUP, ns2cyc(config->ncs_read_setup))); + pulse = (HSMC_BF(NWE_PULSE, ns2cyc(config->nwe_pulse)) + | HSMC_BF(NCS_WR_PULSE, ns2cyc(config->ncs_write_pulse)) + | HSMC_BF(NRD_PULSE, ns2cyc(config->nrd_pulse)) + | HSMC_BF(NCS_RD_PULSE, ns2cyc(config->ncs_read_pulse))); + cycle = (HSMC_BF(NWE_CYCLE, ns2cyc(config->write_cycle)) + | HSMC_BF(NRD_CYCLE, ns2cyc(config->read_cycle))); + + switch (config->bus_width) { + case 1: + mode = HSMC_BF(DBW, HSMC_DBW_8_BITS); + break; + case 2: + mode = HSMC_BF(DBW, HSMC_DBW_16_BITS); + break; + case 4: + mode = HSMC_BF(DBW, HSMC_DBW_32_BITS); + break; + default: + return -EINVAL; + } + + if (config->nrd_controlled) + mode |= HSMC_BIT(READ_MODE); + if (config->nwe_controlled) + mode |= HSMC_BIT(WRITE_MODE); + if (config->byte_write) + mode |= HSMC_BIT(BAT); + + pr_debug("smc cs%d: setup/%08x pulse/%08x cycle/%08x mode/%08x\n", + cs, setup, pulse, cycle, mode); + + offset = cs * 0x10; 
+ hsmc_writel(hsmc, SETUP0 + offset, setup); + hsmc_writel(hsmc, PULSE0 + offset, pulse); + hsmc_writel(hsmc, CYCLE0 + offset, cycle); + hsmc_writel(hsmc, MODE0 + offset, mode); + hsmc_readl(hsmc, MODE0); /* I/O barrier */ + + return 0; +} +EXPORT_SYMBOL(smc_set_configuration); + +static int hsmc_probe(struct platform_device *pdev) +{ + struct resource *regs; + struct clk *pclk, *mck; + int ret; + + if (hsmc) + return -EBUSY; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + return -ENXIO; + pclk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) + return PTR_ERR(pclk); + mck = clk_get(&pdev->dev, "mck"); + if (IS_ERR(mck)) { + ret = PTR_ERR(mck); + goto out_put_pclk; + } + + ret = -ENOMEM; + hsmc = kzalloc(sizeof(struct hsmc), GFP_KERNEL); + if (!hsmc) + goto out_put_clocks; + + clk_enable(pclk); + clk_enable(mck); + + hsmc->pclk = pclk; + hsmc->mck = mck; + hsmc->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!hsmc->regs) + goto out_disable_clocks; + + dev_info(&pdev->dev, "Atmel Static Memory Controller at 0x%08lx\n", + (unsigned long)regs->start); + + platform_set_drvdata(pdev, hsmc); + + return 0; + +out_disable_clocks: + clk_disable(mck); + clk_disable(pclk); + kfree(hsmc); +out_put_clocks: + clk_put(mck); +out_put_pclk: + clk_put(pclk); + hsmc = NULL; + return ret; +} + +static struct platform_driver hsmc_driver = { + .probe = hsmc_probe, + .driver = { + .name = "smc", + }, +}; + +static int __init hsmc_init(void) +{ + return platform_driver_register(&hsmc_driver); +} +arch_initcall(hsmc_init); diff --git a/arch/avr32/mach-at32ap/hsmc.h b/arch/avr32/mach-at32ap/hsmc.h new file mode 100644 index 00000000000..5681276fafd --- /dev/null +++ b/arch/avr32/mach-at32ap/hsmc.h @@ -0,0 +1,127 @@ +/* + * Register definitions for Atmel Static Memory Controller (SMC) + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_HSMC_H__ +#define __ASM_AVR32_HSMC_H__ + +/* HSMC register offsets: one SETUP/PULSE/CYCLE/MODE group per chip select 0-5, groups 0x10 bytes apart */ +#define HSMC_SETUP0 0x0000 +#define HSMC_PULSE0 0x0004 +#define HSMC_CYCLE0 0x0008 +#define HSMC_MODE0 0x000c +#define HSMC_SETUP1 0x0010 +#define HSMC_PULSE1 0x0014 +#define HSMC_CYCLE1 0x0018 +#define HSMC_MODE1 0x001c +#define HSMC_SETUP2 0x0020 +#define HSMC_PULSE2 0x0024 +#define HSMC_CYCLE2 0x0028 +#define HSMC_MODE2 0x002c +#define HSMC_SETUP3 0x0030 +#define HSMC_PULSE3 0x0034 +#define HSMC_CYCLE3 0x0038 +#define HSMC_MODE3 0x003c +#define HSMC_SETUP4 0x0040 +#define HSMC_PULSE4 0x0044 +#define HSMC_CYCLE4 0x0048 +#define HSMC_MODE4 0x004c +#define HSMC_SETUP5 0x0050 +#define HSMC_PULSE5 0x0054 +#define HSMC_CYCLE5 0x0058 +#define HSMC_MODE5 0x005c + +/* Bitfields in SETUP0 (every field is described by a bit OFFSET and a SIZE in bits, as consumed by the HSMC_BF* macros below) */ +#define HSMC_NWE_SETUP_OFFSET 0 +#define HSMC_NWE_SETUP_SIZE 6 +#define HSMC_NCS_WR_SETUP_OFFSET 8 +#define HSMC_NCS_WR_SETUP_SIZE 6 +#define HSMC_NRD_SETUP_OFFSET 16 +#define HSMC_NRD_SETUP_SIZE 6 +#define HSMC_NCS_RD_SETUP_OFFSET 24 +#define HSMC_NCS_RD_SETUP_SIZE 6 + +/* Bitfields in PULSE0 */ +#define HSMC_NWE_PULSE_OFFSET 0 +#define HSMC_NWE_PULSE_SIZE 7 +#define HSMC_NCS_WR_PULSE_OFFSET 8 +#define HSMC_NCS_WR_PULSE_SIZE 7 +#define HSMC_NRD_PULSE_OFFSET 16 +#define HSMC_NRD_PULSE_SIZE 7 +#define HSMC_NCS_RD_PULSE_OFFSET 24 +#define HSMC_NCS_RD_PULSE_SIZE 7 + +/* Bitfields in CYCLE0 */ +#define HSMC_NWE_CYCLE_OFFSET 0 +#define HSMC_NWE_CYCLE_SIZE 9 +#define HSMC_NRD_CYCLE_OFFSET 16 +#define HSMC_NRD_CYCLE_SIZE 9 + +/* Bitfields in MODE0 */ +#define HSMC_READ_MODE_OFFSET 0 +#define HSMC_READ_MODE_SIZE 1 +#define HSMC_WRITE_MODE_OFFSET 1 +#define HSMC_WRITE_MODE_SIZE 1 +#define HSMC_EXNW_MODE_OFFSET 4 +#define HSMC_EXNW_MODE_SIZE 2 +#define HSMC_BAT_OFFSET 8 +#define HSMC_BAT_SIZE 1 +#define HSMC_DBW_OFFSET 12 +#define HSMC_DBW_SIZE 2 +#define HSMC_TDF_CYCLES_OFFSET 16 +#define HSMC_TDF_CYCLES_SIZE 4 +#define
HSMC_TDF_MODE_OFFSET 20 +#define HSMC_TDF_MODE_SIZE 1 +#define HSMC_PMEN_OFFSET 24 +#define HSMC_PMEN_SIZE 1 +#define HSMC_PS_OFFSET 28 +#define HSMC_PS_SIZE 2 + +/* Constants for READ_MODE */ +#define HSMC_READ_MODE_NCS_CONTROLLED 0 +#define HSMC_READ_MODE_NRD_CONTROLLED 1 + +/* Constants for WRITE_MODE */ +#define HSMC_WRITE_MODE_NCS_CONTROLLED 0 +#define HSMC_WRITE_MODE_NWE_CONTROLLED 1 + +/* Constants for EXNW_MODE */ +#define HSMC_EXNW_MODE_DISABLED 0 +#define HSMC_EXNW_MODE_RESERVED 1 +#define HSMC_EXNW_MODE_FROZEN 2 +#define HSMC_EXNW_MODE_READY 3 + +/* Constants for BAT */ +#define HSMC_BAT_BYTE_SELECT 0 +#define HSMC_BAT_BYTE_WRITE 1 + +/* Constants for DBW (smc_set_configuration() in hsmc.c picks one of these from a bus_width of 1, 2 or 4) */ +#define HSMC_DBW_8_BITS 0 +#define HSMC_DBW_16_BITS 1 +#define HSMC_DBW_32_BITS 2 + +/* Bit manipulation macros: HSMC_BIT(name) is a mask with only the bit at the field's OFFSET set; HSMC_BF(name,value) masks value to the field's SIZE and shifts it to the field's OFFSET; HSMC_BFEXT(name,value) extracts the field from a register value; HSMC_BFINS(name,value,old) yields old with the field replaced by value */ +#define HSMC_BIT(name) \ + (1 << HSMC_##name##_OFFSET) +#define HSMC_BF(name,value) \ + (((value) & ((1 << HSMC_##name##_SIZE) - 1)) \ + << HSMC_##name##_OFFSET) +#define HSMC_BFEXT(name,value) \ + (((value) >> HSMC_##name##_OFFSET) \ + & ((1 << HSMC_##name##_SIZE) - 1)) +#define HSMC_BFINS(name,value,old) \ + (((old) & ~(((1 << HSMC_##name##_SIZE) - 1) \ + << HSMC_##name##_OFFSET)) | HSMC_BF(name,value)) + +/* Register access macros: port points to a structure with a regs base address; reg is a register name without the HSMC_ prefix, and because the prefix is pasted onto its first token only, hsmc.c may pass an expression such as MODE0 + offset */ +#define hsmc_readl(port,reg) \ + readl((port)->regs + HSMC_##reg) +#define hsmc_writel(port,reg,value) \ + writel((value), (port)->regs + HSMC_##reg) + +#endif /* __ASM_AVR32_HSMC_H__ */ diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c new file mode 100644 index 00000000000..74f8c9f2f03 --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include "intc.h" + +struct intc { + void __iomem *regs; + struct irq_chip chip; +}; + +extern struct platform_device at32_intc0_device; + +/* + * TODO: We may be able to implement mask/unmask by setting IxM flags + * in the status register. + */ +static void intc_mask_irq(unsigned int irq) +{ + +} + +static void intc_unmask_irq(unsigned int irq) +{ + +} + +static struct intc intc0 = { + .chip = { + .name = "intc", + .mask = intc_mask_irq, + .unmask = intc_unmask_irq, + }, +}; + +/* + * All interrupts go via intc at some point. + */ +asmlinkage void do_IRQ(int level, struct pt_regs *regs) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long status_reg; + + local_irq_disable(); + + irq_enter(); + + irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); + desc = irq_desc + irq; + desc->handle_irq(irq, desc, regs); + + /* + * Clear all interrupt level masks so that we may handle + * interrupts during softirq processing. If this is a nested + * interrupt, interrupts must stay globally disabled until we + * return. 
+ */ + status_reg = sysreg_read(SR); + status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M) + | SYSREG_BIT(I2M) | SYSREG_BIT(I3M)); + sysreg_write(SR, status_reg); + + irq_exit(); +} + +void __init init_IRQ(void) +{ + extern void _evba(void); + extern void irq_level0(void); + struct resource *regs; + struct clk *pclk; + unsigned int i; + u32 offset, readback; + + regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0); + if (!regs) { + printk(KERN_EMERG "intc: no mmio resource defined\n"); + goto fail; + } + pclk = clk_get(&at32_intc0_device.dev, "pclk"); + if (IS_ERR(pclk)) { + printk(KERN_EMERG "intc: no clock defined\n"); + goto fail; + } + + clk_enable(pclk); + + intc0.regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!intc0.regs) { + printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n", + (unsigned long)regs->start); + goto fail; + } + + /* + * Initialize all interrupts to level 0 (lowest priority). The + * priority level may be changed by calling + * irq_set_priority(). 
+ * + */ + offset = (unsigned long)&irq_level0 - (unsigned long)&_evba; + for (i = 0; i < NR_INTERNAL_IRQS; i++) { + intc_writel(&intc0, INTPR0 + 4 * i, offset); + readback = intc_readl(&intc0, INTPR0 + 4 * i); + if (readback == offset) + set_irq_chip_and_handler(i, &intc0.chip, + handle_simple_irq); + } + + /* Unmask all interrupt levels */ + sysreg_write(SR, (sysreg_read(SR) + & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M))); + + return; + +fail: + panic("Interrupt controller initialization failed!\n"); +} + diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h new file mode 100644 index 00000000000..d289ca2fff1 --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.h @@ -0,0 +1,327 @@ +/* + * Automatically generated by gen-header.xsl + */ +#ifndef __ASM_AVR32_PERIHP_INTC_H__ +#define __ASM_AVR32_PERIHP_INTC_H__ + +#define INTC_NUM_INT_GRPS 33 + +#define INTC_INTPR0 0x0 +# define INTC_INTPR0_INTLEV_OFFSET 30 +# define INTC_INTPR0_INTLEV_SIZE 2 +# define INTC_INTPR0_OFFSET_OFFSET 0 +# define INTC_INTPR0_OFFSET_SIZE 24 +#define INTC_INTREQ0 0x100 +# define INTC_INTREQ0_IREQUEST0_OFFSET 0 +# define INTC_INTREQ0_IREQUEST0_SIZE 1 +# define INTC_INTREQ0_IREQUEST1_OFFSET 1 +# define INTC_INTREQ0_IREQUEST1_SIZE 1 +#define INTC_INTPR1 0x4 +# define INTC_INTPR1_INTLEV_OFFSET 30 +# define INTC_INTPR1_INTLEV_SIZE 2 +# define INTC_INTPR1_OFFSET_OFFSET 0 +# define INTC_INTPR1_OFFSET_SIZE 24 +#define INTC_INTREQ1 0x104 +# define INTC_INTREQ1_IREQUEST32_OFFSET 0 +# define INTC_INTREQ1_IREQUEST32_SIZE 1 +# define INTC_INTREQ1_IREQUEST33_OFFSET 1 +# define INTC_INTREQ1_IREQUEST33_SIZE 1 +# define INTC_INTREQ1_IREQUEST34_OFFSET 2 +# define INTC_INTREQ1_IREQUEST34_SIZE 1 +# define INTC_INTREQ1_IREQUEST35_OFFSET 3 +# define INTC_INTREQ1_IREQUEST35_SIZE 1 +# define INTC_INTREQ1_IREQUEST36_OFFSET 4 +# define INTC_INTREQ1_IREQUEST36_SIZE 1 +# define INTC_INTREQ1_IREQUEST37_OFFSET 5 +# define INTC_INTREQ1_IREQUEST37_SIZE 1 +#define INTC_INTPR2 0x8 +# define 
INTC_INTPR2_INTLEV_OFFSET 30 +# define INTC_INTPR2_INTLEV_SIZE 2 +# define INTC_INTPR2_OFFSET_OFFSET 0 +# define INTC_INTPR2_OFFSET_SIZE 24 +#define INTC_INTREQ2 0x108 +# define INTC_INTREQ2_IREQUEST64_OFFSET 0 +# define INTC_INTREQ2_IREQUEST64_SIZE 1 +# define INTC_INTREQ2_IREQUEST65_OFFSET 1 +# define INTC_INTREQ2_IREQUEST65_SIZE 1 +# define INTC_INTREQ2_IREQUEST66_OFFSET 2 +# define INTC_INTREQ2_IREQUEST66_SIZE 1 +# define INTC_INTREQ2_IREQUEST67_OFFSET 3 +# define INTC_INTREQ2_IREQUEST67_SIZE 1 +# define INTC_INTREQ2_IREQUEST68_OFFSET 4 +# define INTC_INTREQ2_IREQUEST68_SIZE 1 +#define INTC_INTPR3 0xc +# define INTC_INTPR3_INTLEV_OFFSET 30 +# define INTC_INTPR3_INTLEV_SIZE 2 +# define INTC_INTPR3_OFFSET_OFFSET 0 +# define INTC_INTPR3_OFFSET_SIZE 24 +#define INTC_INTREQ3 0x10c +# define INTC_INTREQ3_IREQUEST96_OFFSET 0 +# define INTC_INTREQ3_IREQUEST96_SIZE 1 +#define INTC_INTPR4 0x10 +# define INTC_INTPR4_INTLEV_OFFSET 30 +# define INTC_INTPR4_INTLEV_SIZE 2 +# define INTC_INTPR4_OFFSET_OFFSET 0 +# define INTC_INTPR4_OFFSET_SIZE 24 +#define INTC_INTREQ4 0x110 +# define INTC_INTREQ4_IREQUEST128_OFFSET 0 +# define INTC_INTREQ4_IREQUEST128_SIZE 1 +#define INTC_INTPR5 0x14 +# define INTC_INTPR5_INTLEV_OFFSET 30 +# define INTC_INTPR5_INTLEV_SIZE 2 +# define INTC_INTPR5_OFFSET_OFFSET 0 +# define INTC_INTPR5_OFFSET_SIZE 24 +#define INTC_INTREQ5 0x114 +# define INTC_INTREQ5_IREQUEST160_OFFSET 0 +# define INTC_INTREQ5_IREQUEST160_SIZE 1 +#define INTC_INTPR6 0x18 +# define INTC_INTPR6_INTLEV_OFFSET 30 +# define INTC_INTPR6_INTLEV_SIZE 2 +# define INTC_INTPR6_OFFSET_OFFSET 0 +# define INTC_INTPR6_OFFSET_SIZE 24 +#define INTC_INTREQ6 0x118 +# define INTC_INTREQ6_IREQUEST192_OFFSET 0 +# define INTC_INTREQ6_IREQUEST192_SIZE 1 +#define INTC_INTPR7 0x1c +# define INTC_INTPR7_INTLEV_OFFSET 30 +# define INTC_INTPR7_INTLEV_SIZE 2 +# define INTC_INTPR7_OFFSET_OFFSET 0 +# define INTC_INTPR7_OFFSET_SIZE 24 +#define INTC_INTREQ7 0x11c +# define INTC_INTREQ7_IREQUEST224_OFFSET 0 +# 
define INTC_INTREQ7_IREQUEST224_SIZE 1 +#define INTC_INTPR8 0x20 +# define INTC_INTPR8_INTLEV_OFFSET 30 +# define INTC_INTPR8_INTLEV_SIZE 2 +# define INTC_INTPR8_OFFSET_OFFSET 0 +# define INTC_INTPR8_OFFSET_SIZE 24 +#define INTC_INTREQ8 0x120 +# define INTC_INTREQ8_IREQUEST256_OFFSET 0 +# define INTC_INTREQ8_IREQUEST256_SIZE 1 +#define INTC_INTPR9 0x24 +# define INTC_INTPR9_INTLEV_OFFSET 30 +# define INTC_INTPR9_INTLEV_SIZE 2 +# define INTC_INTPR9_OFFSET_OFFSET 0 +# define INTC_INTPR9_OFFSET_SIZE 24 +#define INTC_INTREQ9 0x124 +# define INTC_INTREQ9_IREQUEST288_OFFSET 0 +# define INTC_INTREQ9_IREQUEST288_SIZE 1 +#define INTC_INTPR10 0x28 +# define INTC_INTPR10_INTLEV_OFFSET 30 +# define INTC_INTPR10_INTLEV_SIZE 2 +# define INTC_INTPR10_OFFSET_OFFSET 0 +# define INTC_INTPR10_OFFSET_SIZE 24 +#define INTC_INTREQ10 0x128 +# define INTC_INTREQ10_IREQUEST320_OFFSET 0 +# define INTC_INTREQ10_IREQUEST320_SIZE 1 +#define INTC_INTPR11 0x2c +# define INTC_INTPR11_INTLEV_OFFSET 30 +# define INTC_INTPR11_INTLEV_SIZE 2 +# define INTC_INTPR11_OFFSET_OFFSET 0 +# define INTC_INTPR11_OFFSET_SIZE 24 +#define INTC_INTREQ11 0x12c +# define INTC_INTREQ11_IREQUEST352_OFFSET 0 +# define INTC_INTREQ11_IREQUEST352_SIZE 1 +#define INTC_INTPR12 0x30 +# define INTC_INTPR12_INTLEV_OFFSET 30 +# define INTC_INTPR12_INTLEV_SIZE 2 +# define INTC_INTPR12_OFFSET_OFFSET 0 +# define INTC_INTPR12_OFFSET_SIZE 24 +#define INTC_INTREQ12 0x130 +# define INTC_INTREQ12_IREQUEST384_OFFSET 0 +# define INTC_INTREQ12_IREQUEST384_SIZE 1 +#define INTC_INTPR13 0x34 +# define INTC_INTPR13_INTLEV_OFFSET 30 +# define INTC_INTPR13_INTLEV_SIZE 2 +# define INTC_INTPR13_OFFSET_OFFSET 0 +# define INTC_INTPR13_OFFSET_SIZE 24 +#define INTC_INTREQ13 0x134 +# define INTC_INTREQ13_IREQUEST416_OFFSET 0 +# define INTC_INTREQ13_IREQUEST416_SIZE 1 +#define INTC_INTPR14 0x38 +# define INTC_INTPR14_INTLEV_OFFSET 30 +# define INTC_INTPR14_INTLEV_SIZE 2 +# define INTC_INTPR14_OFFSET_OFFSET 0 +# define INTC_INTPR14_OFFSET_SIZE 24 
+#define INTC_INTREQ14 0x138 +# define INTC_INTREQ14_IREQUEST448_OFFSET 0 +# define INTC_INTREQ14_IREQUEST448_SIZE 1 +#define INTC_INTPR15 0x3c +# define INTC_INTPR15_INTLEV_OFFSET 30 +# define INTC_INTPR15_INTLEV_SIZE 2 +# define INTC_INTPR15_OFFSET_OFFSET 0 +# define INTC_INTPR15_OFFSET_SIZE 24 +#define INTC_INTREQ15 0x13c +# define INTC_INTREQ15_IREQUEST480_OFFSET 0 +# define INTC_INTREQ15_IREQUEST480_SIZE 1 +#define INTC_INTPR16 0x40 +# define INTC_INTPR16_INTLEV_OFFSET 30 +# define INTC_INTPR16_INTLEV_SIZE 2 +# define INTC_INTPR16_OFFSET_OFFSET 0 +# define INTC_INTPR16_OFFSET_SIZE 24 +#define INTC_INTREQ16 0x140 +# define INTC_INTREQ16_IREQUEST512_OFFSET 0 +# define INTC_INTREQ16_IREQUEST512_SIZE 1 +#define INTC_INTPR17 0x44 +# define INTC_INTPR17_INTLEV_OFFSET 30 +# define INTC_INTPR17_INTLEV_SIZE 2 +# define INTC_INTPR17_OFFSET_OFFSET 0 +# define INTC_INTPR17_OFFSET_SIZE 24 +#define INTC_INTREQ17 0x144 +# define INTC_INTREQ17_IREQUEST544_OFFSET 0 +# define INTC_INTREQ17_IREQUEST544_SIZE 1 +#define INTC_INTPR18 0x48 +# define INTC_INTPR18_INTLEV_OFFSET 30 +# define INTC_INTPR18_INTLEV_SIZE 2 +# define INTC_INTPR18_OFFSET_OFFSET 0 +# define INTC_INTPR18_OFFSET_SIZE 24 +#define INTC_INTREQ18 0x148 +# define INTC_INTREQ18_IREQUEST576_OFFSET 0 +# define INTC_INTREQ18_IREQUEST576_SIZE 1 +#define INTC_INTPR19 0x4c +# define INTC_INTPR19_INTLEV_OFFSET 30 +# define INTC_INTPR19_INTLEV_SIZE 2 +# define INTC_INTPR19_OFFSET_OFFSET 0 +# define INTC_INTPR19_OFFSET_SIZE 24 +#define INTC_INTREQ19 0x14c +# define INTC_INTREQ19_IREQUEST608_OFFSET 0 +# define INTC_INTREQ19_IREQUEST608_SIZE 1 +# define INTC_INTREQ19_IREQUEST609_OFFSET 1 +# define INTC_INTREQ19_IREQUEST609_SIZE 1 +# define INTC_INTREQ19_IREQUEST610_OFFSET 2 +# define INTC_INTREQ19_IREQUEST610_SIZE 1 +# define INTC_INTREQ19_IREQUEST611_OFFSET 3 +# define INTC_INTREQ19_IREQUEST611_SIZE 1 +#define INTC_INTPR20 0x50 +# define INTC_INTPR20_INTLEV_OFFSET 30 +# define INTC_INTPR20_INTLEV_SIZE 2 +# define 
INTC_INTPR20_OFFSET_OFFSET 0 +# define INTC_INTPR20_OFFSET_SIZE 24 +#define INTC_INTREQ20 0x150 +# define INTC_INTREQ20_IREQUEST640_OFFSET 0 +# define INTC_INTREQ20_IREQUEST640_SIZE 1 +#define INTC_INTPR21 0x54 +# define INTC_INTPR21_INTLEV_OFFSET 30 +# define INTC_INTPR21_INTLEV_SIZE 2 +# define INTC_INTPR21_OFFSET_OFFSET 0 +# define INTC_INTPR21_OFFSET_SIZE 24 +#define INTC_INTREQ21 0x154 +# define INTC_INTREQ21_IREQUEST672_OFFSET 0 +# define INTC_INTREQ21_IREQUEST672_SIZE 1 +#define INTC_INTPR22 0x58 +# define INTC_INTPR22_INTLEV_OFFSET 30 +# define INTC_INTPR22_INTLEV_SIZE 2 +# define INTC_INTPR22_OFFSET_OFFSET 0 +# define INTC_INTPR22_OFFSET_SIZE 24 +#define INTC_INTREQ22 0x158 +# define INTC_INTREQ22_IREQUEST704_OFFSET 0 +# define INTC_INTREQ22_IREQUEST704_SIZE 1 +# define INTC_INTREQ22_IREQUEST705_OFFSET 1 +# define INTC_INTREQ22_IREQUEST705_SIZE 1 +# define INTC_INTREQ22_IREQUEST706_OFFSET 2 +# define INTC_INTREQ22_IREQUEST706_SIZE 1 +#define INTC_INTPR23 0x5c +# define INTC_INTPR23_INTLEV_OFFSET 30 +# define INTC_INTPR23_INTLEV_SIZE 2 +# define INTC_INTPR23_OFFSET_OFFSET 0 +# define INTC_INTPR23_OFFSET_SIZE 24 +#define INTC_INTREQ23 0x15c +# define INTC_INTREQ23_IREQUEST736_OFFSET 0 +# define INTC_INTREQ23_IREQUEST736_SIZE 1 +# define INTC_INTREQ23_IREQUEST737_OFFSET 1 +# define INTC_INTREQ23_IREQUEST737_SIZE 1 +# define INTC_INTREQ23_IREQUEST738_OFFSET 2 +# define INTC_INTREQ23_IREQUEST738_SIZE 1 +#define INTC_INTPR24 0x60 +# define INTC_INTPR24_INTLEV_OFFSET 30 +# define INTC_INTPR24_INTLEV_SIZE 2 +# define INTC_INTPR24_OFFSET_OFFSET 0 +# define INTC_INTPR24_OFFSET_SIZE 24 +#define INTC_INTREQ24 0x160 +# define INTC_INTREQ24_IREQUEST768_OFFSET 0 +# define INTC_INTREQ24_IREQUEST768_SIZE 1 +#define INTC_INTPR25 0x64 +# define INTC_INTPR25_INTLEV_OFFSET 30 +# define INTC_INTPR25_INTLEV_SIZE 2 +# define INTC_INTPR25_OFFSET_OFFSET 0 +# define INTC_INTPR25_OFFSET_SIZE 24 +#define INTC_INTREQ25 0x164 +# define INTC_INTREQ25_IREQUEST800_OFFSET 0 +# define 
INTC_INTREQ25_IREQUEST800_SIZE 1 +#define INTC_INTPR26 0x68 +# define INTC_INTPR26_INTLEV_OFFSET 30 +# define INTC_INTPR26_INTLEV_SIZE 2 +# define INTC_INTPR26_OFFSET_OFFSET 0 +# define INTC_INTPR26_OFFSET_SIZE 24 +#define INTC_INTREQ26 0x168 +# define INTC_INTREQ26_IREQUEST832_OFFSET 0 +# define INTC_INTREQ26_IREQUEST832_SIZE 1 +#define INTC_INTPR27 0x6c +# define INTC_INTPR27_INTLEV_OFFSET 30 +# define INTC_INTPR27_INTLEV_SIZE 2 +# define INTC_INTPR27_OFFSET_OFFSET 0 +# define INTC_INTPR27_OFFSET_SIZE 24 +#define INTC_INTREQ27 0x16c +# define INTC_INTREQ27_IREQUEST864_OFFSET 0 +# define INTC_INTREQ27_IREQUEST864_SIZE 1 +#define INTC_INTPR28 0x70 +# define INTC_INTPR28_INTLEV_OFFSET 30 +# define INTC_INTPR28_INTLEV_SIZE 2 +# define INTC_INTPR28_OFFSET_OFFSET 0 +# define INTC_INTPR28_OFFSET_SIZE 24 +#define INTC_INTREQ28 0x170 +# define INTC_INTREQ28_IREQUEST896_OFFSET 0 +# define INTC_INTREQ28_IREQUEST896_SIZE 1 +#define INTC_INTPR29 0x74 +# define INTC_INTPR29_INTLEV_OFFSET 30 +# define INTC_INTPR29_INTLEV_SIZE 2 +# define INTC_INTPR29_OFFSET_OFFSET 0 +# define INTC_INTPR29_OFFSET_SIZE 24 +#define INTC_INTREQ29 0x174 +# define INTC_INTREQ29_IREQUEST928_OFFSET 0 +# define INTC_INTREQ29_IREQUEST928_SIZE 1 +#define INTC_INTPR30 0x78 +# define INTC_INTPR30_INTLEV_OFFSET 30 +# define INTC_INTPR30_INTLEV_SIZE 2 +# define INTC_INTPR30_OFFSET_OFFSET 0 +# define INTC_INTPR30_OFFSET_SIZE 24 +#define INTC_INTREQ30 0x178 +# define INTC_INTREQ30_IREQUEST960_OFFSET 0 +# define INTC_INTREQ30_IREQUEST960_SIZE 1 +#define INTC_INTPR31 0x7c +# define INTC_INTPR31_INTLEV_OFFSET 30 +# define INTC_INTPR31_INTLEV_SIZE 2 +# define INTC_INTPR31_OFFSET_OFFSET 0 +# define INTC_INTPR31_OFFSET_SIZE 24 +#define INTC_INTREQ31 0x17c +# define INTC_INTREQ31_IREQUEST992_OFFSET 0 +# define INTC_INTREQ31_IREQUEST992_SIZE 1 +#define INTC_INTPR32 0x80 +# define INTC_INTPR32_INTLEV_OFFSET 30 +# define INTC_INTPR32_INTLEV_SIZE 2 +# define INTC_INTPR32_OFFSET_OFFSET 0 +# define INTC_INTPR32_OFFSET_SIZE 
24 +#define INTC_INTREQ32 0x180 +# define INTC_INTREQ32_IREQUEST1024_OFFSET 0 +# define INTC_INTREQ32_IREQUEST1024_SIZE 1 +#define INTC_INTCAUSE0 0x20c +# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE1 0x208 +# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE2 0x204 +# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE3 0x200 +# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6 + +#define INTC_BIT(name) (1 << INTC_##name##_OFFSET) +#define INTC_MKBF(name, value) (((value) & ((1 << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET) +#define INTC_GETBF(name, value) (((value) >> INTC_##name##_OFFSET) & ((1 << INTC_##name##_SIZE) - 1)) + +#define intc_readl(port,reg) readl((port)->regs + INTC_##reg) +#define intc_writel(port,reg,value) writel((value), (port)->regs + INTC_##reg) + +#endif /* __ASM_AVR32_PERIHP_INTC_H__ */ diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c new file mode 100644 index 00000000000..d3aabfca859 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.c @@ -0,0 +1,118 @@ +/* + * Atmel PIO2 Port Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/clk.h> +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/portmux.h> + +#include "pio.h" + +#define MAX_NR_PIO_DEVICES 8 + +struct pio_device { + void __iomem *regs; + const struct platform_device *pdev; + struct clk *clk; + u32 alloc_mask; + char name[32]; +}; + +static struct pio_device pio_dev[MAX_NR_PIO_DEVICES]; + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id) +{ + struct pio_device *pio; + u32 mask = 1 << pin_id; + + BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES); + + pio = &pio_dev[portmux_id]; + + if (function_id) + pio_writel(pio, BSR, mask); + else + pio_writel(pio, ASR, mask); + pio_writel(pio, PDR, mask); +} + +static int __init pio_probe(struct platform_device *pdev) +{ + struct pio_device *pio = NULL; + + BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES); + pio = &pio_dev[pdev->id]; + BUG_ON(!pio->regs); + + /* TODO: Interrupts */ + + platform_set_drvdata(pdev, pio); + + printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n", + pio->name, pio->regs, platform_get_irq(pdev, 0)); + + return 0; +} + +static struct platform_driver pio_driver = { + .probe = pio_probe, + .driver = { + .name = "pio", + }, +}; + +static int __init pio_init(void) +{ + return platform_driver_register(&pio_driver); +} +subsys_initcall(pio_init); + +void __init at32_init_pio(struct platform_device *pdev) +{ + struct resource *regs; + struct pio_device *pio; + + if (pdev->id >= MAX_NR_PIO_DEVICES) { /* id indexes pio_dev[]: valid range is 0..MAX_NR_PIO_DEVICES-1 */ + dev_err(&pdev->dev, "only %d PIO devices supported\n", + MAX_NR_PIO_DEVICES); + return; + } + + pio = &pio_dev[pdev->id]; + snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id); + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) { + dev_err(&pdev->dev, "no mmio resource defined\n"); + return; + } + + pio->clk = clk_get(&pdev->dev, "mck"); + if (IS_ERR(pio->clk)) + /* + * This is a fatal error, but if we continue we 
might + * be so lucky that we manage to initialize the + * console and display this message... + */ + dev_err(&pdev->dev, "no mck clock defined\n"); + else + clk_enable(pio->clk); + + pio->pdev = pdev; + pio->regs = ioremap(regs->start, regs->end - regs->start + 1); + + pio_writel(pio, ODR, ~0UL); + pio_writel(pio, PER, ~0UL); +} diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h new file mode 100644 index 00000000000..cfea1235159 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.h @@ -0,0 +1,178 @@ +/* + * Atmel PIO2 Port Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ARCH_AVR32_AT32AP_PIO_H__ +#define __ARCH_AVR32_AT32AP_PIO_H__ + +/* PIO register offsets */ +#define PIO_PER 0x0000 +#define PIO_PDR 0x0004 +#define PIO_PSR 0x0008 +#define PIO_OER 0x0010 +#define PIO_ODR 0x0014 +#define PIO_OSR 0x0018 +#define PIO_IFER 0x0020 +#define PIO_IFDR 0x0024 +#define PIO_ISFR 0x0028 +#define PIO_SODR 0x0030 +#define PIO_CODR 0x0034 +#define PIO_ODSR 0x0038 +#define PIO_PDSR 0x003c +#define PIO_IER 0x0040 +#define PIO_IDR 0x0044 +#define PIO_IMR 0x0048 +#define PIO_ISR 0x004c +#define PIO_MDER 0x0050 +#define PIO_MDDR 0x0054 +#define PIO_MDSR 0x0058 +#define PIO_PUDR 0x0060 +#define PIO_PUER 0x0064 +#define PIO_PUSR 0x0068 +#define PIO_ASR 0x0070 +#define PIO_BSR 0x0074 +#define PIO_ABSR 0x0078 +#define PIO_OWER 0x00a0 +#define PIO_OWDR 0x00a4 +#define PIO_OWSR 0x00a8 + +/* Bitfields in PER */ + +/* Bitfields in PDR */ + +/* Bitfields in PSR */ + +/* Bitfields in OER */ + +/* Bitfields in ODR */ + +/* Bitfields in OSR */ + +/* Bitfields in IFER */ + +/* Bitfields in IFDR */ + +/* Bitfields in ISFR */ + +/* Bitfields in SODR */ + +/* Bitfields in CODR */ + +/* Bitfields in ODSR */ + +/* Bitfields in PDSR */ + +/* Bitfields in 
IER */ + +/* Bitfields in IDR */ + +/* Bitfields in IMR */ + +/* Bitfields in ISR */ + +/* Bitfields in MDER */ + +/* Bitfields in MDDR */ + +/* Bitfields in MDSR */ + +/* Bitfields in PUDR */ + +/* Bitfields in PUER */ + +/* Bitfields in PUSR */ + +/* Bitfields in ASR */ + +/* Bitfields in BSR */ + +/* Bitfields in ABSR */ +#define PIO_P0_OFFSET 0 +#define PIO_P0_SIZE 1 +#define PIO_P1_OFFSET 1 +#define PIO_P1_SIZE 1 +#define PIO_P2_OFFSET 2 +#define PIO_P2_SIZE 1 +#define PIO_P3_OFFSET 3 +#define PIO_P3_SIZE 1 +#define PIO_P4_OFFSET 4 +#define PIO_P4_SIZE 1 +#define PIO_P5_OFFSET 5 +#define PIO_P5_SIZE 1 +#define PIO_P6_OFFSET 6 +#define PIO_P6_SIZE 1 +#define PIO_P7_OFFSET 7 +#define PIO_P7_SIZE 1 +#define PIO_P8_OFFSET 8 +#define PIO_P8_SIZE 1 +#define PIO_P9_OFFSET 9 +#define PIO_P9_SIZE 1 +#define PIO_P10_OFFSET 10 +#define PIO_P10_SIZE 1 +#define PIO_P11_OFFSET 11 +#define PIO_P11_SIZE 1 +#define PIO_P12_OFFSET 12 +#define PIO_P12_SIZE 1 +#define PIO_P13_OFFSET 13 +#define PIO_P13_SIZE 1 +#define PIO_P14_OFFSET 14 +#define PIO_P14_SIZE 1 +#define PIO_P15_OFFSET 15 +#define PIO_P15_SIZE 1 +#define PIO_P16_OFFSET 16 +#define PIO_P16_SIZE 1 +#define PIO_P17_OFFSET 17 +#define PIO_P17_SIZE 1 +#define PIO_P18_OFFSET 18 +#define PIO_P18_SIZE 1 +#define PIO_P19_OFFSET 19 +#define PIO_P19_SIZE 1 +#define PIO_P20_OFFSET 20 +#define PIO_P20_SIZE 1 +#define PIO_P21_OFFSET 21 +#define PIO_P21_SIZE 1 +#define PIO_P22_OFFSET 22 +#define PIO_P22_SIZE 1 +#define PIO_P23_OFFSET 23 +#define PIO_P23_SIZE 1 +#define PIO_P24_OFFSET 24 +#define PIO_P24_SIZE 1 +#define PIO_P25_OFFSET 25 +#define PIO_P25_SIZE 1 +#define PIO_P26_OFFSET 26 +#define PIO_P26_SIZE 1 +#define PIO_P27_OFFSET 27 +#define PIO_P27_SIZE 1 +#define PIO_P28_OFFSET 28 +#define PIO_P28_SIZE 1 +#define PIO_P29_OFFSET 29 +#define PIO_P29_SIZE 1 +#define PIO_P30_OFFSET 30 +#define PIO_P30_SIZE 1 +#define PIO_P31_OFFSET 31 +#define PIO_P31_SIZE 1 + +/* Bitfields in OWER */ + +/* Bitfields in OWDR */ + +/* Bitfields 
in OWSR */ + +/* Bit manipulation macros */ +#define PIO_BIT(name) (1 << PIO_##name##_OFFSET) +#define PIO_BF(name,value) (((value) & ((1 << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET) +#define PIO_BFEXT(name,value) (((value) >> PIO_##name##_OFFSET) & ((1 << PIO_##name##_SIZE) - 1)) +#define PIO_BFINS(name,value,old) (((old) & ~(((1 << PIO_##name##_SIZE) - 1) << PIO_##name##_OFFSET)) | PIO_BF(name,value)) + +/* Register access macros */ +#define pio_readl(port,reg) readl((port)->regs + PIO_##reg) +#define pio_writel(port,reg,value) writel((value), (port)->regs + PIO_##reg) + +void at32_init_pio(struct platform_device *pdev); + +#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */ diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c new file mode 100644 index 00000000000..03306eb0345 --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.c @@ -0,0 +1,289 @@ +/* + * System Manager driver for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/random.h> +#include <linux/spinlock.h> + +#include <asm/intc.h> +#include <asm/io.h> +#include <asm/irq.h> + +#include <asm/arch/sm.h> + +#include "sm.h" + +#define SM_EIM_IRQ_RESOURCE 1 +#define SM_PM_IRQ_RESOURCE 2 +#define SM_RTC_IRQ_RESOURCE 3 + +#define to_eim(irqc) container_of(irqc, struct at32_sm, irqc) + +struct at32_sm system_manager; + +int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +/* + * External Interrupt Module (EIM). + * + * EIM gets level- or edge-triggered interrupts of either polarity + * from the outside and converts it to active-high level-triggered + * interrupts that the internal interrupt controller can handle. EIM + * also provides masking/unmasking of interrupts, as well as + * acknowledging of edge-triggered interrupts. + */ + +static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq); + disable_irq(irq); + return IRQ_NONE; +} + +static struct irqaction eim_spurious_action = { + .handler = spurious_eim_interrupt, +}; + +static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct irq_controller * irqc = dev_id; + struct at32_sm *sm = to_eim(irqc); + unsigned long pending; + + /* + * No need to disable interrupts globally. 
The interrupt + * level relevant to this group must be masked all the time, + * so we know that this particular EIM instance will not be + * re-entered. + */ + spin_lock(&sm->lock); + + pending = intc_get_pending(sm->irqc.irq_group); + if (unlikely(!pending)) { + printk(KERN_ERR "EIM (group %u): No interrupts pending!\n", + sm->irqc.irq_group); + goto unlock; + } + + do { + struct irqaction *action; + unsigned int i; + + i = fls(pending) - 1; + pending &= ~(1 << i); + action = sm->action[i]; + + /* Acknowledge the interrupt */ + sm_writel(sm, EIM_ICR, 1 << i); + + spin_unlock(&sm->lock); + + if (action->flags & SA_INTERRUPT) + local_irq_disable(); + action->handler(sm->irqc.first_irq + i, action->dev_id, regs); + local_irq_enable(); + spin_lock(&sm->lock); + if (action->flags & SA_SAMPLE_RANDOM) + add_interrupt_randomness(sm->irqc.first_irq + i); + } while (pending); + +unlock: + spin_unlock(&sm->lock); + return IRQ_HANDLED; +} + +static void eim_mask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IDR, 1 << i); +} + +static void eim_unmask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IER, 1 << i); +} + +static int eim_setup(struct irq_controller *irqc, unsigned int irq, + struct irqaction *action) +{ + struct at32_sm *sm = to_eim(irqc); + sm->action[irq - sm->irqc.first_irq] = action; + /* Acknowledge earlier interrupts */ + sm_writel(sm, EIM_ICR, (1<<(irq - sm->irqc.first_irq))); + eim_unmask(irqc, irq); + return 0; +} + +static void eim_free(struct irq_controller *irqc, unsigned int irq, + void *dev) +{ + struct at32_sm *sm = to_eim(irqc); + eim_mask(irqc, irq); + sm->action[irq - sm->irqc.first_irq] = &eim_spurious_action; +} + +static int eim_set_type(struct irq_controller *irqc, unsigned int irq, + unsigned int type) +{ + struct at32_sm 
*sm = to_eim(irqc); + unsigned long flags; + u32 value, pattern; + + spin_lock_irqsave(&sm->lock, flags); + + pattern = 1 << (irq - sm->irqc.first_irq); + + value = sm_readl(sm, EIM_MODE); + if (type & IRQ_TYPE_LEVEL) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_MODE, value); + value = sm_readl(sm, EIM_EDGE); + if (type & IRQ_EDGE_RISING) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_EDGE, value); + value = sm_readl(sm, EIM_LEVEL); + if (type & IRQ_LEVEL_HIGH) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_LEVEL, value); + + spin_unlock_irqrestore(&sm->lock, flags); + + return 0; +} + +static unsigned int eim_get_type(struct irq_controller *irqc, + unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned long flags; + unsigned int type = 0; + u32 mode, edge, level, pattern; + + pattern = 1 << (irq - sm->irqc.first_irq); + + spin_lock_irqsave(&sm->lock, flags); + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + spin_unlock_irqrestore(&sm->lock, flags); + + if (mode & pattern) + type |= IRQ_TYPE_LEVEL; + if (edge & pattern) + type |= IRQ_EDGE_RISING; + if (level & pattern) + type |= IRQ_LEVEL_HIGH; + + return type; +} + +static struct irq_controller_class eim_irq_class = { + .typename = "EIM", + .handle = eim_handle_irq, + .setup = eim_setup, + .free = eim_free, + .mask = eim_mask, + .unmask = eim_unmask, + .set_type = eim_set_type, + .get_type = eim_get_type, +}; + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + u32 pattern; + int ret; + + /* + * The EIM is really the same module as SM, so register + * mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. 
+ */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + sm->irqc.nr_irqs = fls(pattern); + + ret = -ENOMEM; + sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs, + GFP_KERNEL); + if (!sm->action) + goto out; + + for (i = 0; i < sm->irqc.nr_irqs; i++) + sm->action[i] = &eim_spurious_action; + + spin_lock_init(&sm->lock); + sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start; + sm->irqc.class = &eim_irq_class; + + ret = intc_register_controller(&sm->irqc); + if (ret < 0) + goto out_free_actions; + + printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n", + sm->regs, sm->irqc.irq_group); + printk("EIM: Handling %u external IRQs, starting with IRQ%u\n", + sm->irqc.nr_irqs, sm->irqc.first_irq); + + return 0; + +out_free_actions: + kfree(sm->action); +out: + return ret; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h new file mode 100644 index 00000000000..27565822ae2 --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.h @@ -0,0 +1,240 @@ +/* + * Register definitions for SM + * + * System Manager + */ +#ifndef __ASM_AVR32_SM_H__ +#define __ASM_AVR32_SM_H__ + +/* SM register offsets */ +#define SM_PM_MCCTRL 0x0000 +#define SM_PM_CKSEL 0x0004 +#define SM_PM_CPU_MASK 0x0008 +#define SM_PM_HSB_MASK 0x000c +#define SM_PM_PBA_MASK 0x0010 +#define SM_PM_PBB_MASK 0x0014 +#define SM_PM_PLL0 0x0020 +#define SM_PM_PLL1 0x0024 +#define SM_PM_VCTRL 0x0030 +#define SM_PM_VMREF 0x0034 +#define SM_PM_VMV 0x0038 +#define SM_PM_IER 0x0040 +#define SM_PM_IDR 0x0044 +#define SM_PM_IMR 0x0048 +#define SM_PM_ISR 0x004c +#define SM_PM_ICR 0x0050 +#define SM_PM_GCCTRL 0x0060 +#define SM_RTC_CTRL 0x0080 +#define SM_RTC_VAL 0x0084 +#define SM_RTC_TOP 0x0088 +#define SM_RTC_IER 0x0090 +#define SM_RTC_IDR 0x0094 +#define SM_RTC_IMR 0x0098 +#define SM_RTC_ISR 0x009c +#define SM_RTC_ICR 0x00a0 +#define SM_WDT_CTRL 0x00b0 +#define SM_WDT_CLR 0x00b4 +#define SM_WDT_EXT 
0x00b8 +#define SM_RC_RCAUSE 0x00c0 +#define SM_EIM_IER 0x0100 +#define SM_EIM_IDR 0x0104 +#define SM_EIM_IMR 0x0108 +#define SM_EIM_ISR 0x010c +#define SM_EIM_ICR 0x0110 +#define SM_EIM_MODE 0x0114 +#define SM_EIM_EDGE 0x0118 +#define SM_EIM_LEVEL 0x011c +#define SM_EIM_TEST 0x0120 +#define SM_EIM_NMIC 0x0124 + +/* Bitfields in PM_MCCTRL */ + +/* Bitfields in PM_CKSEL */ +#define SM_CPUSEL_OFFSET 0 +#define SM_CPUSEL_SIZE 3 +#define SM_CPUDIV_OFFSET 7 +#define SM_CPUDIV_SIZE 1 +#define SM_HSBSEL_OFFSET 8 +#define SM_HSBSEL_SIZE 3 +#define SM_HSBDIV_OFFSET 15 +#define SM_HSBDIV_SIZE 1 +#define SM_PBASEL_OFFSET 16 +#define SM_PBASEL_SIZE 3 +#define SM_PBADIV_OFFSET 23 +#define SM_PBADIV_SIZE 1 +#define SM_PBBSEL_OFFSET 24 +#define SM_PBBSEL_SIZE 3 +#define SM_PBBDIV_OFFSET 31 +#define SM_PBBDIV_SIZE 1 + +/* Bitfields in PM_CPU_MASK */ + +/* Bitfields in PM_HSB_MASK */ + +/* Bitfields in PM_PBA_MASK */ + +/* Bitfields in PM_PBB_MASK */ + +/* Bitfields in PM_PLL0 */ +#define SM_PLLEN_OFFSET 0 +#define SM_PLLEN_SIZE 1 +#define SM_PLLOSC_OFFSET 1 +#define SM_PLLOSC_SIZE 1 +#define SM_PLLOPT_OFFSET 2 +#define SM_PLLOPT_SIZE 3 +#define SM_PLLDIV_OFFSET 8 +#define SM_PLLDIV_SIZE 8 +#define SM_PLLMUL_OFFSET 16 +#define SM_PLLMUL_SIZE 8 +#define SM_PLLCOUNT_OFFSET 24 +#define SM_PLLCOUNT_SIZE 6 +#define SM_PLLTEST_OFFSET 31 +#define SM_PLLTEST_SIZE 1 + +/* Bitfields in PM_PLL1 */ + +/* Bitfields in PM_VCTRL */ +#define SM_VAUTO_OFFSET 0 +#define SM_VAUTO_SIZE 1 +#define SM_PM_VCTRL_VAL_OFFSET 8 +#define SM_PM_VCTRL_VAL_SIZE 7 + +/* Bitfields in PM_VMREF */ +#define SM_REFSEL_OFFSET 0 +#define SM_REFSEL_SIZE 4 + +/* Bitfields in PM_VMV */ +#define SM_PM_VMV_VAL_OFFSET 0 +#define SM_PM_VMV_VAL_SIZE 8 + +/* Bitfields in PM_IER */ + +/* Bitfields in PM_IDR */ + +/* Bitfields in PM_IMR */ + +/* Bitfields in PM_ISR */ + +/* Bitfields in PM_ICR */ +#define SM_LOCK0_OFFSET 0 +#define SM_LOCK0_SIZE 1 +#define SM_LOCK1_OFFSET 1 +#define SM_LOCK1_SIZE 1 +#define SM_WAKE_OFFSET 2 
+#define SM_WAKE_SIZE 1 +#define SM_VOK_OFFSET 3 +#define SM_VOK_SIZE 1 +#define SM_VMRDY_OFFSET 4 +#define SM_VMRDY_SIZE 1 +#define SM_CKRDY_OFFSET 5 +#define SM_CKRDY_SIZE 1 + +/* Bitfields in PM_GCCTRL */ +#define SM_OSCSEL_OFFSET 0 +#define SM_OSCSEL_SIZE 1 +#define SM_PLLSEL_OFFSET 1 +#define SM_PLLSEL_SIZE 1 +#define SM_CEN_OFFSET 2 +#define SM_CEN_SIZE 1 +#define SM_CPC_OFFSET 3 +#define SM_CPC_SIZE 1 +#define SM_DIVEN_OFFSET 4 +#define SM_DIVEN_SIZE 1 +#define SM_DIV_OFFSET 8 +#define SM_DIV_SIZE 8 + +/* Bitfields in RTC_CTRL */ +#define SM_PCLR_OFFSET 1 +#define SM_PCLR_SIZE 1 +#define SM_TOPEN_OFFSET 2 +#define SM_TOPEN_SIZE 1 +#define SM_CLKEN_OFFSET 3 +#define SM_CLKEN_SIZE 1 +#define SM_PSEL_OFFSET 8 +#define SM_PSEL_SIZE 16 + +/* Bitfields in RTC_VAL */ +#define SM_RTC_VAL_VAL_OFFSET 0 +#define SM_RTC_VAL_VAL_SIZE 31 + +/* Bitfields in RTC_TOP */ +#define SM_RTC_TOP_VAL_OFFSET 0 +#define SM_RTC_TOP_VAL_SIZE 32 + +/* Bitfields in RTC_IER */ + +/* Bitfields in RTC_IDR */ + +/* Bitfields in RTC_IMR */ + +/* Bitfields in RTC_ISR */ + +/* Bitfields in RTC_ICR */ +#define SM_TOPI_OFFSET 0 +#define SM_TOPI_SIZE 1 + +/* Bitfields in WDT_CTRL */ +#define SM_KEY_OFFSET 24 +#define SM_KEY_SIZE 8 + +/* Bitfields in WDT_CLR */ + +/* Bitfields in WDT_EXT */ + +/* Bitfields in RC_RCAUSE */ +#define SM_POR_OFFSET 0 +#define SM_POR_SIZE 1 +#define SM_BOD_OFFSET 1 +#define SM_BOD_SIZE 1 +#define SM_EXT_OFFSET 2 +#define SM_EXT_SIZE 1 +#define SM_WDT_OFFSET 3 +#define SM_WDT_SIZE 1 +#define SM_NTAE_OFFSET 4 +#define SM_NTAE_SIZE 1 +#define SM_SERP_OFFSET 5 +#define SM_SERP_SIZE 1 + +/* Bitfields in EIM_IER */ + +/* Bitfields in EIM_IDR */ + +/* Bitfields in EIM_IMR */ + +/* Bitfields in EIM_ISR */ + +/* Bitfields in EIM_ICR */ + +/* Bitfields in EIM_MODE */ + +/* Bitfields in EIM_EDGE */ +#define SM_INT0_OFFSET 0 +#define SM_INT0_SIZE 1 +#define SM_INT1_OFFSET 1 +#define SM_INT1_SIZE 1 +#define SM_INT2_OFFSET 2 +#define SM_INT2_SIZE 1 +#define SM_INT3_OFFSET 3 +#define 
SM_INT3_SIZE 1 + +/* Bitfields in EIM_LEVEL */ + +/* Bitfields in EIM_TEST */ +#define SM_TESTEN_OFFSET 31 +#define SM_TESTEN_SIZE 1 + +/* Bitfields in EIM_NMIC */ +#define SM_EN_OFFSET 0 +#define SM_EN_SIZE 1 + +/* Bit manipulation macros */ +#define SM_BIT(name) (1 << SM_##name##_OFFSET) +#define SM_BF(name,value) (((value) & ((1 << SM_##name##_SIZE) - 1)) << SM_##name##_OFFSET) +#define SM_BFEXT(name,value) (((value) >> SM_##name##_OFFSET) & ((1 << SM_##name##_SIZE) - 1)) +#define SM_BFINS(name,value,old) (((old) & ~(((1 << SM_##name##_SIZE) - 1) << SM_##name##_OFFSET)) | SM_BF(name,value)) + +/* Register access macros */ +#define sm_readl(port,reg) readl((port)->regs + SM_##reg) +#define sm_writel(port,reg,value) writel((value), (port)->regs + SM_##reg) + +#endif /* __ASM_AVR32_SM_H__ */ diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile new file mode 100644 index 00000000000..0066491f90d --- /dev/null +++ b/arch/avr32/mm/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the Linux/AVR32 kernel. +# + +obj-y += init.o clear_page.o copy_page.o dma-coherent.o +obj-y += ioremap.o cache.o fault.o tlb.o diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c new file mode 100644 index 00000000000..450515b245a --- /dev/null +++ b/arch/avr32/mm/cache.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/highmem.h> +#include <linux/unistd.h> + +#include <asm/cacheflush.h> +#include <asm/cachectl.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +/* + * If you attempt to flush anything more than this, you need superuser + * privileges. The value is completely arbitrary. 
+ */ +#define CACHEFLUSH_MAX_LEN 1024 + +void invalidate_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + + //printk("invalidate dcache: %p + %u\n", start, size); + + /* You asked for it, you got it */ + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_dcache_line((void *)v); +} + +void clean_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + clean_dcache_line((void *)v); + flush_write_buffer(); +} + +void flush_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + flush_dcache_line((void *)v); + flush_write_buffer(); +} + +void invalidate_icache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.icache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_icache_line((void *)v); +} + +static inline void __flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long v, linesz; + + linesz = boot_cpu_data.dcache.linesz; + for (v = start; v < end; v += linesz) { + clean_dcache_line((void *)v); + invalidate_icache_line((void *)v); + } + + flush_write_buffer(); +} + +/* + * This one is called after a module has been loaded. 
+ */ +void flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long linesz; + + linesz = boot_cpu_data.dcache.linesz; + __flush_icache_range(start & ~(linesz - 1), + (end + linesz - 1) & ~(linesz - 1)); +} + +/* + * This one is called from do_no_page(), do_swap_page() and install_page(). + */ +void flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + if (vma->vm_flags & VM_EXEC) { + void *v = kmap(page); + __flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE); + kunmap(page); /* kunmap() takes the struct page, not the address kmap() returned */ + } +} + +/* + * This one is used by copy_to_user_page() + */ +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if (vma->vm_flags & VM_EXEC) + flush_icache_range(addr, addr + len); +} + +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len) +{ + int ret; + + if (len > CACHEFLUSH_MAX_LEN) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + } + + ret = -EFAULT; + if (!access_ok(VERIFY_WRITE, addr, len)) + goto out; + + switch (operation) { + case CACHE_IFLUSH: + flush_icache_range((unsigned long)addr, + (unsigned long)addr + len); + ret = 0; + break; + default: + ret = -EINVAL; + } + +out: + return ret; +} diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S new file mode 100644 index 00000000000..5d70dca0069 --- /dev/null +++ b/arch/avr32/mm/clear_page.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * clear_page + * r12: P1 address (to) + */ + .text + .global clear_page +clear_page: + sub r9, r12, -PAGE_SIZE + mov r10, 0 + mov r11, 0 +0: st.d r12++, r10 + cp r12, r9 + brne 0b + mov pc, lr diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S new file mode 100644 index 00000000000..c2b3752946b --- /dev/null +++ b/arch/avr32/mm/copy_page.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * copy_page + * + * r12 to (P1 address) + * r11 from (P1 address) + * r8-r10 scratch + */ + .text + .global copy_page +copy_page: + sub r10, r11, -(1 << PAGE_SHIFT) + /* pref r11[0] */ +1: /* pref r11[8] */ + ld.d r8, r11++ + st.d r12++, r8 + cp r11, r10 + brlo 1b + mov pc, lr diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c new file mode 100644 index 00000000000..44ab8a7bdae --- /dev/null +++ b/arch/avr32/mm/dma-coherent.c @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/dma-mapping.h> + +#include <asm/addrspace.h> +#include <asm/cacheflush.h> + +void dma_cache_sync(void *vaddr, size_t size, int direction) +{ + /* + * No need to sync an uncached area + */ + if (PXSEG(vaddr) == P2SEG) + return; + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + dma_cache_inv(vaddr, size); + break; + case DMA_TO_DEVICE: /* writeback only */ + dma_cache_wback(vaddr, size); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + dma_cache_wback_inv(vaddr, size); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL(dma_cache_sync); + +static struct page *__dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page, *free, *end; + int order; + + size = PAGE_ALIGN(size); + order = get_order(size); + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + split_page(page, order); + + /* + * When accessing physical memory with valid cache data, we + * get a cache hit even if the virtual memory region is marked + * as uncached. + * + * Since the memory is newly allocated, there is no point in + * doing a writeback. If the previous owner cares, he should + * have flushed the cache before releasing the memory. 
+ */ + invalidate_dcache_region(phys_to_virt(page_to_phys(page)), size); + + *handle = page_to_bus(page); + free = page + (size >> PAGE_SHIFT); + end = page + (1 << order); + + /* + * Free any unused pages + */ + while (free < end) { + __free_page(free); + free++; + } + + return page; +} + +static void __dma_free(struct device *dev, size_t size, + struct page *page, dma_addr_t handle) +{ + struct page *end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); + + while (page < end) + __free_page(page++); +} + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + void *ret = NULL; + + page = __dma_alloc(dev, size, handle, gfp); + if (page) + ret = phys_to_uncached(page_to_phys(page)); + + return ret; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + void *addr = phys_to_cached(uncached_to_phys(cpu_addr)); + struct page *page; + + pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n", + cpu_addr, (unsigned long)handle, (unsigned)size); + BUG_ON(!virt_addr_valid(addr)); + page = virt_to_page(addr); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_coherent); + +#if 0 +void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + + page = __dma_alloc(dev, size, handle, gfp); + + /* Now, map the page into P3 with write-combining turned on */ + return __ioremap(page_to_phys(page), size, _PAGE_BUFFER); +} +EXPORT_SYMBOL(dma_alloc_writecombine); + +void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + struct page *page; + + iounmap(cpu_addr); + + page = bus_to_page(handle); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_writecombine); +#endif diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c new file mode 100644 index 00000000000..678557260a3 --- /dev/null +++ 
b/arch/avr32/mm/fault.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/sh/mm/fault.c: + * Copyright (C) 1999 Niibe Yutaka + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/pagemap.h> + +#include <asm/kdebug.h> +#include <asm/mmu_context.h> +#include <asm/sysreg.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#ifdef DEBUG +static void dump_code(unsigned long pc) +{ + char *p = (char *)pc; + char val; + int i; + + + printk(KERN_DEBUG "Code:"); + for (i = 0; i < 16; i++) { + if (__get_user(val, p + i)) + break; + printk(" %02x", val); + } + printk("\n"); +} +#endif + +#ifdef CONFIG_KPROBES +ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + +/* Hook to register for page fault notifications */ +int register_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&notify_page_fault_chain, nb); +} + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); +} + +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); +} +#else +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + return NOTIFY_DONE; +} +#endif + +/* + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. + * + * ecr is the Exception Cause Register.
Possible values are: + * 5: Page not found (instruction access) + * 6: Protection fault (instruction access) + * 12: Page not found (read access) + * 13: Page not found (write access) + * 14: Protection fault (read access) + * 15: Protection fault (write access) + */ +asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + const struct exception_table_entry *fixup; + unsigned long address; + unsigned long page; + int writeaccess = 0; + + if (notify_page_fault(DIE_PAGE_FAULT, regs, + ecr, SIGSEGV) == NOTIFY_STOP) + return; + + address = sysreg_read(TLBEAR); + + tsk = current; + mm = tsk->mm; + + /* + * If we're in an interrupt or have no user context, we must + * not take the fault... + */ + if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + goto no_context; + + local_irq_enable(); + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so we + * can handle it... + */ +good_area: + //pr_debug("good area: vm_flags = 0x%lx\n", vma->vm_flags); + switch (ecr) { + case ECR_PROTECTION_X: + case ECR_TLB_MISS_X: + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + break; + case ECR_PROTECTION_R: + case ECR_TLB_MISS_R: + if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + goto bad_area; + break; + case ECR_PROTECTION_W: + case ECR_TLB_MISS_W: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + writeaccess = 1; + break; + default: + panic("Unhandled case %lu in do_page_fault!", ecr); + } + + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault. 
+ */ +survive: + switch (handle_mm_fault(mm, vma, address, writeaccess)) { + case VM_FAULT_MINOR: + tsk->min_flt++; + break; + case VM_FAULT_MAJOR: + tsk->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return; + + /* + * Something tried to access memory that isn't in our memory + * map. Fix it, but check if it's kernel or user first... + */ +bad_area: + pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n", + tsk->comm, tsk->pid, address, ecr); + + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { + /* Hmm...we have to pass address and ecr somehow... */ + /* tsk->thread.address = address; + tsk->thread.error_code = ecr; */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); + + page = sysreg_read(PTBR); + printk("ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } +#endif + pr_debug("Sending SIGSEGV to PID %d...\n", + tsk->pid); + force_sig(SIGSEGV, tsk); + return; + } + +no_context: + pr_debug("No context\n"); + + /* Are we prepared to handle this kernel fault? */ + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + pr_debug("Found fixup at %08lx\n", fixup->fixup); + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have + * to terminate things with extreme prejudice. 
+ */ + if (address < PAGE_SIZE) + printk(KERN_ALERT + "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT + "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n", address); + printk(KERN_ALERT "pc = %08lx\n", regs->pc); + + page = sysreg_read(PTBR); + printk(KERN_ALERT "ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } + die("\nOops", regs, ecr); + do_exit(SIGKILL); + + /* + * We ran out of memory, or some other thing happened to us + * that made us unable to handle the page fault gracefully. + */ +out_of_memory: + printk("Out of memory\n"); + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: Killing process %s\n", tsk->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel or + * user mode. + */ + /* address, error_code, trap_no, ... */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); +#endif + pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid); + force_sig(SIGBUS, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; +} + +asmlinkage void do_bus_error(unsigned long addr, int write_access, + struct pt_regs *regs) +{ + printk(KERN_ALERT + "Bus error at physical address 0x%08lx (%s access)\n", + addr, write_access ? "write" : "read"); + printk(KERN_INFO "DTLB dump:\n"); + dump_dtlb(); + die("Bus Error", regs, write_access); + do_exit(SIGKILL); +} + +/* + * This functionality is currently not possible to implement because + * we're using segmentation to ensure a fixed mapping of the kernel + * virtual address space. 
+ * + * It would be possible to implement this, but it would require us to + * disable segmentation at startup and load the kernel mappings into + * the TLB like any other pages. There will be lots of trickery to + * avoid recursive invocation of the TLB miss handler, though... + */ +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + +} +EXPORT_SYMBOL(kernel_map_pages); +#endif diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c new file mode 100644 index 00000000000..3e6c4103980 --- /dev/null +++ b/arch/avr32/mm/init.c @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/mmzone.h> +#include <linux/bootmem.h> +#include <linux/pagemap.h> +#include <linux/pfn.h> +#include <linux/nodemask.h> + +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/tlb.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/setup.h> +#include <asm/sections.h> + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +struct page *empty_zero_page; + +/* + * Cache of MMU context last used. 
+ */ +unsigned long mmu_context_cache = NO_CONTEXT; + +#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT) +#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn) + +void show_mem(void) +{ + int total = 0, reserved = 0, cached = 0; + int slab = 0, free = 0, shared = 0; + pg_data_t *pgdat; + + printk("Mem-info:\n"); + show_free_areas(); + + for_each_online_pgdat(pgdat) { + struct page *page, *end; + + page = pgdat->node_mem_map; + end = page + pgdat->node_spanned_pages; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + } + + printk ("%d pages of RAM\n", total); + printk ("%d free pages\n", free); + printk ("%d reserved pages\n", reserved); + printk ("%d slab pages\n", slab); + printk ("%d pages shared\n", shared); + printk ("%d pages swap cached\n", cached); +} + +static void __init print_memory_map(const char *what, + struct tag_mem_range *mem) +{ + printk ("%s:\n", what); + for (; mem; mem = mem->next) { + printk (" %08lx - %08lx\n", + (unsigned long)mem->addr, + (unsigned long)(mem->addr + mem->size)); + } +} + +#define MAX_LOWMEM HIGHMEM_START +#define MAX_LOWMEM_PFN PFN_DOWN(MAX_LOWMEM) + +/* + * Sort a list of memory regions in-place by ascending address. + * + * We're using bubble sort because we only have singly linked lists + * with few elements. 
+ */ +static void __init sort_mem_list(struct tag_mem_range **pmem) +{ + int done; + struct tag_mem_range **a, **b; + + if (!*pmem) + return; + + do { + done = 1; + a = pmem, b = &(*pmem)->next; + while (*b) { + if ((*a)->addr > (*b)->addr) { + struct tag_mem_range *tmp; + tmp = (*b)->next; + (*b)->next = *a; + *a = *b; + *b = tmp; + done = 0; + } + a = &(*a)->next; + b = &(*a)->next; + } + } while (!done); +} + +/* + * Find a free memory region large enough for storing the + * bootmem bitmap. + */ +static unsigned long __init +find_bootmap_pfn(const struct tag_mem_range *mem) +{ + unsigned long bootmap_pages, bootmap_len; + unsigned long node_pages = PFN_UP(mem->size); + unsigned long bootmap_addr = mem->addr; + struct tag_mem_range *reserved = mem_reserved; + struct tag_mem_range *ramdisk = mem_ramdisk; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + + bootmap_pages = bootmem_bootmap_pages(node_pages); + bootmap_len = bootmap_pages << PAGE_SHIFT; + + /* + * Find a large enough region without reserved pages for + * storing the bootmem bitmap. We can take advantage of the + * fact that all lists have been sorted. + * + * We have to check explicitly reserved regions as well as the + * kernel image and any RAMDISK images... + * + * Oh, and we have to make sure we don't overwrite the taglist + * since we're going to use it until the bootmem allocator is + * fully up and running. 
+ */ + while (1) { + if ((bootmap_addr < kern_end) && + ((bootmap_addr + bootmap_len) > kern_start)) + bootmap_addr = kern_end; + + while (reserved && + (bootmap_addr >= (reserved->addr + reserved->size))) + reserved = reserved->next; + + if (reserved && + ((bootmap_addr + bootmap_len) >= reserved->addr)) { + bootmap_addr = reserved->addr + reserved->size; + continue; + } + + while (ramdisk && + (bootmap_addr >= (ramdisk->addr + ramdisk->size))) + ramdisk = ramdisk->next; + + if (!ramdisk || + ((bootmap_addr + bootmap_len) < ramdisk->addr)) + break; + + bootmap_addr = ramdisk->addr + ramdisk->size; + } + + if ((PFN_UP(bootmap_addr) + bootmap_len) >= (mem->addr + mem->size)) + return ~0UL; + + return PFN_UP(bootmap_addr); +} + +void __init setup_bootmem(void) +{ + unsigned bootmap_size; + unsigned long first_pfn, bootmap_pfn, pages; + unsigned long max_pfn, max_low_pfn; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + unsigned node = 0; + struct tag_mem_range *bank, *res; + + sort_mem_list(&mem_phys); + sort_mem_list(&mem_reserved); + + print_memory_map("Physical memory", mem_phys); + print_memory_map("Reserved memory", mem_reserved); + + nodes_clear(node_online_map); + + if (mem_ramdisk) { +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = __va(mem_ramdisk->addr); + initrd_end = initrd_start + mem_ramdisk->size; + + print_memory_map("RAMDISK images", mem_ramdisk); + if (mem_ramdisk->next) + printk(KERN_WARNING + "Warning: Only the first RAMDISK image " + "will be used\n"); + sort_mem_list(&mem_ramdisk); +#else + printk(KERN_WARNING "RAM disk image present, but " + "no initrd support in kernel!\n"); +#endif + } + + if (mem_phys->next) + printk(KERN_WARNING "Only using first memory bank\n"); + + for (bank = mem_phys; bank; bank = NULL) { + first_pfn = PFN_UP(bank->addr); + max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size); + bootmap_pfn = find_bootmap_pfn(bank); + if (bootmap_pfn > max_pfn) + panic("No space for 
bootmem bitmap!\n"); + + if (max_low_pfn > MAX_LOWMEM_PFN) { + max_low_pfn = MAX_LOWMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* + * Lowmem is memory that can be addressed + * directly through P1/P2 + */ + printk(KERN_WARNING + "Node %u: Only %ld MiB of memory will be used.\n", + node, MAX_LOWMEM >> 20); + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else +#error HIGHMEM is not supported by AVR32 yet +#endif + } + + /* Initialize the boot-time allocator with low memory only. */ + bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn, + first_pfn, max_low_pfn); + + printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n", + node, NODE_DATA(node)->bdata, + NODE_DATA(node)->bdata->node_bootmem_map); + + /* + * Register fully available RAM pages with the bootmem + * allocator. + */ + pages = max_low_pfn - first_pfn; + free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn), + PFN_PHYS(pages)); + + /* + * Reserve space for the kernel image (if present in + * this node)... + */ + if ((kern_start >= PFN_PHYS(first_pfn)) && + (kern_start < PFN_PHYS(max_pfn))) { + printk("Node %u: Kernel image %08lx - %08lx\n", + node, kern_start, kern_end); + reserve_bootmem_node(NODE_DATA(node), kern_start, + kern_end - kern_start); + } + + /* ...the bootmem bitmap... */ + reserve_bootmem_node(NODE_DATA(node), + PFN_PHYS(bootmap_pfn), + bootmap_size); + + /* ...any RAMDISK images... */ + for (res = mem_ramdisk; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: RAMDISK %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + /* ...and any other reserved regions. 
*/ + for (res = mem_reserved; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: Reserved %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + node_set_online(node); + } +} + +/* + * paging_init() sets up the page tables + * + * This routine also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + extern unsigned long _evba; + void *zero_page; + int nid; + + /* + * Make sure we can handle exceptions before enabling + * paging. Not that we should ever _get_ any exceptions this + * early, but you never know... + */ + printk("Exception vectors start at %p\n", &_evba); + sysreg_write(EVBA, (unsigned long)&_evba); + + /* + * Since we are ready to handle exceptions now, we should let + * the CPU generate them... + */ + __asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT)); + + /* + * Allocate the zero page. The allocator will panic if it + * can't satisfy the request, so no need to check. 
+ */ + zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0), + PAGE_SIZE); + + { + pgd_t *pg_dir; + int i; + + pg_dir = swapper_pg_dir; + sysreg_write(PTBR, (unsigned long)pg_dir); + + for (i = 0; i < PTRS_PER_PGD; i++) + pgd_val(pg_dir[i]) = 0; + + enable_mmu(); + printk ("CPU: Paging enabled\n"); + } + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long low, start_pfn; + + start_pfn = pgdat->bdata->node_boot_start; + start_pfn >>= PAGE_SHIFT; + low = pgdat->bdata->node_low_pfn; + + memset(zones_size, 0, sizeof(zones_size)); + zones_size[ZONE_NORMAL] = low - start_pfn; + + printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", + nid, start_pfn, low); + + free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL); + + printk("Node %u: mem_map starts at %p\n", + pgdat->node_id, pgdat->node_mem_map); + } + + mem_map = NODE_DATA(0)->node_mem_map; + + memset(zero_page, 0, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + int nid, i; + + reservedpages = 0; + high_memory = NULL; + + /* this will put all low memory onto the freelists */ + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long node_pages = 0; + void *node_high_memory; + + num_physpages += pgdat->node_present_pages; + + if (pgdat->node_spanned_pages != 0) + node_pages = free_all_bootmem_node(pgdat); + + totalram_pages += node_pages; + + for (i = 0; i < node_pages; i++) + if (PageReserved(pgdat->node_mem_map + i)) + reservedpages++; + + node_high_memory = (void *)((pgdat->node_start_pfn + + pgdat->node_spanned_pages) + << PAGE_SHIFT); + if (node_high_memory > high_memory) + high_memory = node_high_memory; + } + + max_mapnr = MAP_NR(high_memory); + + codesize = (unsigned long)_etext - (unsigned long)_text; + datasize = (unsigned long)_edata - (unsigned long)_data; + initsize = 
(unsigned long)__init_end - (unsigned long)__init_begin; + + printk ("Memory: %luk/%luk available (%dk kernel code, " + "%dk reserved, %dk data, %dk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), + totalram_pages << (PAGE_SHIFT - 10), + codesize >> 10, + reservedpages << (PAGE_SHIFT - 10), + datasize >> 10, + initsize >> 10); +} + +static inline void free_area(unsigned long addr, unsigned long end, char *s) +{ + unsigned int size = (end - addr) >> 10; + + for (; addr < end; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + ClearPageReserved(page); + init_page_count(page); + free_page(addr); + totalram_pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n", + s, size, end - (size << 10), end); +} + +void free_initmem(void) +{ + free_area((unsigned long)__init_begin, (unsigned long)__init_end, + "init"); +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) + free_area(start, end, "initrd"); +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c new file mode 100644 index 00000000000..536021877df --- /dev/null +++ b/arch/avr32/mm/ioremap.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/vmalloc.h> +#include <linux/module.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/addrspace.h> + +static inline int remap_area_pte(pte_t *pte, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long pfn; + + pfn = phys_addr >> PAGE_SHIFT; + do { + WARN_ON(!pte_none(*pte)); + + set_pte(pte, pfn_pte(pfn, prot)); + address += PAGE_SIZE; + pfn++; + pte++; + } while (address && (address < end)); + + return 0; +} + +static inline int remap_area_pmd(pmd_t *pmd, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pte_t *pte = pte_alloc_kernel(pmd, address); + if (!pte) + return -ENOMEM; + + next = (address + PMD_SIZE) & PMD_MASK; + if (remap_area_pte(pte, address, next, + address + phys_addr, prot)) + return -ENOMEM; + + address = next; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int remap_area_pud(pud_t *pud, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pmd_t *pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + return -ENOMEM; + next = (address + PUD_SIZE) & PUD_MASK; + if (remap_area_pmd(pmd, address, next, + phys_addr + address, prot)) + return -ENOMEM; + + address = next; + pud++; + } while (address && address < end); + + return 0; +} + +static int remap_area_pages(unsigned long address, unsigned long phys_addr, + size_t size, pgprot_t prot) +{ + unsigned long end = address + size; + unsigned long next; + pgd_t *pgd; + int err = 0; + + phys_addr -= address; + + pgd = pgd_offset_k(address); + flush_cache_all(); + BUG_ON(address >= end); + + spin_lock(&init_mm.page_table_lock); + do { + pud_t *pud = pud_alloc(&init_mm, pgd, address); + + err = -ENOMEM; + if (!pud) + break; + + next = (address + 
PGDIR_SIZE) & PGDIR_MASK; + if (next < address || next > end) + next = end; + err = remap_area_pud(pud, address, next, + phys_addr + address, prot); + if (err) + break; + + address = next; + pgd++; + } while (address && (address < end)); + + spin_unlock(&init_mm.page_table_lock); + flush_tlb_all(); + return err; +} + +/* + * Re-map an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access physical + * memory directly. + */ +void __iomem *__ioremap(unsigned long phys_addr, size_t size, + unsigned long flags) +{ + void *addr; + struct vm_struct *area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* + * Check if we can simply use the P4 segment. This area is + * uncacheable, so if caching/buffering is requested, we can't + * use it. + */ + if ((phys_addr >= P4SEG) && (flags == 0)) + return (void __iomem *)phys_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * XXX: When mapping regular RAM, we'd better make damn sure + * it's never used for anything else. But this is really the + * caller's responsibility... + */ + if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr) + return (void __iomem *)P2SEGADDR(phys_addr); + + /* Mappings have to be page-aligned */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags); + + /* + * Ok, go for it.. 
+ */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = area->addr; + if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) { + vunmap(addr); + return NULL; + } + + return (void __iomem *)(offset + (char *)addr); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(void __iomem *addr) +{ + struct vm_struct *p; + + if ((unsigned long)addr >= P4SEG) + return; + + p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); + if (unlikely(!p)) { + printk (KERN_ERR "iounmap: bad address %p\n", addr); + return; + } + + kfree (p); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c new file mode 100644 index 00000000000..5d0523bbe29 --- /dev/null +++ b/arch/avr32/mm/tlb.c @@ -0,0 +1,378 @@ +/* + * AVR32 TLB operations + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/mm.h> + +#include <asm/mmu_context.h> + +#define _TLBEHI_I 0x100 + +void show_dtlb_entry(unsigned int index) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags; + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + printk("%2u: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); +} + +void dump_dtlb(void) +{ + unsigned int i; + + printk("ID V G ASID VPN PFN AP SZ C B W D\n"); + for (i = 0; i < 32; i++) + show_dtlb_entry(i); +} + +static unsigned long last_mmucr; + +static inline void set_replacement_pointer(unsigned shift) +{ + unsigned long mmucr, mmucr_save; + + mmucr = mmucr_save = sysreg_read(MMUCR); + + /* Does this mapping already exist? 
*/ + __asm__ __volatile__( + " tlbs\n" + " mfsr %0, %1" + : "=r"(mmucr) + : "i"(SYSREG_MMUCR)); + + if (mmucr & SYSREG_BIT(MMUCR_N)) { + /* Not found -- pick a not-recently-accessed entry */ + unsigned long rp; + unsigned long tlbar = sysreg_read(TLBARLO); + + rp = 32 - fls(tlbar); + if (rp == 32) { + rp = 0; + sysreg_write(TLBARLO, -1L); + } + + mmucr &= 0x13; + mmucr |= (rp << shift); + + sysreg_write(MMUCR, mmucr); + } + + last_mmucr = mmucr; +} + +static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid) +{ + unsigned long vpn; + + vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid; + sysreg_write(TLBEHI, vpn); + cpu_sync_pipeline(); + + set_replacement_pointer(14); + + sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK); + + /* Let's go */ + asm volatile("nop\n\ttlbw" : : : "memory"); + cpu_sync_pipeline(); +} + +void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t pte) +{ + unsigned long flags; + + /* ptrace may call this routine */ + if (vma && current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + update_dtlb(address, pte, get_asid()); + local_irq_restore(flags); +} + +void __flush_tlb_page(unsigned long asid, unsigned long page) +{ + unsigned long mmucr, tlbehi; + + page |= asid; + sysreg_write(TLBEHI, page); + cpu_sync_pipeline(); + asm volatile("tlbs"); + mmucr = sysreg_read(MMUCR); + + if (!(mmucr & SYSREG_BIT(MMUCR_N))) { + unsigned long tlbarlo; + unsigned long entry; + + /* Clear the "valid" bit */ + tlbehi = sysreg_read(TLBEHI); + tlbehi &= ~_TLBEHI_VALID; + sysreg_write(TLBEHI, tlbehi); + cpu_sync_pipeline(); + + /* mark the entry as "not accessed" */ + entry = (mmucr >> 14) & 0x3f; + tlbarlo = sysreg_read(TLBARLO); + tlbarlo |= (0x80000000 >> entry); + sysreg_write(TLBARLO, tlbarlo); + + /* update the entry with valid bit clear */ + asm volatile("tlbw"); + cpu_sync_pipeline(); + } +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + if 
(vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) { + unsigned long flags, asid; + unsigned long saved_asid = MMU_NO_ASID; + + asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK; + page &= PAGE_MASK; + + local_irq_save(flags); + if (vma->vm_mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + __flush_tlb_page(asid, page); + + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + local_irq_restore(flags); + } +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + } else { + unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = MMU_NO_ASID; + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + if (mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + } + local_irq_restore(flags); + } +} + +/* + * TODO: If this is only called for addresses > TASK_SIZE, we can probably + * skip the ASID stuff and just use the Global bit... 
+ */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + flush_tlb_all(); + } else { + unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = get_asid(); + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + set_asid(asid); + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + set_asid(saved_asid); + } + local_irq_restore(flags); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + /* Invalidate all TLB entries of this process by getting a new ASID */ + if (mm->context != NO_CONTEXT) { + unsigned long flags; + + local_irq_save(flags); + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + local_irq_restore(flags); + } +} + +void flush_tlb_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + sysreg_write(MMUCR, sysreg_read(MMUCR) | SYSREG_BIT(MMUCR_I)); + local_irq_restore(flags); +} + +#ifdef CONFIG_PROC_FS + +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +static void *tlb_start(struct seq_file *tlb, loff_t *pos) +{ + static unsigned long tlb_index; + + if (*pos >= 32) + return NULL; + + tlb_index = 0; + return &tlb_index; +} + +static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos) +{ + unsigned long *index = v; + + if (*index >= 31) + return NULL; + + ++*pos; + ++*index; + return index; +} + +static void tlb_stop(struct seq_file *tlb, void *v) +{ + +} + +static int tlb_show(struct seq_file *tlb, void *v) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags; + unsigned long *index = v; + + if (*index == 0) + seq_puts(tlb, "ID V G ASID VPN PFN AP SZ C B W D\n"); + + BUG_ON(*index >= 32); + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = 
sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= *index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); + + seq_printf(tlb, "%2lu: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + *index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + return 0; +} + +static struct seq_operations tlb_ops = { + .start = tlb_start, + .next = tlb_next, + .stop = tlb_stop, + .show = tlb_show, +}; + +static int tlb_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tlb_ops); +} + +static struct file_operations proc_tlb_operations = { + .open = tlb_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proctlb_init(void) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry("tlb", 0, NULL); + if (entry) + entry->proc_fops = &proc_tlb_operations; + return 0; +} +late_initcall(proctlb_init); +#endif /* CONFIG_PROC_FS */ diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index a601a17cf56..f7b171b92ea 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -27,7 +27,11 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_HARDIRQS bool - default n + default y + +config GENERIC_HARDIRQS_NO__DO_IRQ + bool + default y config GENERIC_TIME bool @@ -251,6 +255,12 @@ config MB93091_NO_MB endchoice endif +config FUJITSU_MB93493 + bool "MB93493 Multimedia chip" + help + Select this option if the MB93493 multimedia chip is going to be + used. 
+ choice prompt "GP-Relative data support" default GPREL_DATA_8 diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile index 5a827b349b5..32db3499c46 100644 --- a/arch/frv/kernel/Makefile +++ b/arch/frv/kernel/Makefile @@ -10,15 +10,14 @@ extra-y:= head.o init_task.o vmlinux.lds obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \ process.o traps.o ptrace.o signal.o dma.o \ sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \ - debug-stub.o irq.o irq-routing.o sleep.o uaccess.o + debug-stub.o irq.o sleep.o uaccess.o obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o obj-$(CONFIG_MB93091_VDK) += irq-mb93091.o -obj-$(CONFIG_MB93093_PDK) += irq-mb93093.o -obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o obj-$(CONFIG_PM) += pm.o cmode.o obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o +obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c index 1381abcd5cc..369bc0a7443 100644 --- a/arch/frv/kernel/irq-mb93091.c +++ b/arch/frv/kernel/irq-mb93091.c @@ -24,7 +24,6 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #define __reg16(ADDR) (*(volatile unsigned short *)(ADDR)) @@ -33,83 +32,131 @@ #define __get_IFR() ({ __reg16(0xffc0000c); }) #define __clr_IFR(M) do { __reg16(0xffc0000c) = ~(M); wmb(); } while(0) -static void frv_fpga_doirq(struct irq_source *source); -static void frv_fpga_control(struct irq_group *group, int irq, int on); -/*****************************************************************************/ /* - * FPGA IRQ multiplexor + * on-motherboard FPGA PIC operations */ -static struct irq_source frv_fpga[4] = { -#define __FPGA(X, M) \ - [X] = { \ - .muxname = "fpga."#X, \ - .irqmask = M, \ - .doirq = frv_fpga_doirq, \ - } +static void frv_fpga_mask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); - __FPGA(0, 0x0028), - 
__FPGA(1, 0x0050), - __FPGA(2, 0x1c00), - __FPGA(3, 0x6386), -}; + imr |= 1 << (irq - IRQ_BASE_FPGA); -static struct irq_group frv_fpga_irqs = { - .first_irq = IRQ_BASE_FPGA, - .control = frv_fpga_control, - .sources = { - [ 1] = &frv_fpga[3], - [ 2] = &frv_fpga[3], - [ 3] = &frv_fpga[0], - [ 4] = &frv_fpga[1], - [ 5] = &frv_fpga[0], - [ 6] = &frv_fpga[1], - [ 7] = &frv_fpga[3], - [ 8] = &frv_fpga[3], - [ 9] = &frv_fpga[3], - [10] = &frv_fpga[2], - [11] = &frv_fpga[2], - [12] = &frv_fpga[2], - [13] = &frv_fpga[3], - [14] = &frv_fpga[3], - }, -}; + __set_IMR(imr); +} +static void frv_fpga_ack(unsigned int irq) +{ + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} -static void frv_fpga_control(struct irq_group *group, int index, int on) +static void frv_fpga_mask_ack(unsigned int irq) { uint16_t imr = __get_IMR(); - if (on) - imr &= ~(1 << index); - else - imr |= 1 << index; + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); + + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} + +static void frv_fpga_unmask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); + + imr &= ~(1 << (irq - IRQ_BASE_FPGA)); __set_IMR(imr); } -static void frv_fpga_doirq(struct irq_source *source) +static struct irq_chip frv_fpga_pic = { + .name = "mb93091", + .ack = frv_fpga_ack, + .mask = frv_fpga_mask, + .mask_ack = frv_fpga_mask_ack, + .unmask = frv_fpga_unmask, +}; + +/* + * FPGA PIC interrupt handler + */ +static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs) { - uint16_t mask, imr; + uint16_t imr, mask = (unsigned long) _mask; imr = __get_IMR(); - mask = source->irqmask & ~imr & __get_IFR(); - if (mask) { - __set_IMR(imr | mask); - __clr_IFR(mask); - distribute_irqs(&frv_fpga_irqs, mask); - __set_IMR(imr); + mask = mask & ~imr & __get_IFR(); + + /* poll all the triggered IRQs */ + while (mask) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask)); + irq = 31 - irq; + mask &= ~(1 << irq); + + generic_handle_irq(IRQ_BASE_FPGA + irq, regs); } + + return IRQ_HANDLED; } 
+/* + * define an interrupt action for each FPGA PIC output + * - use dev_id to indicate the FPGA PIC input to output mappings + */ +static struct irqaction fpga_irq[4] = { + [0] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.0", + .dev_id = (void *) 0x0028UL, + }, + [1] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.1", + .dev_id = (void *) 0x0050UL, + }, + [2] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.2", + .dev_id = (void *) 0x1c00UL, + }, + [3] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.3", + .dev_id = (void *) 0x6386UL, + } +}; + +/* + * initialise the motherboard FPGA's PIC + */ void __init fpga_init(void) { + int irq; + + /* all PIC inputs are all set to be low-level driven, apart from the + * NMI button (15) which is fixed at falling-edge + */ __set_IMR(0x7ffe); __clr_IFR(0x0000); - frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL0); - frv_irq_route_external(&frv_fpga[1], IRQ_CPU_EXTERNAL1); - frv_irq_route_external(&frv_fpga[2], IRQ_CPU_EXTERNAL2); - frv_irq_route_external(&frv_fpga[3], IRQ_CPU_EXTERNAL3); - frv_irq_set_group(&frv_fpga_irqs); + for (irq = IRQ_BASE_FPGA + 1; irq <= IRQ_BASE_FPGA + 14; irq++) + set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_level_irq); + + set_irq_chip_and_handler(IRQ_FPGA_NMI, &frv_fpga_pic, handle_edge_irq); + + /* the FPGA drives the first four external IRQ inputs on the CPU PIC */ + setup_irq(IRQ_CPU_EXTERNAL0, &fpga_irq[0]); + setup_irq(IRQ_CPU_EXTERNAL1, &fpga_irq[1]); + setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[2]); + setup_irq(IRQ_CPU_EXTERNAL3, &fpga_irq[3]); } diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c index 48b2a642088..a43a2215895 100644 --- a/arch/frv/kernel/irq-mb93093.c +++ 
b/arch/frv/kernel/irq-mb93093.c @@ -1,6 +1,6 @@ /* irq-mb93093.c: MB93093 FPGA interrupt handling * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -24,7 +24,6 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #define __reg16(ADDR) (*(volatile unsigned short *)(__region_CS2 + (ADDR))) @@ -33,66 +32,102 @@ #define __get_IFR() ({ __reg16(0x02); }) #define __clr_IFR(M) do { __reg16(0x02) = ~(M); wmb(); } while(0) -static void frv_fpga_doirq(struct irq_source *source); -static void frv_fpga_control(struct irq_group *group, int irq, int on); - -/*****************************************************************************/ /* - * FPGA IRQ multiplexor + * off-CPU FPGA PIC operations */ -static struct irq_source frv_fpga[4] = { -#define __FPGA(X, M) \ - [X] = { \ - .muxname = "fpga."#X, \ - .irqmask = M, \ - .doirq = frv_fpga_doirq, \ - } +static void frv_fpga_mask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); - __FPGA(0, 0x0700), -}; + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); +} -static struct irq_group frv_fpga_irqs = { - .first_irq = IRQ_BASE_FPGA, - .control = frv_fpga_control, - .sources = { - [ 8] = &frv_fpga[0], - [ 9] = &frv_fpga[0], - [10] = &frv_fpga[0], - }, -}; +static void frv_fpga_ack(unsigned int irq) +{ + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} + +static void frv_fpga_mask_ack(unsigned int irq) +{ + uint16_t imr = __get_IMR(); + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); + + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} -static void frv_fpga_control(struct irq_group *group, int index, int on) +static void frv_fpga_unmask(unsigned int irq) { uint16_t imr = __get_IMR(); - if (on) - imr &= ~(1 << index); - else - imr |= 1 << index; + imr &= ~(1 << (irq - IRQ_BASE_FPGA)); __set_IMR(imr); } -static 
void frv_fpga_doirq(struct irq_source *source) +static struct irq_chip frv_fpga_pic = { + .name = "mb93093", + .ack = frv_fpga_ack, + .mask = frv_fpga_mask, + .mask_ack = frv_fpga_mask_ack, + .unmask = frv_fpga_unmask, + .end = frv_fpga_end, +}; + +/* + * FPGA PIC interrupt handler + */ +static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs) { - uint16_t mask, imr; + uint16_t imr, mask = (unsigned long) _mask; imr = __get_IMR(); - mask = source->irqmask & ~imr & __get_IFR(); - if (mask) { - __set_IMR(imr | mask); - __clr_IFR(mask); - distribute_irqs(&frv_fpga_irqs, mask); - __set_IMR(imr); + mask = mask & ~imr & __get_IFR(); + + /* poll all the triggered IRQs */ + while (mask) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask)); + irq = 31 - irq; + mask &= ~(1 << irq); + + generic_irq_handle(IRQ_BASE_FPGA + irq, regs); } + + return IRQ_HANDLED; } +/* + * define an interrupt action for each FPGA PIC output + * - use dev_id to indicate the FPGA PIC input to output mappings + */ +static struct irqaction fpga_irq[1] = { + [0] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED, + .mask = CPU_MASK_NONE, + .name = "fpga.0", + .dev_id = (void *) 0x0700UL, + } +}; + +/* + * initialise the motherboard FPGA's PIC + */ void __init fpga_init(void) { + int irq; + + /* all PIC inputs are all set to be edge triggered */ __set_IMR(0x0700); __clr_IFR(0x0000); - frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL2); - frv_irq_set_group(&frv_fpga_irqs); + for (irq = IRQ_BASE_FPGA + 8; irq <= IRQ_BASE_FPGA + 10; irq++) + set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_edge_irq); + + /* the FPGA drives external IRQ input #2 on the CPU PIC */ + setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[0]); } diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c index 988d035640e..39c0188a349 100644 --- a/arch/frv/kernel/irq-mb93493.c +++ b/arch/frv/kernel/irq-mb93493.c @@ -1,6 +1,6 @@ /* irq-mb93493.c: MB93493 companion chip interrupt 
handler * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -24,84 +24,126 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #include <asm/mb93493-irqs.h> +#include <asm/mb93493-regs.h> -static void frv_mb93493_doirq(struct irq_source *source); +#define IRQ_ROUTE_ONE(X) (X##_ROUTE << (X - IRQ_BASE_MB93493)) + +#define IRQ_ROUTING \ + (IRQ_ROUTE_ONE(IRQ_MB93493_VDC) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_VCC) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_OUT) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_I2C_0) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_I2C_1) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_USB) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_LOCAL_BUS) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_PCMCIA) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_GPIO) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_IN)) -/*****************************************************************************/ /* - * MB93493 companion chip IRQ multiplexor + * daughter board PIC operations + * - there is no way to ACK interrupts in the MB93493 chip */ -static struct irq_source frv_mb93493[2] = { - [0] = { - .muxname = "mb93493.0", - .muxdata = __region_CS3 + 0x3d0, - .doirq = frv_mb93493_doirq, - .irqmask = 0x0000, - }, - [1] = { - .muxname = "mb93493.1", - .muxdata = __region_CS3 + 0x3d4, - .doirq = frv_mb93493_doirq, - .irqmask = 0x0000, - }, -}; - -static void frv_mb93493_control(struct irq_group *group, int index, int on) +static void frv_mb93493_mask(unsigned int irq) { - struct irq_source *source; uint32_t iqsr; + volatile void *piqsr; - if ((frv_mb93493[0].irqmask & (1 << index))) - source = &frv_mb93493[0]; + if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493))) + piqsr = __addr_MB93493_IQSR(1); else - source = &frv_mb93493[1]; + piqsr = __addr_MB93493_IQSR(0); + + iqsr = readl(piqsr); + iqsr &= ~(1 << (irq - IRQ_BASE_MB93493 + 16)); + 
writel(iqsr, piqsr); +} - iqsr = readl(source->muxdata); - if (on) - iqsr |= 1 << (index + 16); +static void frv_mb93493_ack(unsigned int irq) +{ +} + +static void frv_mb93493_unmask(unsigned int irq) +{ + uint32_t iqsr; + volatile void *piqsr; + + if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493))) + piqsr = __addr_MB93493_IQSR(1); else - iqsr &= ~(1 << (index + 16)); + piqsr = __addr_MB93493_IQSR(0); - writel(iqsr, source->muxdata); + iqsr = readl(piqsr); + iqsr |= 1 << (irq - IRQ_BASE_MB93493 + 16); + writel(iqsr, piqsr); } -static struct irq_group frv_mb93493_irqs = { - .first_irq = IRQ_BASE_MB93493, - .control = frv_mb93493_control, +static struct irq_chip frv_mb93493_pic = { + .name = "mb93093", + .ack = frv_mb93493_ack, + .mask = frv_mb93493_mask, + .mask_ack = frv_mb93493_mask, + .unmask = frv_mb93493_unmask, }; -static void frv_mb93493_doirq(struct irq_source *source) +/* + * MB93493 PIC interrupt handler + */ +static irqreturn_t mb93493_interrupt(int irq, void *_piqsr, struct pt_regs *regs) { - uint32_t mask = readl(source->muxdata); - mask = mask & (mask >> 16) & 0xffff; + volatile void *piqsr = _piqsr; + uint32_t iqsr; - if (mask) - distribute_irqs(&frv_mb93493_irqs, mask); -} + iqsr = readl(piqsr); + iqsr = iqsr & (iqsr >> 16) & 0xffff; -static void __init mb93493_irq_route(int irq, int source) -{ - frv_mb93493[source].irqmask |= 1 << (irq - IRQ_BASE_MB93493); - frv_mb93493_irqs.sources[irq - IRQ_BASE_MB93493] = &frv_mb93493[source]; + /* poll all the triggered IRQs */ + while (iqsr) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(iqsr)); + irq = 31 - irq; + iqsr &= ~(1 << irq); + + generic_handle_irq(IRQ_BASE_MB93493 + irq, regs); + } + + return IRQ_HANDLED; } -void __init route_mb93493_irqs(void) +/* + * define an interrupt action for each MB93493 PIC output + * - use dev_id to indicate the MB93493 PIC input to output mappings + */ +static struct irqaction mb93493_irq[2] = { + [0] = { + .handler = mb93493_interrupt, + .flags = IRQF_DISABLED | 
IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "mb93493.0", + .dev_id = (void *) __addr_MB93493_IQSR(0), + }, + [1] = { + .handler = mb93493_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "mb93493.1", + .dev_id = (void *) __addr_MB93493_IQSR(1), + } +}; + +/* + * initialise the motherboard MB93493's PIC + */ +void __init mb93493_init(void) { - frv_irq_route_external(&frv_mb93493[0], IRQ_CPU_MB93493_0); - frv_irq_route_external(&frv_mb93493[1], IRQ_CPU_MB93493_1); - - frv_irq_set_group(&frv_mb93493_irqs); - - mb93493_irq_route(IRQ_MB93493_VDC, IRQ_MB93493_VDC_ROUTE); - mb93493_irq_route(IRQ_MB93493_VCC, IRQ_MB93493_VCC_ROUTE); - mb93493_irq_route(IRQ_MB93493_AUDIO_IN, IRQ_MB93493_AUDIO_IN_ROUTE); - mb93493_irq_route(IRQ_MB93493_I2C_0, IRQ_MB93493_I2C_0_ROUTE); - mb93493_irq_route(IRQ_MB93493_I2C_1, IRQ_MB93493_I2C_1_ROUTE); - mb93493_irq_route(IRQ_MB93493_USB, IRQ_MB93493_USB_ROUTE); - mb93493_irq_route(IRQ_MB93493_LOCAL_BUS, IRQ_MB93493_LOCAL_BUS_ROUTE); - mb93493_irq_route(IRQ_MB93493_PCMCIA, IRQ_MB93493_PCMCIA_ROUTE); - mb93493_irq_route(IRQ_MB93493_GPIO, IRQ_MB93493_GPIO_ROUTE); - mb93493_irq_route(IRQ_MB93493_AUDIO_OUT, IRQ_MB93493_AUDIO_OUT_ROUTE); + int irq; + + for (irq = IRQ_BASE_MB93493 + 0; irq <= IRQ_BASE_MB93493 + 10; irq++) + set_irq_chip_and_handler(irq, &frv_mb93493_pic, handle_edge_irq); + + /* the MB93493 drives external IRQ inputs on the CPU PIC */ + setup_irq(IRQ_CPU_MB93493_0, &mb93493_irq[0]); + setup_irq(IRQ_CPU_MB93493_1, &mb93493_irq[1]); } diff --git a/arch/frv/kernel/irq-routing.c b/arch/frv/kernel/irq-routing.c deleted file mode 100644 index 53886adf47d..00000000000 --- a/arch/frv/kernel/irq-routing.c +++ /dev/null @@ -1,291 +0,0 @@ -/* irq-routing.c: IRQ routing - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/sched.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/serial_reg.h> -#include <asm/io.h> -#include <asm/irq-routing.h> -#include <asm/irc-regs.h> -#include <asm/serial-regs.h> -#include <asm/dma.h> - -struct irq_level frv_irq_levels[16] = { - [0 ... 15] = { - .lock = SPIN_LOCK_UNLOCKED, - } -}; - -struct irq_group *irq_groups[NR_IRQ_GROUPS]; - -extern struct irq_group frv_cpu_irqs; - -void __init frv_irq_route(struct irq_source *source, int irqlevel) -{ - source->level = &frv_irq_levels[irqlevel]; - source->next = frv_irq_levels[irqlevel].sources; - frv_irq_levels[irqlevel].sources = source; -} - -void __init frv_irq_route_external(struct irq_source *source, int irq) -{ - int irqlevel = 0; - - switch (irq) { - case IRQ_CPU_EXTERNAL0: irqlevel = IRQ_XIRQ0_LEVEL; break; - case IRQ_CPU_EXTERNAL1: irqlevel = IRQ_XIRQ1_LEVEL; break; - case IRQ_CPU_EXTERNAL2: irqlevel = IRQ_XIRQ2_LEVEL; break; - case IRQ_CPU_EXTERNAL3: irqlevel = IRQ_XIRQ3_LEVEL; break; - case IRQ_CPU_EXTERNAL4: irqlevel = IRQ_XIRQ4_LEVEL; break; - case IRQ_CPU_EXTERNAL5: irqlevel = IRQ_XIRQ5_LEVEL; break; - case IRQ_CPU_EXTERNAL6: irqlevel = IRQ_XIRQ6_LEVEL; break; - case IRQ_CPU_EXTERNAL7: irqlevel = IRQ_XIRQ7_LEVEL; break; - default: BUG(); - } - - source->level = &frv_irq_levels[irqlevel]; - source->next = frv_irq_levels[irqlevel].sources; - frv_irq_levels[irqlevel].sources = source; -} - -void __init frv_irq_set_group(struct irq_group *group) -{ - irq_groups[group->first_irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP] = group; -} - -void distribute_irqs(struct irq_group *group, unsigned long irqmask) -{ - struct irqaction *action; - int irq; - - while 
(irqmask) { - asm("scan %1,gr0,%0" : "=r"(irq) : "r"(irqmask)); - if (irq < 0 || irq > 31) - asm volatile("break"); - irq = 31 - irq; - - irqmask &= ~(1 << irq); - action = group->actions[irq]; - - irq += group->first_irq; - - if (action) { - int status = 0; - -// if (!(action->flags & IRQF_DISABLED)) -// local_irq_enable(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, __frame); - action = action->next; - } while (action); - - if (status & IRQF_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - local_irq_disable(); - } - } -} - -/*****************************************************************************/ -/* - * CPU UART interrupts - */ -static void frv_cpuuart_doirq(struct irq_source *source) -{ -// uint8_t iir = readb(source->muxdata + UART_IIR * 8); -// if ((iir & 0x0f) != UART_IIR_NO_INT) - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpuuart[2] = { -#define __CPUUART(X, A) \ - [X] = { \ - .muxname = "uart", \ - .muxdata = (volatile void __iomem *)(unsigned long)A,\ - .irqmask = 1 << IRQ_CPU_UART##X, \ - .doirq = frv_cpuuart_doirq, \ - } - - __CPUUART(0, UART0_BASE), - __CPUUART(1, UART1_BASE), -}; - -/*****************************************************************************/ -/* - * CPU DMA interrupts - */ -static void frv_cpudma_doirq(struct irq_source *source) -{ - uint32_t cstr = readl(source->muxdata + DMAC_CSTRx); - if (cstr & DMAC_CSTRx_INT) - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpudma[8] = { -#define __CPUDMA(X, A) \ - [X] = { \ - .muxname = "dma", \ - .muxdata = (volatile void __iomem *)(unsigned long)A,\ - .irqmask = 1 << IRQ_CPU_DMA##X, \ - .doirq = frv_cpudma_doirq, \ - } - - __CPUDMA(0, 0xfe000900), - __CPUDMA(1, 0xfe000980), - __CPUDMA(2, 0xfe000a00), - __CPUDMA(3, 0xfe000a80), - __CPUDMA(4, 0xfe001000), - __CPUDMA(5, 0xfe001080), - __CPUDMA(6, 0xfe001100), - __CPUDMA(7, 0xfe001180), -}; - 
-/*****************************************************************************/ -/* - * CPU timer interrupts - can't tell whether they've generated an interrupt or not - */ -static void frv_cputimer_doirq(struct irq_source *source) -{ - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cputimer[3] = { -#define __CPUTIMER(X) \ - [X] = { \ - .muxname = "timer", \ - .muxdata = NULL, \ - .irqmask = 1 << IRQ_CPU_TIMER##X, \ - .doirq = frv_cputimer_doirq, \ - } - - __CPUTIMER(0), - __CPUTIMER(1), - __CPUTIMER(2), -}; - -/*****************************************************************************/ -/* - * external CPU interrupts - can't tell directly whether they've generated an interrupt or not - */ -static void frv_cpuexternal_doirq(struct irq_source *source) -{ - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpuexternal[8] = { -#define __CPUEXTERNAL(X) \ - [X] = { \ - .muxname = "ext", \ - .muxdata = NULL, \ - .irqmask = 1 << IRQ_CPU_EXTERNAL##X, \ - .doirq = frv_cpuexternal_doirq, \ - } - - __CPUEXTERNAL(0), - __CPUEXTERNAL(1), - __CPUEXTERNAL(2), - __CPUEXTERNAL(3), - __CPUEXTERNAL(4), - __CPUEXTERNAL(5), - __CPUEXTERNAL(6), - __CPUEXTERNAL(7), -}; - -#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16)) - -struct irq_group frv_cpu_irqs = { - .sources = { - [IRQ_CPU_UART0] = &frv_cpuuart[0], - [IRQ_CPU_UART1] = &frv_cpuuart[1], - [IRQ_CPU_TIMER0] = &frv_cputimer[0], - [IRQ_CPU_TIMER1] = &frv_cputimer[1], - [IRQ_CPU_TIMER2] = &frv_cputimer[2], - [IRQ_CPU_DMA0] = &frv_cpudma[0], - [IRQ_CPU_DMA1] = &frv_cpudma[1], - [IRQ_CPU_DMA2] = &frv_cpudma[2], - [IRQ_CPU_DMA3] = &frv_cpudma[3], - [IRQ_CPU_DMA4] = &frv_cpudma[4], - [IRQ_CPU_DMA5] = &frv_cpudma[5], - [IRQ_CPU_DMA6] = &frv_cpudma[6], - [IRQ_CPU_DMA7] = &frv_cpudma[7], - [IRQ_CPU_EXTERNAL0] = &frv_cpuexternal[0], - [IRQ_CPU_EXTERNAL1] = &frv_cpuexternal[1], - [IRQ_CPU_EXTERNAL2] = &frv_cpuexternal[2], - [IRQ_CPU_EXTERNAL3] = 
&frv_cpuexternal[3], - [IRQ_CPU_EXTERNAL4] = &frv_cpuexternal[4], - [IRQ_CPU_EXTERNAL5] = &frv_cpuexternal[5], - [IRQ_CPU_EXTERNAL6] = &frv_cpuexternal[6], - [IRQ_CPU_EXTERNAL7] = &frv_cpuexternal[7], - }, -}; - -/*****************************************************************************/ -/* - * route the CPU's interrupt sources - */ -void __init route_cpu_irqs(void) -{ - frv_irq_set_group(&frv_cpu_irqs); - - __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 IRQ detect levels */ - __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 IRQ detect levels */ - - /* route UART and error interrupts */ - frv_irq_route(&frv_cpuuart[0], IRQ_UART0_LEVEL); - frv_irq_route(&frv_cpuuart[1], IRQ_UART1_LEVEL); - - set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, IRQ_UART1_LEVEL, IRQ_UART0_LEVEL); - - /* route DMA channel interrupts */ - frv_irq_route(&frv_cpudma[0], IRQ_DMA0_LEVEL); - frv_irq_route(&frv_cpudma[1], IRQ_DMA1_LEVEL); - frv_irq_route(&frv_cpudma[2], IRQ_DMA2_LEVEL); - frv_irq_route(&frv_cpudma[3], IRQ_DMA3_LEVEL); - frv_irq_route(&frv_cpudma[4], IRQ_DMA4_LEVEL); - frv_irq_route(&frv_cpudma[5], IRQ_DMA5_LEVEL); - frv_irq_route(&frv_cpudma[6], IRQ_DMA6_LEVEL); - frv_irq_route(&frv_cpudma[7], IRQ_DMA7_LEVEL); - - set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, IRQ_DMA0_LEVEL); - set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, IRQ_DMA4_LEVEL); - - /* route timer interrupts */ - frv_irq_route(&frv_cputimer[0], IRQ_TIMER0_LEVEL); - frv_irq_route(&frv_cputimer[1], IRQ_TIMER1_LEVEL); - frv_irq_route(&frv_cputimer[2], IRQ_TIMER2_LEVEL); - - set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL); - - /* route external interrupts */ - frv_irq_route(&frv_cpuexternal[0], IRQ_XIRQ0_LEVEL); - frv_irq_route(&frv_cpuexternal[1], IRQ_XIRQ1_LEVEL); - frv_irq_route(&frv_cpuexternal[2], IRQ_XIRQ2_LEVEL); - frv_irq_route(&frv_cpuexternal[3], IRQ_XIRQ3_LEVEL); - frv_irq_route(&frv_cpuexternal[4], IRQ_XIRQ4_LEVEL); - frv_irq_route(&frv_cpuexternal[5], 
IRQ_XIRQ5_LEVEL); - frv_irq_route(&frv_cpuexternal[6], IRQ_XIRQ6_LEVEL); - frv_irq_route(&frv_cpuexternal[7], IRQ_XIRQ7_LEVEL); - - set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, IRQ_XIRQ4_LEVEL); - set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, IRQ_XIRQ0_LEVEL); - -#if defined(CONFIG_MB93091_VDK) - __set_TM1(0x55550000); /* XIRQ7-0 all active low */ -#elif defined(CONFIG_MB93093_PDK) - __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */ -#else -#error dont know external IRQ trigger levels for this setup -#endif - -} /* end route_cpu_irqs() */ diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 08967010be0..5ac041c7c0a 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -1,6 +1,6 @@ /* irq.c: FRV IRQ handling * - * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -9,13 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - #include <linux/ptrace.h> #include <linux/errno.h> #include <linux/signal.h> @@ -43,19 +36,16 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #include <asm/gdb-stub.h> -extern void __init fpga_init(void); -extern void __init route_mb93493_irqs(void); - -static void register_irq_proc (unsigned int irq); +#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16)) -/* - * Special irq handlers. 
- */ +extern void __init fpga_init(void); +#ifdef CONFIG_FUJITSU_MB93493 +extern void __init mb93493_init(void); +#endif -irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) { return IRQ_HANDLED; } +#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR)) atomic_t irq_err_count; @@ -64,215 +54,86 @@ atomic_t irq_err_count; */ int show_interrupts(struct seq_file *p, void *v) { - struct irqaction *action; - struct irq_group *group; + int i = *(loff_t *) v, cpu; + struct irqaction * action; unsigned long flags; - int level, grp, ix, i, j; - - i = *(loff_t *) v; - - switch (i) { - case 0: - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); - - seq_putc(p, '\n'); - break; - case 1 ... NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP: - local_irq_save(flags); - - grp = (i - 1) / NR_IRQ_ACTIONS_PER_GROUP; - group = irq_groups[grp]; - if (!group) - goto skip; - - ix = (i - 1) % NR_IRQ_ACTIONS_PER_GROUP; - action = group->actions[ix]; - if (!action) - goto skip; - - seq_printf(p, "%3d: ", i - 1); - -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]); -#endif - - level = group->sources[ix]->level - frv_irq_levels; - - seq_printf(p, " %12s@%x", group->sources[ix]->muxname, level); - seq_printf(p, " %s", action->name); - - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (i == 0) { + char cpuname[12]; + seq_printf(p, " "); + for_each_present_cpu(cpu) { + sprintf(cpuname, "CPU%d", cpu); + seq_printf(p, " %10s", cpuname); + } seq_putc(p, '\n'); -skip: - local_irq_restore(flags); - break; + } - case NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP + 1: - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - break; + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (action) { + seq_printf(p, "%3d: ", i); + for_each_present_cpu(cpu) + seq_printf(p, "%10u 
", kstat_cpu(cpu).irqs[i]); + seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-"); + seq_printf(p, " %s", action->name); + for (action = action->next; + action; + action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); + } - default: - break; + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); } return 0; } - /* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. + * on-CPU PIC operations */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -void disable_irq_nosync(unsigned int irq) +static void frv_cpupic_ack(unsigned int irqlevel) { - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - unsigned long flags; - int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1); - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[idx]; - if (!source) - BUG(); - - level = source->level; - - spin_lock_irqsave(&level->lock, flags); - - if (group->control) { - if (!group->disable_cnt[idx]++) - group->control(group, idx, 0); - } else if (!level->disable_count++) { - __set_MASK(level - frv_irq_levels); - } - - spin_unlock_irqrestore(&level->lock, flags); + __clr_RC(irqlevel); + __clr_IRL(); } -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. 
- * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ - -void disable_irq(unsigned int irq) +static void frv_cpupic_mask(unsigned int irqlevel) { - disable_irq_nosync(irq); - -#ifdef CONFIG_SMP - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -#endif + __set_MASK(irqlevel); } -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) +static void frv_cpupic_mask_ack(unsigned int irqlevel) { - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - unsigned long flags; - int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1); - int count; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[idx]; - if (!source) - BUG(); - - level = source->level; - - spin_lock_irqsave(&level->lock, flags); - - if (group->control) - count = group->disable_cnt[idx]; - else - count = level->disable_count; - - switch (count) { - case 1: - if (group->control) { - if (group->actions[idx]) - group->control(group, idx, 1); - } else { - if (level->usage) - __clr_MASK(level - frv_irq_levels); - } - /* fall-through */ - - default: - count--; - break; - - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, __builtin_return_address(0)); - } + __set_MASK(irqlevel); + __clr_RC(irqlevel); + __clr_IRL(); +} - if (group->control) - group->disable_cnt[idx] = count; - else - level->disable_count = count; +static void 
frv_cpupic_unmask(unsigned int irqlevel) +{ + __clr_MASK(irqlevel); +} - spin_unlock_irqrestore(&level->lock, flags); +static void frv_cpupic_end(unsigned int irqlevel) +{ + __clr_MASK(irqlevel); } -EXPORT_SYMBOL(enable_irq); +static struct irq_chip frv_cpu_pic = { + .name = "cpu", + .ack = frv_cpupic_ack, + .mask = frv_cpupic_mask, + .mask_ack = frv_cpupic_mask_ack, + .unmask = frv_cpupic_unmask, + .end = frv_cpupic_end, +}; -/*****************************************************************************/ /* * handles all normal device IRQ's * - registers are referred to by the __frame variable (GR28) @@ -281,463 +142,65 @@ EXPORT_SYMBOL(enable_irq); */ asmlinkage void do_IRQ(void) { - struct irq_source *source; - int level, cpu; - irq_enter(); - - level = (__frame->tbr >> 4) & 0xf; - cpu = smp_processor_id(); - - if ((unsigned long) __frame - (unsigned long) (current + 1) < 512) - BUG(); - - __set_MASK(level); - __clr_RC(level); - __clr_IRL(); - - kstat_this_cpu.irqs[level]++; - - for (source = frv_irq_levels[level].sources; source; source = source->next) - source->doirq(source); - - __clr_MASK(level); - + generic_handle_irq(__get_IRL(), __frame); irq_exit(); +} -} /* end do_IRQ() */ - -/*****************************************************************************/ /* * handles all NMIs when not co-opted by the debugger * - registers are referred to by the __frame variable (GR28) */ asmlinkage void do_NMI(void) { -} /* end do_NMI() */ - -/*****************************************************************************/ -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. 
From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * IRQF_SHARED Interrupt is shared - * - * IRQF_DISABLED Disable local interrupts while processing - * - * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction *action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). 
- */ - if (irqflags & IRQF_SHARED) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", - devname, (&irq)[-1]); - } -#endif - - if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = CPU_MASK_NONE; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. 
- */ - -void free_irq(unsigned int irq, void *dev_id) -{ - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - struct irqaction **p, **pp; - unsigned long flags; - - if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS) - return; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - if (!source) - BUG(); - - level = source->level; - p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - - spin_lock_irqsave(&level->lock, flags); - - for (pp = p; *pp; pp = &(*pp)->next) { - struct irqaction *action = *pp; - - if (action->dev_id != dev_id) - continue; - - /* found it - remove from the list of entries */ - *pp = action->next; - - level->usage--; - - if (p == pp && group->control) - group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 0); - - if (level->usage == 0) - __set_MASK(level - frv_irq_levels); - - spin_unlock_irqrestore(&level->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) - barrier(); -#endif - kfree(action); - return; - } -} - -EXPORT_SYMBOL(free_irq); - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that comes in on to an - * unassigned IRQ will cause GxICR_DETECT to be set - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - down(&probe_sem); - return 0; } -EXPORT_SYMBOL(probe_irq_on); - /* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). 
- */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long xmask) -{ - up(&probe_sem); - return 0; -} - -EXPORT_SYMBOL(probe_irq_mask); - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @xmask: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. 
+ * initialise the interrupt system */ - -int probe_irq_off(unsigned long xmask) -{ - up(&probe_sem); - return -1; -} - -EXPORT_SYMBOL(probe_irq_off); - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction *new) -{ - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - struct irqaction **p, **pp; - unsigned long flags; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - if (!source) - BUG(); - - level = source->level; - - p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & IRQF_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. 
- */ - rand_initialize_irq(irq); - } - - /* must juggle the interrupt processing stuff with interrupts disabled */ - spin_lock_irqsave(&level->lock, flags); - - /* can't share interrupts unless all parties agree to */ - if (level->usage != 0 && !(level->flags & new->flags & IRQF_SHARED)) { - spin_unlock_irqrestore(&level->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - pp = p; - while (*pp) - pp = &(*pp)->next; - - *pp = new; - - level->usage++; - level->flags = new->flags; - - /* turn the interrupts on */ - if (level->usage == 1) - __clr_MASK(level - frv_irq_levels); - - if (p == pp && group->control) - group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 1); - - spin_unlock_irqrestore(&level->lock, flags); - register_irq_proc(irq); - return 0; -} - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#define HEX_DIGITS 8 - -static unsigned int parse_hex_value (const char __user *buffer, - unsigned long count, unsigned long *ret) -{ - unsigned char hexnum [HEX_DIGITS]; - unsigned long value; - int i; - - if (!count) - return -EINVAL; - if (count > HEX_DIGITS) - count = HEX_DIGITS; - if (copy_from_user(hexnum, buffer, count)) - return -EFAULT; - - /* - * Parse the first 8 characters as a hex string, any non-hex char - * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. - */ - value = 0; - - for (i = 0; i < count; i++) { - unsigned int c = hexnum[i]; - - switch (c) { - case '0' ... '9': c -= '0'; break; - case 'a' ... 'f': c -= 'a'-10; break; - case 'A' ... 
'F': c -= 'A'-10; break; - default: - goto out; - } - value = (value << 4) | c; - } -out: - *ret = value; - return 0; -} - - -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned long *mask = (unsigned long *) data; - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", *mask); -} - -static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned long *mask = (unsigned long *) data, full_count = count, err; - unsigned long new_value; - - show_state(); - err = parse_hex_value(buffer, count, &new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); -} - -unsigned long prof_cpu_mask = -1; - -void init_irq_proc (void) +void __init init_IRQ(void) { - struct proc_dir_entry *entry; - int i; + int level; - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", NULL); + for (level = 1; level <= 14; level++) + set_irq_chip_and_handler(level, &frv_cpu_pic, + handle_level_irq); - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - if (!entry) - return; + set_irq_handler(IRQ_CPU_TIMER0, handle_edge_irq); - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. 
+ /* set the trigger levels for internal interrupt sources + * - timers all falling-edge + * - ERR0 is rising-edge + * - all others are high-level */ - for (i = 0; i < NR_IRQS; i++) - register_irq_proc(i); -} + __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 */ + __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 */ + + /* route internal interrupts */ + set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, + IRQ_DMA0_LEVEL); + set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL); + set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, + IRQ_UART1_LEVEL, IRQ_UART0_LEVEL); + set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, + IRQ_DMA4_LEVEL); + + /* route external interrupts */ + set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, + IRQ_XIRQ4_LEVEL); + set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, + IRQ_XIRQ0_LEVEL); + +#if defined(CONFIG_MB93091_VDK) + __set_TM1(0x55550000); /* XIRQ7-0 all active low */ +#elif defined(CONFIG_MB93093_PDK) + __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */ +#else +#error dont know external IRQ trigger levels for this setup +#endif -/*****************************************************************************/ -/* - * initialise the interrupt system - */ -void __init init_IRQ(void) -{ - route_cpu_irqs(); fpga_init(); #ifdef CONFIG_FUJITSU_MB93493 - route_mb93493_irqs(); + mb93493_init(); #endif -} /* end init_IRQ() */ +} diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index af08ccd4ed6..d96a57e5f03 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -43,7 +43,6 @@ #include <asm/mb-regs.h> #include <asm/mb93493-regs.h> #include <asm/gdb-stub.h> -#include <asm/irq-routing.h> #include <asm/io.h> #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 68a77fe3bb4..3d0284bccb9 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -26,7 +26,6 @@ #include <asm/timer-regs.h> #include 
<asm/mb-regs.h> #include <asm/mb86943a.h> -#include <asm/irq-routing.h> #include <linux/timex.h> diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c index 2278c80bd88..ba587523c01 100644 --- a/arch/frv/mb93090-mb00/pci-irq.c +++ b/arch/frv/mb93090-mb00/pci-irq.c @@ -15,7 +15,6 @@ #include <asm/io.h> #include <asm/smp.h> -#include <asm/irq-routing.h> #include "pci-frv.h" diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index b5b4286f9dd..3f3a0ed3539 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; /* allocate some pages for kernel housekeeping tasks */ empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index d3d40bdc2d6..e4f4199f97a 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -138,7 +138,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b2751eadbc5..6189b0c28d6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -494,7 +494,7 @@ config HIGHMEM64G endchoice choice - depends on EXPERIMENTAL && !X86_PAE + depends on EXPERIMENTAL prompt "Memory split" if EMBEDDED default VMSPLIT_3G help @@ -516,6 +516,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !HIGHMEM bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" @@ -794,6 +795,7 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" default y + depends on !PARAVIRT help Map the VDSO to the predictable old-style address too. 
---help--- diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 8591f2fa920..ff9ce4b5eaa 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1154,9 +1154,11 @@ out: static void set_time(void) { + struct timespec ts; if (got_clock_diff) { /* Must know time zone in order to set clock */ - xtime.tv_sec = get_cmos_time() + clock_cmos_diff; - xtime.tv_nsec = 0; + ts.tv_sec = get_cmos_time() + clock_cmos_diff; + ts.tv_nsec = 0; + do_settimeofday(&ts); } } @@ -1232,13 +1234,8 @@ static int suspend(int vetoable) restore_processor_state(); local_irq_disable(); - write_seqlock(&xtime_lock); - spin_lock(&i8253_lock); - reinit_timer(); set_time(); - - spin_unlock(&i8253_lock); - write_sequnlock(&xtime_lock); + reinit_timer(); if (err == APM_NO_ERROR) err = APM_SUCCESS; @@ -1365,9 +1362,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(event, NULL); @@ -1383,9 +1378,7 @@ static void check_events(void) break; case APM_UPDATE_TIME: - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); break; case APM_CRITICAL_SUSPEND: @@ -2339,6 +2332,7 @@ static int __init apm_init(void) ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); if (ret < 0) { printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); + remove_proc_entry("apm", NULL); return -ENOMEM; } @@ -2348,7 +2342,13 @@ static int __init apm_init(void) return 0; } - misc_register(&apm_device); + /* + * Note we don't actually care if the misc_device cannot be registered. + * this driver can do its job without it, even if userspace can't + * control it. 
just log the error + */ + if (misc_register(&apm_device)) + printk(KERN_WARNING "apm: Could not register misc device.\n"); if (HZ != 100) idle_period = (idle_period * HZ) / 100; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 169ac8e0db6..0b61eed8bbd 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock); * has been called. */ -static void prepare_set(void) +static void prepare_set(void) __acquires(set_atomicity_lock) { unsigned long cr0; @@ -274,7 +274,7 @@ static void prepare_set(void) mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); } -static void post_set(void) +static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ __flush_tlb(); diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S index d3ee73a3eee..ef00bb77d7e 100644 --- a/arch/i386/kernel/efi_stub.S +++ b/arch/i386/kernel/efi_stub.S @@ -7,7 +7,6 @@ #include <linux/linkage.h> #include <asm/page.h> -#include <asm/pgtable.h> /* * efi_call_phys(void *, ...) is a function with variable parameters. 
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 54cfeabbc5e..84278e0093a 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -145,14 +145,10 @@ real_mode_gdt_entries [3] = 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct -{ - unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); -} -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, NULL }, -no_idt = { 0, NULL }; +static struct Xgt_desc_struct +real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, +real_mode_idt = { 0x3ff, 0 }, +no_idt = { 0, 0 }; /* This is 16-bit protected mode code to disable paging and the cache, diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d30..16d99444cf6 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -53,6 +53,7 @@ #include <asm/apic.h> #include <asm/e820.h> #include <asm/mpspec.h> +#include <asm/mmzone.h> #include <asm/setup.h> #include <asm/arch_hooks.h> #include <asm/sections.h> @@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p) } /* + * reservetop=size reserves a hole at the top of the kernel address space which + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, + * so relocating the fixmap can be done before paging initialization. + */ +static int __init parse_reservetop(char *arg) +{ + unsigned long address; + + if (!arg) + return -EINVAL; + + address = memparse(arg, &arg); + reserve_top_address(address); + return 0; +} +early_param("reservetop", parse_reservetop); + +/* * Callback for efi_memory_walk. 
*/ static int __init @@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; unsigned int max_dma, low; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; @@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - + numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index c10789d7a9d..465188e2d70 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) } } +/* + * this function sends a 'generic call function' IPI to one other CPU + * in the system. + * + * cpu is a standard Linux logical CPU number. + */ +static void +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = 1; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (!wait) + return; + + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + +/* + * smp_call_function_single - Run a function on another CPU + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Currently unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Retrurns 0 on success, else a negative status code. 
+ * + * Does not return until the remote CPU is nearly ready to execute <func> + * or is or has executed. + */ + +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int me = get_cpu(); + if (cpu == me) { + WARN_ON(1); + put_cpu(); + return -EBUSY; + } + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index f948419c888..efe07990e7f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); + int node = apicid_to_node(apicid); + + if (!node_online(node)) + node = first_online_node; cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, apicid_to_node(apicid)); + map_cpu_to_node(cpu, node); } static void unmap_cpu_to_logical_apicid(int cpu) diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index b1809c9a089..83db411b3aa 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -42,7 +42,7 @@ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ -#define MAX_CHUNKS_PER_NODE 4 +#define MAX_CHUNKS_PER_NODE 3 #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) struct node_memory_chunk_s { unsigned long start_pfn; @@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 4 -#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" -#endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[]. 
*/ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index edd00f6cee3..1302e4ab3c4 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void) mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; +static long clock_cmos_diff; +static unsigned long sleep_start; static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* * Estimate time zone so that set_time can update the clock */ - clock_cmos_diff = -get_cmos_time(); + unsigned long ctime = get_cmos_time(); + + clock_cmos_diff = -ctime; clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); + sleep_start = ctime; return 0; } @@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev) { unsigned long flags; unsigned long sec; - unsigned long sleep_length; - + unsigned long ctime = get_cmos_time(); + long sleep_length = (ctime - sleep_start) * HZ; + struct timespec ts; + + if (sleep_length < 0) { + printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); + /* The time after the resume must not be earlier than the time + * before the suspend or some nasty things will happen + */ + sleep_length = 0; + ctime = sleep_start; + } #ifdef CONFIG_HPET_TIMER if (is_hpet_enabled()) hpet_reenable(); #endif setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; + + sec = ctime + clock_cmos_diff; + ts.tv_sec = sec; + ts.tv_nsec = 0; + do_settimeofday(&ts); write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; jiffies_64 += sleep_length; wall_jiffies += sleep_length; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -334,10 +348,11 @@ extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - 
set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + struct timespec ts; + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); @@ -349,6 +364,7 @@ static void __init hpet_time_init(void) void __init time_init(void) { + struct timespec ts; #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -359,10 +375,10 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); time_init_hook(); } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 14a1376fedd..6bf14a4e995 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / 
hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. + */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7e9edafffd8..4fcc6690be9 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs) */ if (in_kernel) { u8 __user *eip; + int code_bytes = 64; + unsigned char c; printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); @@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); eip = (u8 __user *)regs->eip - 43; - for (i = 0; i < 64; i++, eip++) { - unsigned char c; - + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + /* try starting at EIP */ + eip = (u8 __user *)regs->eip; + code_bytes = 32; + } + for (i = 0; i < code_bytes; i++, eip++) { if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 2d4f1386e2b..1e7ac1c44dd 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -13,6 +13,12 @@ 
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(4); /* R__ */ +} SECTIONS { . = __KERNEL_START; @@ -26,7 +32,7 @@ SECTIONS KPROBES_TEXT *(.fixup) *(.gnu.warning) - } = 0x9090 + } :text = 0x9090 _etext = .; /* End of text section */ @@ -48,7 +54,7 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ *(.data) CONSTRUCTORS - } + } :data . = ALIGN(4096); __nosave_begin = .; @@ -184,4 +190,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + NOTES } diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index 50f6de6ff64..f39887359e8 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -130,7 +130,6 @@ thread(void *unused) init_timer(&wakeup_timer); sigfillset(¤t->blocked); - current->signal->tty = NULL; printk(KERN_NOTICE "Voyager starting monitor thread\n"); diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 5d44f4f5ff5..4de11f508c3 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -29,8 +29,11 @@ */ #define BOOT_PTE_PTRS (PTRS_PER_PTE*2) -#define boot_pte_index(address) \ - (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1)) + +static unsigned long boot_pte_index(unsigned long vaddr) +{ + return __pa(vaddr) >> PAGE_SHIFT; +} static inline boot_pte_t* boot_vaddr_to_pte(void *address) { diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b..fb5d8b747de 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void *node_remap_end_vaddr[MAX_NUMNODES]; void *node_remap_alloc_vaddr[MAX_NUMNODES]; - +static unsigned long kva_start_pfn; +static unsigned long kva_pages; /* * FLAT - support for basic PC memory model with discontig 
enabled, essentially * a single node with all available processors in it with a flat @@ -286,7 +287,6 @@ unsigned long __init setup_memory(void) { int nid; unsigned long system_start_pfn, system_max_low_pfn; - unsigned long reserve_pages; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -298,14 +298,23 @@ unsigned long __init setup_memory(void) find_max_pfn(); get_memcfg_numa(); - reserve_pages = calculate_numa_remap_pages(); + kva_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; - printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", - reserve_pages, max_low_pfn + reserve_pages); + kva_start_pfn = find_max_low_pfn() - kva_pages; + +#ifdef CONFIG_BLK_DEV_INITRD + /* Numa kva area is below the initrd */ + if (LOADER_TYPE && INITRD_START) + kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; +#endif + kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); + + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -323,7 +332,7 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn + node_remap_offset[nid]); + kva_start_pfn + node_remap_offset[nid]); /* Init the node remap allocator */ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + (node_remap_size[nid] * PAGE_SIZE); @@ -338,7 +347,6 @@ unsigned long __init setup_memory(void) } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); - vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for_each_online_node(nid) find_max_pfn_node(nid); @@ -348,13 +356,18 @@ unsigned long __init 
setup_memory(void) return max_low_pfn; } +void __init numa_kva_reserve(void) +{ + reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); +} + void __init zone_sizes_init(void) { int nid; for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long *zholes_size; unsigned int max_dma; @@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro) zone_end_pfn = zone_start_pfn + zone->spanned_pages; printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone->zone_pgdat->node_id, + zone->name, zone_to_nid(zone), zone_start_pfn, zone_end_pfn); for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 89e8486aac3..efd0bcdac65 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -629,6 +629,48 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); +#if 1 /* double-sanity-check paranoia */ + printk("virtual kernel memory layout:\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + FIXADDR_START, FIXADDR_TOP, + (FIXADDR_TOP - FIXADDR_START) >> 10, + +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, +#endif + + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, + + (unsigned long)__va(0), (unsigned long)high_memory, + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, + + (unsigned long)&__init_begin, (unsigned long)&__init_end, + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, + + (unsigned long)&_etext, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned 
long)&_etext) >> 10, + + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + +#ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif + BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); +#endif /* double-sanity-check paranoia */ + #ifdef CONFIG_X86_PAE if (!cpu_has_pae) panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); @@ -657,7 +699,7 @@ void __init mem_init(void) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata = &contig_page_data; - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index bd98768d876..10126e3f817 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -60,7 +61,9 @@ void show_mem(void) printk(KERN_INFO "%lu pages writeback\n", global_page_state(NR_WRITEBACK)); printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); + printk(KERN_INFO "%lu pages slab\n", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); printk(KERN_INFO "%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); } @@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) __flush_tlb_one(vaddr); } +static int fixmaps; +#ifndef CONFIG_COMPAT_VDSO +unsigned long __FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(__FIXADDR_TOP); +#endif + void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { unsigned long 
address = __fix_to_virt(idx); @@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) return; } set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + fixmaps++; +} + +/** + * reserve_top_address - reserves a hole in the top of kernel address space + * @reserve - size of hole to reserve + * + * Can be used to relocate the fixmap area and poke a hole in the top + * of kernel address space to make room for a hypervisor. + */ +void reserve_top_address(unsigned long reserve) +{ + BUG_ON(fixmaps > 0); +#ifdef CONFIG_COMPAT_VDSO + BUG_ON(reserve != 0); +#else + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; +#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index c893b897217..8a2b50a0aaa 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume) movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx movl %ecx, %cr3 - movl pagedir_nosave, %edx + movl restore_pblist, %edx .p2align 4,,7 copy_loop: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index db274da7dba..f521f2f60a7 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - config AUDIT_ARCH bool default y diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 0176556aeec..32c3abededc 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; + int nid; pxm_id = acpi_get_pxm(handle); - /* - * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag + * We don't have cpu-only-node hotadd. 
But if the system equips + * SRAT table, pxm is already found and node is ready. + * So, just pxm_to_nid(pxm) is OK. + * This code here is for the system which doesn't have full SRAT + * table for possible cpus. */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); - + nid = acpi_map_pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = nid; #endif return (0); } diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index 1cc360c83e7..20340631179 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +void __cpuinit map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* we don't have cpu-driven node hot add yet... + In usual case, node is created from SRAT at boot time. */ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpu_set(cpu, node_to_cpu_mask[nid]); + return; +} + +void __cpuinit unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpu_clear(cpu, node_to_cpu_mask[nid]); +} + + /** * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays * @@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void) node = node_cpuid[i].nid; break; } - cpu_to_node_map[cpu] = (node >= 0) ? 
node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); + map_cpu_to_node(cpu, node); } } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f..7bb7696e4ce 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -34,6 +34,7 @@ #include <linux/file.h> #include <linux/poll.h> #include <linux/vfs.h> +#include <linux/smp.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/bitops.h> diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f648c610b10..05bdf7affb4 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,6 +36,9 @@ int arch_register_cpu(int num) */ if (!can_cpei_retarget() && is_cpu_cpei_target(num)) sysfs_cpus[num].cpu.no_control = 1; +#ifdef CONFIG_NUMA + map_cpu_to_node(num, node_cpuid[num].nid); +#endif #endif return register_cpu(&sysfs_cpus[num].cpu, num); @@ -45,7 +48,8 @@ int arch_register_cpu(int num) void arch_unregister_cpu(int num) { - return unregister_cpu(&sysfs_cpus[num].cpu); + unregister_cpu(&sysfs_cpus[num].cpu); + unmap_cpu_from_node(num, cpu_to_node(num)); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 4c73a676366..c58e933694d 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 9a8a29339d2..b632b9c1e3b 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -32,9 +32,10 @@ #include 
<linux/cpumask.h> #include <linux/smp_lock.h> #include <linux/nodemask.h> +#include <linux/smp.h> + #include <asm/processor.h> #include <asm/topology.h> -#include <asm/smp.h> #include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index b71348fec1f..bbd97c85bc5 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -100,7 +100,7 @@ void free_initrd_mem(unsigned long, unsigned long); #ifndef CONFIG_DISCONTIGMEM unsigned long __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long max_dma; unsigned long low; unsigned long start_pfn; diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index e4c233eef19..06e538d1be3 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c @@ -136,7 +136,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c index 98244d51c15..c4fae8ff467 100644 --- a/arch/mips/au1000/common/dbdma.c +++ b/arch/mips/au1000/common/dbdma.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL(au1xxx_ddma_add_device); */ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, - void (*callback)(int, void *, struct pt_regs *), void *callparam) + void (*callback)(int, void *), void *callparam) { unsigned long flags; u32 used, chan, rv; @@ -248,8 +248,10 @@ au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, au1xxx_dbdma_init(); dbdma_initialized = 1; - if ((stp = find_dbdev_id(srcid)) == NULL) return 0; - if ((dtp = find_dbdev_id(destid)) == NULL) return 0; + if ((stp = find_dbdev_id(srcid)) == NULL) + return 0; + if ((dtp = find_dbdev_id(destid)) == NULL) + return 0; used = 0; rv = 0; @@ -869,7 +871,7 @@ dbdma_interrupt(int irq, 
void *dev_id, struct pt_regs *regs) au_sync(); if (ctp->chan_callback) - (ctp->chan_callback)(irq, ctp->chan_callparam, regs); + (ctp->chan_callback)(irq, ctp->chan_callparam); ctp->cur_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr)); return IRQ_RETVAL(1); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index c52497bb102..5b06349af2d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -163,10 +163,10 @@ static int __init page_is_ram(unsigned long pagenr) void __init paging_init(void) { - unsigned long zones_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zones_size[] = { 0, }; unsigned long max_dma, high, low; #ifndef CONFIG_FLATMEM - unsigned long zholes_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zholes_size[] = { 0, }; unsigned long i, j, pfn; #endif diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index efe6971fc80..16e5682b01f 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -19,6 +19,7 @@ #include <linux/swap.h> #include <linux/bootmem.h> #include <linux/pfn.h> +#include <linux/highmem.h> #include <asm/page.h> #include <asm/sections.h> @@ -508,7 +509,7 @@ extern unsigned long setup_zero_pages(void); void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned node; pagetable_init(); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f2b96f1e0da..25ad28d63e8 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -551,7 +551,7 @@ void show_mem(void) printk("Zone list for zone %d on node %d: ", j, i); for (k = 0; zl->zones[k] != NULL; k++) - printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name); + printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); printk("\n"); } } @@ -809,7 +809,7 @@ void __init paging_init(void) flush_tlb_all_local(NULL); for (i = 0; i < npmem_ranges; i++) { - unsigned long 
zones_size[MAX_NR_ZONES] = { 0, 0, 0 }; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; /* We have an IOMMU, so all memory can go into a single ZONE_DMA zone. */ diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index 7369f9a6ad2..69e8f86aa4f 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -159,8 +159,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) isync /* Load ptr the list of pages to copy in r3 */ - lis r11,(pagedir_nosave - KERNELBASE)@h - ori r11,r11,pagedir_nosave@l + lis r11,(restore_pblist - KERNELBASE)@h + ori r11,r11,restore_pblist@l lwz r10,0(r11) /* Copy the pages. This is a very basic implementation, to diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index ab3b0765a64..8aea3698a77 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data) mem_data->pgpgout = ev[PGPGOUT] >> 1; mem_data->pswpin = ev[PSWPIN]; mem_data->pswpout = ev[PSWPOUT]; - mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] + - ev[PGALLOC_DMA]; + mem_data->pgalloc = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA]; mem_data->pgfault = ev[PGFAULT]; mem_data->pgmajfault = ev[PGMAJFAULT]; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 786a44dba5b..607f50ead1f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -15,6 +15,8 @@ #include <linux/sched.h> #include <linux/sysctl.h> #include <linux/ctype.h> +#include <linux/swap.h> +#include <linux/kthread.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -34,18 +36,18 @@ struct cmm_page_array { unsigned long pages[CMM_NR_PAGES]; }; -static long cmm_pages = 0; -static long cmm_timed_pages = 0; -static volatile long cmm_pages_target = 0; -static volatile long cmm_timed_pages_target = 0; -static long cmm_timeout_pages = 0; -static long cmm_timeout_seconds = 0; +static long cmm_pages; +static long cmm_timed_pages; +static volatile long 
cmm_pages_target; +static volatile long cmm_timed_pages_target; +static long cmm_timeout_pages; +static long cmm_timeout_seconds; -static struct cmm_page_array *cmm_page_list = NULL; -static struct cmm_page_array *cmm_timed_page_list = NULL; +static struct cmm_page_array *cmm_page_list; +static struct cmm_page_array *cmm_timed_page_list; +static DEFINE_SPINLOCK(cmm_lock); -static unsigned long cmm_thread_active = 0; -static struct work_struct cmm_thread_starter; +static struct task_struct *cmm_thread_ptr; static wait_queue_head_t cmm_thread_wait; static struct timer_list cmm_timer; @@ -53,71 +55,100 @@ static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); static long -cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) +cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list) { - struct cmm_page_array *pa; - unsigned long page; + struct cmm_page_array *pa, *npa; + unsigned long addr; - pa = *list; - while (pages) { - page = __get_free_page(GFP_NOIO); - if (!page) + while (nr) { + addr = __get_free_page(GFP_NOIO); + if (!addr) break; + spin_lock(&cmm_lock); + pa = *list; if (!pa || pa->index >= CMM_NR_PAGES) { /* Need a new page for the page list. 
*/ - pa = (struct cmm_page_array *) + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *) __get_free_page(GFP_NOIO); - if (!pa) { - free_page(page); + if (!npa) { + free_page(addr); break; } - pa->next = *list; - pa->index = 0; - *list = pa; + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + *list = pa; + } else + free_page((unsigned long) npa); } - diag10(page); - pa->pages[pa->index++] = page; + diag10(addr); + pa->pages[pa->index++] = addr; (*counter)++; - pages--; + spin_unlock(&cmm_lock); + nr--; } - return pages; + return nr; } -static void -cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +static long +cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) { struct cmm_page_array *pa; - unsigned long page; + unsigned long addr; + spin_lock(&cmm_lock); pa = *list; - while (pages) { + while (nr) { if (!pa || pa->index <= 0) break; - page = pa->pages[--pa->index]; + addr = pa->pages[--pa->index]; if (pa->index == 0) { pa = pa->next; free_page((unsigned long) *list); *list = pa; } - free_page(page); + free_page(addr); (*counter)--; - pages--; + nr--; } + spin_unlock(&cmm_lock); + return nr; } +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = 256; + + nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); + if (nr > 0) + nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + *freed += 256 - nr; + return NOTIFY_OK; +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + static int cmm_thread(void *dummy) { int rc; - daemonize("cmmthread"); while (1) { rc = wait_event_interruptible(cmm_thread_wait, (cmm_pages != cmm_pages_target || - cmm_timed_pages != cmm_timed_pages_target)); - if (rc == -ERESTARTSYS) { - /* Got kill signal. End thread. 
*/ - clear_bit(0, &cmm_thread_active); + cmm_timed_pages != cmm_timed_pages_target || + kthread_should_stop())); + if (kthread_should_stop() || rc == -ERESTARTSYS) { cmm_pages_target = cmm_pages; cmm_timed_pages_target = cmm_timed_pages; break; @@ -143,16 +174,8 @@ cmm_thread(void *dummy) } static void -cmm_start_thread(void) -{ - kernel_thread(cmm_thread, NULL, 0); -} - -static void cmm_kick_thread(void) { - if (!test_and_set_bit(0, &cmm_thread_active)) - schedule_work(&cmm_thread_starter); wake_up(&cmm_thread_wait); } @@ -177,21 +200,21 @@ cmm_set_timer(void) static void cmm_timer_fn(unsigned long ignored) { - long pages; + long nr; - pages = cmm_timed_pages_target - cmm_timeout_pages; - if (pages < 0) + nr = cmm_timed_pages_target - cmm_timeout_pages; + if (nr < 0) cmm_timed_pages_target = 0; else - cmm_timed_pages_target = pages; + cmm_timed_pages_target = nr; cmm_kick_thread(); cmm_set_timer(); } void -cmm_set_pages(long pages) +cmm_set_pages(long nr) { - cmm_pages_target = pages; + cmm_pages_target = nr; cmm_kick_thread(); } @@ -202,9 +225,9 @@ cmm_get_pages(void) } void -cmm_add_timed_pages(long pages) +cmm_add_timed_pages(long nr) { - cmm_timed_pages_target += pages; + cmm_timed_pages_target += nr; cmm_kick_thread(); } @@ -215,9 +238,9 @@ cmm_get_timed_pages(void) } void -cmm_set_timeout(long pages, long seconds) +cmm_set_timeout(long nr, long seconds) { - cmm_timeout_pages = pages; + cmm_timeout_pages = nr; cmm_timeout_seconds = seconds; cmm_set_timer(); } @@ -245,7 +268,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; - long pages; + long nr; int len; if (!*lenp || (*ppos && !write)) { @@ -260,17 +283,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); if (ctl == &cmm_table[0]) - cmm_set_pages(pages); + 
cmm_set_pages(nr); else - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else { if (ctl == &cmm_table[0]) - pages = cmm_get_pages(); + nr = cmm_get_pages(); else - pages = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", pages); + nr = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", nr); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -286,7 +309,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; - long pages, seconds; + long nr, seconds; int len; if (!*lenp || (*ppos && !write)) { @@ -301,10 +324,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -357,7 +380,7 @@ static struct ctl_table cmm_dir_table[] = { static void cmm_smsg_target(char *from, char *msg) { - long pages, seconds; + long nr, seconds; if (strlen(sender) > 0 && strcmp(from, sender) != 0) return; @@ -366,27 +389,27 @@ cmm_smsg_target(char *from, char *msg) if (strncmp(msg, "SHRINK", 6) == 0) { if (!cmm_skip_blanks(msg + 6, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_pages(pages); + cmm_set_pages(nr); } else if (strncmp(msg, "RELEASE", 7) == 0) { if (!cmm_skip_blanks(msg + 7, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else if (strncmp(msg, "REUSE", 5) == 0) { if (!cmm_skip_blanks(msg + 5, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = 
simple_strtoul(msg, &msg, 0); if (!cmm_skip_blanks(msg, &msg)) return; seconds = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } } #endif @@ -396,21 +419,49 @@ struct ctl_table_header *cmm_sysctl_header; static int cmm_init (void) { + int rc = -ENOMEM; + #ifdef CONFIG_CMM_PROC cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); + if (!cmm_sysctl_header) + goto out; #endif #ifdef CONFIG_CMM_IUCV - smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + if (rc < 0) + goto out_smsg; #endif - INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_oom_notify; init_waitqueue_head(&cmm_thread_wait); init_timer(&cmm_timer); - return 0; + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; + if (!rc) + goto out; + /* + * kthread_create failed. undo all the stuff from above again. 
+ */ + unregister_oom_notifier(&cmm_oom_nb); + +out_oom_notify: +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +out_smsg: +#endif +#ifdef CONFIG_CMM_PROC + unregister_sysctl_table(cmm_sysctl_header); +#endif +out: + return rc; } static void cmm_exit(void) { + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); #ifdef CONFIG_CMM_PROC diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index ad8ed7d41e1..bf94eedb0a8 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -30,7 +30,7 @@ #define __pte_offset(address) \ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ +#define pte_offset(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ __pte_offset(address)) static inline void cache_wback_all(void) diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c index 1169757fb38..83295bd21aa 100644 --- a/arch/sh64/mm/init.c +++ b/arch/sh64/mm/init.c @@ -110,7 +110,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; pgd_init((unsigned long)swapper_pg_dir); pgd_init((unsigned long)swapper_pg_dir + diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 16e13f663ab..b27a506309e 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -2175,7 +2175,7 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM); BTFIXUPSET_SETHI(none_mask, 0xF0000000); diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 7fdddf3c7e1..436021ceb2e 100644 --- 
a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -2280,5 +2280,5 @@ void __init ld_mmu_sun4c(void) /* These should _never_ get called with two level tables. */ BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0); } diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 8135ec322c9..642541769a1 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c @@ -736,20 +736,15 @@ struct exec_domain solaris_exec_domain = { extern int init_socksys(void); -#ifdef MODULE - MODULE_AUTHOR("Jakub Jelinek (jj@ultra.linux.cz), Patrik Rak (prak3264@ss1000.ms.mff.cuni.cz)"); MODULE_DESCRIPTION("Solaris binary emulation module"); MODULE_LICENSE("GPL"); -#ifdef __sparc_v9__ extern u32 tl0_solaris[8]; #define update_ttable(x) \ tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ wmb(); \ __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3])) -#else -#endif extern u32 solaris_sparc_syscall[]; extern u32 solaris_syscall[]; @@ -757,7 +752,7 @@ extern void cleanup_socksys(void); extern u32 entry64_personality_patch; -int init_module(void) +static int __init solaris_init(void) { int ret; @@ -777,19 +772,12 @@ int init_module(void) return 0; } -void cleanup_module(void) +static void __exit solaris_exit(void) { update_ttable(solaris_syscall); cleanup_socksys(); unregister_exec_domain(&solaris_exec_domain); } -#else -int init_solaris_emul(void) -{ - register_exec_domain(&solaris_exec_domain); - init_socksys(); - return 0; -} -#endif - +module_init(solaris_init); +module_exit(solaris_exit); diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c index bc3df95bc05..7c90e41fd3b 100644 --- a/arch/sparc64/solaris/socksys.c +++ b/arch/sparc64/solaris/socksys.c @@ -168,8 +168,7 @@ static struct file_operations socksys_fops = { .release = socksys_release, }; -int __init 
-init_socksys(void) +int __init init_socksys(void) { int ret; struct file * file; @@ -199,8 +198,7 @@ init_socksys(void) return 0; } -void -cleanup_socksys(void) +void __exit cleanup_socksys(void) { if (unregister_chrdev(30, "socksys")) printk ("Couldn't unregister socksys character device\n"); diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 7218c754505..e82764f75e7 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -544,7 +544,7 @@ static struct chan *parse_chan(struct line *line, char *str, int device, ops = NULL; data = NULL; - for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ + for(i = 0; i < ARRAY_SIZE(chan_table); i++){ entry = &chan_table[i]; if(!strncmp(str, entry->key, strlen(entry->key))){ ops = entry->ops; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index b414522f768..79610b5ce67 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -497,7 +497,7 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int, } error = NULL; - size = sizeof(default_buf)/sizeof(default_buf[0]); + size = ARRAY_SIZE(default_buf); buf = default_buf; while(1){ diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 9bfd405c3bd..5b2f5fe9e42 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -16,6 +16,7 @@ #include "user.h" #include "mconsole.h" #include "umid.h" +#include "user_util.h" static struct mconsole_command commands[] = { /* With uts namespaces, uts information becomes process-specific, so @@ -65,14 +66,14 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) struct mconsole_command *cmd; int i; - for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){ + for(i = 0; i < ARRAY_SIZE(commands); i++){ cmd = &commands[i]; if(!strncmp(req->request.data, cmd->command, strlen(cmd->command))){ - return(cmd); + return cmd; } } - return(NULL); + return 
NULL; } #define MIN(a,b) ((a)<(b) ? (a):(b)) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 501f95675d8..4a7966b2193 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -31,6 +31,11 @@ #include "irq_user.h" #include "irq_kern.h" +static inline void set_ether_mac(struct net_device *dev, unsigned char *addr) +{ + memcpy(dev->dev_addr, addr, ETH_ALEN); +} + #define DRIVER_NAME "uml-netdev" static DEFINE_SPINLOCK(opened_lock); @@ -242,7 +247,7 @@ static int uml_net_set_mac(struct net_device *dev, void *addr) struct sockaddr *hwaddr = addr; spin_lock(&lp->lock); - memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); + set_ether_mac(dev, hwaddr->sa_data); spin_unlock(&lp->lock); return(0); @@ -790,13 +795,6 @@ void dev_ip_addr(void *d, unsigned char *bin_buf) memcpy(bin_buf, &in->ifa_address, sizeof(in->ifa_address)); } -void set_ether_mac(void *d, unsigned char *addr) -{ - struct net_device *dev = d; - - memcpy(dev->dev_addr, addr, ETH_ALEN); -} - struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) { if((skb != NULL) && (skb_tailroom(skb) < extra)){ diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c index 466ff2c2f91..4c767c7adb9 100644 --- a/arch/um/drivers/pcap_kern.c +++ b/arch/um/drivers/pcap_kern.c @@ -76,7 +76,7 @@ int pcap_setup(char *str, char **mac_out, void *data) if(host_if != NULL) init->host_if = host_if; - for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ + for(i = 0; i < ARRAY_SIZE(options); i++){ if(options[i] == NULL) continue; if(!strcmp(options[i], "promisc")) diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index b98bdd8e052..89e1dc835a5 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -27,7 +27,6 @@ extern int ncpus; extern char *linux_prog; extern char *gdb_init; extern int kmalloc_ok; -extern int timer_irq_inited; extern int jail; extern int nsyscalls; diff --git a/arch/um/include/longjmp.h 
b/arch/um/include/longjmp.h index 1b5c0131a12..e93c6d3e893 100644 --- a/arch/um/include/longjmp.h +++ b/arch/um/include/longjmp.h @@ -1,9 +1,12 @@ #ifndef __UML_LONGJMP_H #define __UML_LONGJMP_H -#include <setjmp.h> +#include "sysdep/archsetjmp.h" #include "os.h" +extern int setjmp(jmp_buf); +extern void longjmp(jmp_buf, int); + #define UML_LONGJMP(buf, val) do { \ longjmp(*buf, val); \ } while(0) diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h index 800c403920b..47ef7cb49a8 100644 --- a/arch/um/include/net_user.h +++ b/arch/um/include/net_user.h @@ -26,7 +26,6 @@ struct net_user_info { extern void ether_user_init(void *data, void *dev); extern void dev_ip_addr(void *d, unsigned char *bin_buf); -extern void set_ether_mac(void *d, unsigned char *addr); extern void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, void *), void *arg); diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 5316e8a4a4f..24fb6d8680e 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -276,9 +276,11 @@ extern int setjmp_wrapper(void (*proc)(void *, void *), ...); extern void switch_timers(int to_real); extern void idle_sleep(int secs); +extern int set_interval(int is_virtual); +#ifdef CONFIG_MODE_TT extern void enable_timer(void); +#endif extern void disable_timer(void); -extern void user_time_init(void); extern void uml_idle_timer(void); extern unsigned long long os_nsecs(void); @@ -329,6 +331,7 @@ extern void os_set_ioignore(void); extern void init_irq_signals(int on_sigstack); /* sigio.c */ +extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd, int read); diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 83b688ca198..f845b3629a6 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -7,6 +7,7 @@ #define __REGISTERS_H #include "sysdep/ptrace.h" +#include "sysdep/archsetjmp.h" extern void init_thread_registers(union 
uml_pt_regs *to); extern int save_fp_registers(int pid, unsigned long *fp_regs); @@ -15,6 +16,6 @@ extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs); -extern void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer); +extern unsigned long get_thread_reg(int reg, jmp_buf *buf); #endif diff --git a/arch/um/include/sysdep-i386/archsetjmp.h b/arch/um/include/sysdep-i386/archsetjmp.h new file mode 100644 index 00000000000..ea1ba3d42ae --- /dev/null +++ b/arch/um/include/sysdep-i386/archsetjmp.h @@ -0,0 +1,19 @@ +/* + * arch/i386/include/klibc/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned int __ebx; + unsigned int __esp; + unsigned int __ebp; + unsigned int __esi; + unsigned int __edi; + unsigned int __eip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#endif /* _SETJMP_H */ diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h deleted file mode 100644 index 07518b16213..00000000000 --- a/arch/um/include/sysdep-i386/signal.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#ifndef __I386_SIGNAL_H_ -#define __I386_SIGNAL_H_ - -#include <signal.h> - -#define ARCH_SIGHDLR_PARAM int sig - -#define ARCH_GET_SIGCONTEXT(sc, sig) \ - do sc = (struct sigcontext *) (&sig + 1); while(0) - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-x86_64/archsetjmp.h b/arch/um/include/sysdep-x86_64/archsetjmp.h new file mode 100644 index 00000000000..454fc60aff6 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/archsetjmp.h @@ -0,0 +1,21 @@ +/* + * arch/x86_64/include/klibc/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned long __rbx; + unsigned long __rsp; + unsigned long __rbp; + unsigned long __r12; + unsigned long __r13; + unsigned long __r14; + unsigned long __r15; + unsigned long __rip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#endif /* _SETJMP_H */ diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h deleted file mode 100644 index 6142897af3d..00000000000 --- a/arch/um/include/sysdep-x86_64/signal.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#ifndef __X86_64_SIGNAL_H_ -#define __X86_64_SIGNAL_H_ - -#define ARCH_SIGHDLR_PARAM int sig - -#define ARCH_GET_SIGCONTEXT(sc, sig_addr) \ - do { \ - struct ucontext *__uc; \ - asm("movq %%rdx, %0" : "=r" (__uc)); \ - sc = (struct sigcontext *) &__uc->uc_mcontext; \ - } while(0) - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index fc38a6d5906..0561c43b468 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -41,9 +41,11 @@ static long execve1(char *file, char __user * __user *argv, long error; #ifdef CONFIG_TTY_LOG - task_lock(current); + mutex_lock(&tty_mutex); + task_lock(current); /* FIXME: is this needed ? */ log_exec(argv, current->signal->tty); task_unlock(current); + mutex_unlock(&tty_mutex); #endif error = do_execve(file, argv, env, &current->thread.regs); if (error == 0){ diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 589c69a7504..ce7f233fc49 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -142,19 +142,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) .events = events, .current_events = 0 } ); - /* Critical section - locked by a spinlock because this stuff can - * be changed from interrupt handlers. The stuff above is done - * outside the lock because it allocates memory. - */ - - /* Actually, it only looks like it can be called from interrupt - * context. The culprit is reactivate_fd, which calls - * maybe_sigio_broken, which calls write_sigio_workaround, - * which calls activate_fd. However, write_sigio_workaround should - * only be called once, at boot time. That would make it clear that - * this is called only from process context, and can be locked with - * a semaphore. 
- */ spin_lock_irqsave(&irq_lock, flags); for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { if ((irq_fd->fd == fd) && (irq_fd->type == type)) { @@ -165,7 +152,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) } } - /*-------------*/ if (type == IRQ_WRITE) fd = -1; @@ -198,7 +184,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) spin_lock_irqsave(&irq_lock, flags); } - /*-------------*/ *last_irq_ptr = new_fd; last_irq_ptr = &new_fd->next; @@ -210,14 +195,14 @@ int activate_fd(int irq, int fd, int type, void *dev_id) */ maybe_sigio_broken(fd, (type == IRQ_READ)); - return(0); + return 0; out_unlock: spin_unlock_irqrestore(&irq_lock, flags); out_kfree: kfree(new_fd); out: - return(err); + return err; } static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) @@ -302,10 +287,7 @@ void reactivate_fd(int fd, int irqnum) os_set_pollfd(i, irq->fd); spin_unlock_irqrestore(&irq_lock, flags); - /* This calls activate_fd, so it has to be outside the critical - * section. 
- */ - maybe_sigio_broken(fd, (irq->type == IRQ_READ)); + add_sigio_fd(fd); } void deactivate_fd(int fd, int irqnum) @@ -316,11 +298,15 @@ void deactivate_fd(int fd, int irqnum) spin_lock_irqsave(&irq_lock, flags); irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) - goto out; + if(irq == NULL){ + spin_unlock_irqrestore(&irq_lock, flags); + return; + } + os_set_pollfd(i, -1); - out: spin_unlock_irqrestore(&irq_lock, flags); + + ignore_sigio_fd(fd); } int deactivate_all_fds(void) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 61280167c56..93121c6d26e 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -79,8 +79,10 @@ void mem_init(void) /* this will put all low memory onto the freelists */ totalram_pages = free_all_bootmem(); +#ifdef CONFIG_HIGHMEM totalhigh_pages = highmem >> PAGE_SHIFT; totalram_pages += totalhigh_pages; +#endif num_physpages = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", @@ -221,10 +223,13 @@ void paging_init(void) empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) + for(i = 0; i < ARRAY_SIZE(zones_size); i++) zones_size[i] = 0; + zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); +#ifdef CONFIG_HIGHMEM zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT; +#endif free_area_init(zones_size); /* diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index f6a5a502120..537895d68ad 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -23,6 +23,7 @@ #include "linux/proc_fs.h" #include "linux/ptrace.h" #include "linux/random.h" +#include "linux/personality.h" #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" @@ -476,7 +477,7 @@ int singlestepping(void * t) #ifndef arch_align_stack unsigned long arch_align_stack(unsigned long sp) { - if 
(randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 3ef73bf2e78..f602623644a 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -22,7 +22,7 @@ static void kill_idlers(int me) struct task_struct *p; int i; - for(i = 0; i < sizeof(idle_threads)/sizeof(idle_threads[0]); i++){ + for(i = 0; i < ARRAY_SIZE(idle_threads); i++){ p = idle_threads[i]; if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) os_kill_process(p->thread.mode.tt.extern_pid, 0); @@ -62,14 +62,3 @@ void machine_halt(void) { machine_power_off(); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 624ca238d1f..79c22707a63 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -55,7 +55,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, * destroy_context_skas. 
*/ - mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + mm->context.skas.last_page_table = pmd_page_vaddr(*pmd); #ifdef CONFIG_3_LEVEL_PGTABLES mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); #endif diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 552ca1cb984..2454bbd9555 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -35,9 +35,6 @@ unsigned long long sched_clock(void) return (unsigned long long)jiffies_64 * (1000000000 / HZ); } -/* Changed at early boot */ -int timer_irq_inited = 0; - static unsigned long long prev_nsecs; #ifdef CONFIG_UML_REAL_TIME_CLOCK static long long delta; /* Deviation per interval */ @@ -113,12 +110,13 @@ static void register_timer(void) err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL); if(err != 0) - printk(KERN_ERR "timer_init : request_irq failed - " + printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - timer_irq_inited = 1; - - user_time_init(); + err = set_interval(1); + if(err != 0) + printk(KERN_ERR "register_timer : set_interval failed - " + "errno = %d\n", -err); } extern void (*late_time_init)(void); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index f5b0636f9ad..54a5ff25645 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -16,12 +16,12 @@ #include "os.h" static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int *index, + int r, int w, int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { - __u64 offset; + __u64 offset; struct host_vm_op *last; int fd, ret = 0; @@ -89,7 +89,7 @@ static int add_munmap(unsigned long addr, unsigned long len, static int add_mprotect(unsigned long addr, unsigned long 
len, int r, int w, int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, - int (*do_ops)(union mm_context *, struct host_vm_op *, + int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { struct host_vm_op *last; @@ -124,105 +124,105 @@ static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, + unsigned long end_addr, int force, int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { - pgd_t *npgd; - pud_t *npud; - pmd_t *npmd; - pte_t *npte; - union mm_context *mmu = &mm->context; - unsigned long addr, end; - int r, w, x; - struct host_vm_op ops[1]; - void *flush = NULL; - int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; - int ret = 0; - - if(mm == NULL) return; - - ops[0].type = NONE; - for(addr = start_addr; addr < end_addr && !ret;){ - npgd = pgd_offset(mm, addr); - if(!pgd_present(*npgd)){ - end = ADD_ROUND(addr, PGDIR_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pgd_newpage(*npgd)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pgd_mkuptodate(*npgd); - } - addr = end; - continue; - } - - npud = pud_offset(npgd, addr); - if(!pud_present(*npud)){ - end = ADD_ROUND(addr, PUD_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pud_newpage(*npud)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pud_mkuptodate(*npud); - } - addr = end; - continue; - } - - npmd = pmd_offset(npud, addr); - if(!pmd_present(*npmd)){ - end = ADD_ROUND(addr, PMD_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pmd_newpage(*npmd)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pmd_mkuptodate(*npmd); - } - addr = end; - continue; - } - - npte = 
pte_offset_kernel(npmd, addr); - r = pte_read(*npte); - w = pte_write(*npte); - x = pte_exec(*npte); + pgd_t *npgd; + pud_t *npud; + pmd_t *npmd; + pte_t *npte; + union mm_context *mmu = &mm->context; + unsigned long addr, end; + int r, w, x; + struct host_vm_op ops[1]; + void *flush = NULL; + int op_index = -1, last_op = ARRAY_SIZE(ops) - 1; + int ret = 0; + + if(mm == NULL) + return; + + ops[0].type = NONE; + for(addr = start_addr; addr < end_addr && !ret;){ + npgd = pgd_offset(mm, addr); + if(!pgd_present(*npgd)){ + end = ADD_ROUND(addr, PGDIR_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pgd_newpage(*npgd)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pgd_mkuptodate(*npgd); + } + addr = end; + continue; + } + + npud = pud_offset(npgd, addr); + if(!pud_present(*npud)){ + end = ADD_ROUND(addr, PUD_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pud_newpage(*npud)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pud_mkuptodate(*npud); + } + addr = end; + continue; + } + + npmd = pmd_offset(npud, addr); + if(!pmd_present(*npmd)){ + end = ADD_ROUND(addr, PMD_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pmd_newpage(*npmd)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pmd_mkuptodate(*npmd); + } + addr = end; + continue; + } + + npte = pte_offset_kernel(npmd, addr); + r = pte_read(*npte); + w = pte_write(*npte); + x = pte_exec(*npte); if (!pte_young(*npte)) { r = 0; w = 0; } else if (!pte_dirty(*npte)) { w = 0; } - if(force || pte_newpage(*npte)){ - if(pte_present(*npte)) - ret = add_mmap(addr, - pte_val(*npte) & PAGE_MASK, - PAGE_SIZE, r, w, x, ops, - &op_index, last_op, mmu, - &flush, do_ops); + if(force || pte_newpage(*npte)){ + if(pte_present(*npte)) + ret = add_mmap(addr, + pte_val(*npte) & PAGE_MASK, + PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); else ret = 
add_munmap(addr, PAGE_SIZE, ops, &op_index, last_op, mmu, &flush, do_ops); - } - else if(pte_newprot(*npte)) + } + else if(pte_newprot(*npte)) ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, &op_index, last_op, mmu, &flush, do_ops); - *npte = pte_mkuptodate(*npte); - addr += PAGE_SIZE; - } - + *npte = pte_mkuptodate(*npte); + addr += PAGE_SIZE; + } if(!ret) ret = (*do_ops)(mmu, ops, op_index, 1, &flush); - /* This is not an else because ret is modified above */ +/* This is not an else because ret is modified above */ if(ret) { printk("fix_range_common: failed, killing current process\n"); force_sig(SIGKILL, current); @@ -231,160 +231,160 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) { - struct mm_struct *mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long addr, last; - int updated = 0, err; - - mm = &init_mm; - for(addr = start; addr < end;){ - pgd = pgd_offset(mm, addr); - if(!pgd_present(*pgd)){ - last = ADD_ROUND(addr, PGDIR_SIZE); - if(last > end) - last = end; - if(pgd_newpage(*pgd)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pud = pud_offset(pgd, addr); - if(!pud_present(*pud)){ - last = ADD_ROUND(addr, PUD_SIZE); - if(last > end) - last = end; - if(pud_newpage(*pud)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pmd = pmd_offset(pud, addr); - if(!pmd_present(*pmd)){ - last = ADD_ROUND(addr, PMD_SIZE); - if(last > end) - last = end; - if(pmd_newpage(*pmd)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - if(!pte_present(*pte) 
|| pte_newpage(*pte)){ - updated = 1; - err = os_unmap_memory((void *) addr, - PAGE_SIZE); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - if(pte_present(*pte)) - map_memory(addr, - pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, 1, 1, 1); - } - else if(pte_newprot(*pte)){ - updated = 1; - os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); - } - addr += PAGE_SIZE; - } - return(updated); + struct mm_struct *mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr, last; + int updated = 0, err; + + mm = &init_mm; + for(addr = start; addr < end;){ + pgd = pgd_offset(mm, addr); + if(!pgd_present(*pgd)){ + last = ADD_ROUND(addr, PGDIR_SIZE); + if(last > end) + last = end; + if(pgd_newpage(*pgd)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pud = pud_offset(pgd, addr); + if(!pud_present(*pud)){ + last = ADD_ROUND(addr, PUD_SIZE); + if(last > end) + last = end; + if(pud_newpage(*pud)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pmd = pmd_offset(pud, addr); + if(!pmd_present(*pmd)){ + last = ADD_ROUND(addr, PMD_SIZE); + if(last > end) + last = end; + if(pmd_newpage(*pmd)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pte = pte_offset_kernel(pmd, addr); + if(!pte_present(*pte) || pte_newpage(*pte)){ + updated = 1; + err = os_unmap_memory((void *) addr, + PAGE_SIZE); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + if(pte_present(*pte)) + map_memory(addr, + pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, 1, 1, 1); + } + else if(pte_newprot(*pte)){ + updated = 1; + os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); + } + addr += PAGE_SIZE; + } + 
return(updated); } pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { - return(pgd_offset(mm, address)); + return(pgd_offset(mm, address)); } pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address) { - return(pud_offset(pgd, address)); + return(pud_offset(pgd, address)); } pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address) { - return(pmd_offset(pud, address)); + return(pmd_offset(pud, address)); } pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) { - return(pte_offset_kernel(pmd, address)); + return(pte_offset_kernel(pmd, address)); } pte_t *addr_pte(struct task_struct *task, unsigned long addr) { - pgd_t *pgd = pgd_offset(task->mm, addr); - pud_t *pud = pud_offset(pgd, addr); - pmd_t *pmd = pmd_offset(pud, addr); + pgd_t *pgd = pgd_offset(task->mm, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); - return(pte_offset_map(pmd, addr)); + return(pte_offset_map(pmd, addr)); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); } void flush_tlb_all(void) { - flush_tlb_mm(current->mm); + flush_tlb_mm(current->mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); } void flush_tlb_kernel_vm(void) { - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); } void __flush_tlb_one(unsigned long addr) { - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 
unsigned long end) { - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); } void flush_tlb_mm(struct mm_struct *mm) { - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); } void force_flush_all(void) { - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ac70fa5a2e2..e5eeaf2b6af 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -227,9 +227,16 @@ void bad_segv(struct faultinfo fi, unsigned long ip) void relay_signal(int sig, union uml_pt_regs *regs) { - if(arch_handle_signal(sig, regs)) return; - if(!UPT_IS_USER(regs)) + if(arch_handle_signal(sig, regs)) + return; + + if(!UPT_IS_USER(regs)){ + if(sig == SIGBUS) + printk("Bus error - the /dev/shm or /tmp mount likely " + "just ran out of space\n"); panic("Kernel mode signal %d", sig); + } + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); force_sig(sig, current); } diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 6987d1d247a..cd15b9df5b5 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -42,7 +42,7 @@ static int helper_child(void *arg) if(data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); execvp(argv[0], argv); - errval = errno; + errval = -errno; printk("helper_child - execve of '%s' failed - errno = %d\n", argv[0], errno); os_write_file(data->fd, &errval, sizeof(errval)); kill(os_getpid(), SIGKILL); @@ -62,7 +62,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, stack = *stack_out; else stack = alloc_stack(0, __cant_sleep()); if(stack == 0) - return(-ENOMEM); + return -ENOMEM; ret = os_pipe(fds, 1, 0); if(ret < 0){ @@ -95,16 +95,16 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, /* 
Read the errno value from the child, if the exec failed, or get 0 if * the exec succeeded because the pipe fd was set as close-on-exec. */ n = os_read_file(fds[0], &ret, sizeof(ret)); - if (n < 0) { - printk("run_helper : read on pipe failed, ret = %d\n", -n); - ret = n; - kill(pid, SIGKILL); - CATCH_EINTR(waitpid(pid, NULL, 0)); - } else if(n != 0){ - CATCH_EINTR(n = waitpid(pid, NULL, 0)); - ret = -errno; - } else { + if(n == 0) ret = pid; + else { + if(n < 0){ + printk("run_helper : read on pipe failed, ret = %d\n", + -n); + ret = n; + kill(pid, SIGKILL); + } + CATCH_EINTR(waitpid(pid, NULL, 0)); } out_close: diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index 7555bf9c33d..a97206df5b5 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -132,7 +132,7 @@ void os_set_pollfd(int i, int fd) void os_set_ioignore(void) { - set_handler(SIGIO, SIG_IGN, 0, -1); + signal(SIGIO, SIG_IGN); } void init_irq_signals(int on_sigstack) diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 90912aaca7a..d1c5670787d 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -67,13 +67,32 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - signal(SIGINT, SIG_DFL); - signal(SIGTERM, SIG_DFL); - signal(SIGHUP, SIG_DFL); uml_cleanup(); exit(1); } +static void install_fatal_handler(int sig) +{ + struct sigaction action; + + /* All signals are enabled in this handler ... */ + sigemptyset(&action.sa_mask); + + /* ... including the signal being handled, plus we want the + * handler reset to the default behavior, so that if an exit + * handler is hanging for some reason, the UML will just die + * after this signal is sent a second time. 
+ */ + action.sa_flags = SA_RESETHAND | SA_NODEFER; + action.sa_restorer = NULL; + action.sa_handler = last_ditch_exit; + if(sigaction(sig, &action, NULL) < 0){ + printf("failed to install handler for signal %d - errno = %d\n", + errno); + exit(1); + } +} + #define UML_LIB_PATH ":/usr/lib/uml" static void setup_env_path(void) @@ -158,9 +177,12 @@ int main(int argc, char **argv, char **envp) } new_argv[argc] = NULL; - set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); + /* Allow these signals to bring down a UML if all other + * methods of control fail. + */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); + install_fatal_handler(SIGHUP); scan_elf_aux( envp); diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 560c8063c77..b170b4704dc 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -114,14 +114,14 @@ static void which_tmpdir(void) } while(1){ - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); + found = next(fd, buf, ARRAY_SIZE(buf), ' '); if(found != 1) break; if(!strncmp(buf, "/dev/shm", strlen("/dev/shm"))) goto found; - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), '\n'); + found = next(fd, buf, ARRAY_SIZE(buf), '\n'); if(found != 1) break; } @@ -135,7 +135,7 @@ err: return; found: - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); + found = next(fd, buf, ARRAY_SIZE(buf), ' '); if(found != 1) goto err; diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index b98d3ca2cd1..ff203625a4b 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -7,7 +7,6 @@ #include <stdio.h> #include <errno.h> #include <signal.h> -#include <setjmp.h> #include <linux/unistd.h> #include <sys/mman.h> #include <sys/wait.h> @@ -247,7 +246,17 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) 
set_sigstack(sig_stack, pages * page_size()); flags = SA_ONSTACK; } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); + if(usr1_handler){ + struct sigaction sa; + + sa.sa_handler = usr1_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = flags; + sa.sa_restorer = NULL; + if(sigaction(SIGUSR1, &sa, NULL) < 0) + panic("init_new_thread_stack - sigaction failed - " + "errno = %d\n", errno); + } } void init_new_thread_signals(void) diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 0ecac563c7b..f6457765b17 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -43,17 +43,9 @@ struct pollfds { /* Protected by sigio_lock(). Used by the sigio thread, but the UML thread * synchronizes with it. */ -static struct pollfds current_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; - -static struct pollfds next_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; static int write_sigio_thread(void *unused) { @@ -78,7 +70,8 @@ static int write_sigio_thread(void *unused) n = os_read_file(sigio_private[1], &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : " - "read failed, err = %d\n", -n); + "read on socket failed, " + "err = %d\n", -n); tmp = current_poll; current_poll = next_poll; next_poll = tmp; @@ -93,35 +86,36 @@ static int write_sigio_thread(void *unused) n = os_write_file(respond_fd, &c, sizeof(c)); if(n != sizeof(c)) - printk("write_sigio_thread : write failed, " - "err = %d\n", -n); + printk("write_sigio_thread : write on socket " + "failed, err = %d\n", -n); } } return 0; } -static int need_poll(int n) +static int need_poll(struct pollfds *polls, int n) { - if(n <= next_poll.size){ - next_poll.used = n; - return(0); + if(n <= polls->size){ + polls->used = n; + return 0; } - kfree(next_poll.poll); - next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); - if(next_poll.poll == NULL){ + 
kfree(polls->poll); + polls->poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); + if(polls->poll == NULL){ printk("need_poll : failed to allocate new pollfds\n"); - next_poll.size = 0; - next_poll.used = 0; - return(-1); + polls->size = 0; + polls->used = 0; + return -ENOMEM; } - next_poll.size = n; - next_poll.used = n; - return(0); + polls->size = n; + polls->used = n; + return 0; } /* Must be called with sigio_lock held, because it's needed by the marked - * critical section. */ + * critical section. + */ static void update_thread(void) { unsigned long flags; @@ -156,34 +150,39 @@ static void update_thread(void) set_signals(flags); } -static int add_sigio_fd(int fd, int read) +int add_sigio_fd(int fd) { - int err = 0, i, n, events; + struct pollfd *p; + int err = 0, i, n; sigio_lock(); + for(i = 0; i < all_sigio_fds.used; i++){ + if(all_sigio_fds.poll[i].fd == fd) + break; + } + if(i == all_sigio_fds.used) + goto out; + + p = &all_sigio_fds.poll[i]; + for(i = 0; i < current_poll.used; i++){ if(current_poll.poll[i].fd == fd) goto out; } n = current_poll.used + 1; - err = need_poll(n); + err = need_poll(&next_poll, n); if(err) goto out; for(i = 0; i < current_poll.used; i++) next_poll.poll[i] = current_poll.poll[i]; - if(read) events = POLLIN; - else events = POLLOUT; - - next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); + next_poll.poll[n - 1] = *p; update_thread(); out: sigio_unlock(); - return(err); + return err; } int ignore_sigio_fd(int fd) @@ -205,18 +204,14 @@ int ignore_sigio_fd(int fd) if(i == current_poll.used) goto out; - err = need_poll(current_poll.used - 1); + err = need_poll(&next_poll, current_poll.used - 1); if(err) goto out; for(i = 0; i < current_poll.used; i++){ p = &current_poll.poll[i]; - if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; - } - if(n == i){ - printk("ignore_sigio_fd : fd %d not found\n", fd); - err = -1; - goto out; + if(p->fd != fd) + next_poll.poll[n++] = *p; } update_thread(); 
@@ -234,7 +229,7 @@ static struct pollfd *setup_initial_poll(int fd) printk("setup_initial_poll : failed to allocate poll\n"); return NULL; } - *p = ((struct pollfd) { .fd = fd, + *p = ((struct pollfd) { .fd = fd, .events = POLLIN, .revents = 0 }); return p; @@ -323,6 +318,8 @@ out_close1: void maybe_sigio_broken(int fd, int read) { + int err; + if(!isatty(fd)) return; @@ -330,7 +327,19 @@ void maybe_sigio_broken(int fd, int read) return; write_sigio_workaround(); - add_sigio_fd(fd, read); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if(err){ + printk("maybe_sigio_broken - failed to add pollfd\n"); + goto out; + } + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = read ? POLLIN : POLLOUT, + .revents = 0 }); +out: + sigio_unlock(); } static void sigio_cleanup(void) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 60e4faedf25..6b81739279d 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -15,7 +15,6 @@ #include "user.h" #include "signal_kern.h" #include "sysdep/sigcontext.h" -#include "sysdep/signal.h" #include "sigcontext.h" #include "mode.h" #include "os.h" @@ -38,18 +37,10 @@ static int signals_enabled = 1; static int pending = 0; -void sig_handler(ARCH_SIGHDLR_PARAM) +void sig_handler(int sig, struct sigcontext *sc) { - struct sigcontext *sc; int enabled; - /* Must be the first thing that this handler does - x86_64 stores - * the sigcontext in %rdx, and we need to save it before it has a - * chance to get trashed. 
- */ - - ARCH_GET_SIGCONTEXT(sc, sig); - enabled = signals_enabled; if(!enabled && (sig == SIGIO)){ pending |= SIGIO_MASK; @@ -64,15 +55,8 @@ void sig_handler(ARCH_SIGHDLR_PARAM) set_signals(enabled); } -extern int timer_irq_inited; - static void real_alarm_handler(int sig, struct sigcontext *sc) { - if(!timer_irq_inited){ - signals_enabled = 1; - return; - } - if(sig == SIGALRM) switch_timers(0); @@ -84,13 +68,10 @@ static void real_alarm_handler(int sig, struct sigcontext *sc) } -void alarm_handler(ARCH_SIGHDLR_PARAM) +void alarm_handler(int sig, struct sigcontext *sc) { - struct sigcontext *sc; int enabled; - ARCH_GET_SIGCONTEXT(sc, sig); - enabled = signals_enabled; if(!signals_enabled){ if(sig == SIGVTALRM) @@ -126,6 +107,10 @@ void remove_sigstack(void) panic("disabling signal stack failed, errno = %d\n", errno); } +void (*handlers[_NSIG])(int sig, struct sigcontext *sc); + +extern void hard_handler(int sig); + void set_handler(int sig, void (*handler)(int), int flags, ...) { struct sigaction action; @@ -133,13 +118,16 @@ void set_handler(int sig, void (*handler)(int), int flags, ...) 
sigset_t sig_mask; int mask; - va_start(ap, flags); - action.sa_handler = handler; + handlers[sig] = (void (*)(int, struct sigcontext *)) handler; + action.sa_handler = hard_handler; + sigemptyset(&action.sa_mask); - while((mask = va_arg(ap, int)) != -1){ + + va_start(ap, flags); + while((mask = va_arg(ap, int)) != -1) sigaddset(&action.sa_mask, mask); - } va_end(ap); + action.sa_flags = flags; action.sa_restorer = NULL; if(sigaction(sig, &action, NULL) < 0) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 7baf90fda58..42e3d1ed802 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -8,7 +8,6 @@ #include <unistd.h> #include <errno.h> #include <signal.h> -#include <setjmp.h> #include <sched.h> #include "ptrace_user.h" #include <sys/wait.h> @@ -156,11 +155,15 @@ extern int __syscall_stub_start; static int userspace_tramp(void *stack) { void *addr; + int err; ptrace(PTRACE_TRACEME, 0, 0, 0); init_new_thread_signals(); - enable_timer(); + err = set_interval(1); + if(err) + panic("userspace_tramp - setting timer failed, errno = %d\n", + err); if(!proc_mm){ /* This has a pte, but it can't be mapped in with the usual @@ -190,14 +193,25 @@ static int userspace_tramp(void *stack) } } if(!ptrace_faultinfo && (stack != NULL)){ + struct sigaction sa; + unsigned long v = UML_CONFIG_STUB_CODE + (unsigned long) stub_segv_handler - (unsigned long) &__syscall_stub_start; set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); - set_handler(SIGSEGV, (void *) v, SA_ONSTACK, - SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, - SIGUSR1, -1); + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGIO); + sigaddset(&sa.sa_mask, SIGWINCH); + sigaddset(&sa.sa_mask, SIGALRM); + sigaddset(&sa.sa_mask, SIGVTALRM); + sigaddset(&sa.sa_mask, SIGUSR1); + sa.sa_flags = SA_ONSTACK; + sa.sa_handler = (void *) v; + sa.sa_restorer = NULL; + if(sigaction(SIGSEGV, &sa, NULL) < 0) + panic("userspace_tramp - setting SIGSEGV handler " + "failed - 
errno = %d\n", errno); } os_stop_process(os_getpid()); @@ -470,7 +484,7 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; if(UML_SETJMP(&buf) == 0) - siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK); + UML_LONGJMP(fork_buf, INIT_JMP_REMOVE_SIGSTACK); } void switch_threads(void *me, void *next) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 50314850400..7fe92680c7d 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -14,7 +14,6 @@ #include <sched.h> #include <fcntl.h> #include <errno.h> -#include <setjmp.h> #include <sys/time.h> #include <sys/wait.h> #include <sys/mman.h> diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile index b3213613c41..37806621b25 100644 --- a/arch/um/os-Linux/sys-i386/Makefile +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-$(CONFIG_MODE_SKAS) = registers.o tls.o +obj-$(CONFIG_MODE_SKAS) = registers.o signal.o tls.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 516f66dd87e..7cd0369e02b 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -5,12 +5,12 @@ #include <errno.h> #include <string.h> -#include <setjmp.h> #include "sysdep/ptrace_user.h" #include "sysdep/ptrace.h" #include "uml-config.h" #include "skas_ptregs.h" #include "registers.h" +#include "longjmp.h" #include "user.h" /* These are set once at boot time and not changed thereafter */ @@ -130,11 +130,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) HOST_FP_SIZE * sizeof(unsigned long)); } -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +unsigned long get_thread_reg(int reg, jmp_buf *buf) { - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]); - UPT_SET(uml_regs, EBP, 
jmpbuf->__jmpbuf[JB_BP]); + switch(reg){ + case EIP: return buf[0]->__eip; + case UESP: return buf[0]->__esp; + case EBP: return buf[0]->__ebp; + default: + printk("get_thread_regs - unknown register %d\n", reg); + return 0; + } } diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c new file mode 100644 index 00000000000..0d3eae51835 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/signal.c @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void (*handlers[])(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + (*handlers[sig])(sig, sc); +} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile index 340ef26f594..f67842a7735 100644 --- a/arch/um/os-Linux/sys-x86_64/Makefile +++ b/arch/um/os-Linux/sys-x86_64/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-$(CONFIG_MODE_SKAS) = registers.o +obj-$(CONFIG_MODE_SKAS) = registers.o signal.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index becd898d939..cb8e8a26328 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -5,11 +5,11 @@ #include <errno.h> #include <string.h> -#include <setjmp.h> #include "ptrace_user.h" #include "uml-config.h" #include "skas_ptregs.h" #include "registers.h" +#include "longjmp.h" #include "user.h" /* These are set once at boot time and not changed thereafter */ @@ -78,11 +78,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) HOST_FP_SIZE * sizeof(unsigned long)); } -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +unsigned long get_thread_reg(int reg, jmp_buf *buf) { - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, RSP, 
jmpbuf->__jmpbuf[JB_RSP]); - UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]); + switch(reg){ + case RIP: return buf[0]->__rip; + case RSP: return buf[0]->__rsp; + case RBP: return buf[0]->__rbp; + default: + printk("get_thread_regs - unknown register %d\n", reg); + return 0; + } } diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c new file mode 100644 index 00000000000..3f369e5f976 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/signal.c @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void (*handlers[])(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct ucontext *uc; + asm("movq %%rdx, %0" : "=r" (uc)); + + (*handlers[sig])(sig, (struct sigcontext *) &uc->uc_mcontext); +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 4ae73c0e548..38be096e750 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -17,20 +17,25 @@ #include "kern_constants.h" #include "os.h" -static void set_interval(int timer_type) +int set_interval(int is_virtual) { int usec = 1000000/hz(); + int timer_type = is_virtual ? 
ITIMER_VIRTUAL : ITIMER_REAL; struct itimerval interval = ((struct itimerval) { { 0, usec }, { 0, usec } }); if(setitimer(timer_type, &interval, NULL) == -1) - panic("setitimer failed - errno = %d\n", errno); + return -errno; + + return 0; } +#ifdef CONFIG_MODE_TT void enable_timer(void) { - set_interval(ITIMER_VIRTUAL); + set_interval(1); } +#endif void disable_timer(void) { @@ -40,8 +45,8 @@ void disable_timer(void) printk("disnable_timer - setitimer failed, errno = %d\n", errno); /* If there are signals already queued, after unblocking ignore them */ - set_handler(SIGALRM, SIG_IGN, 0, -1); - set_handler(SIGVTALRM, SIG_IGN, 0, -1); + signal(SIGALRM, SIG_IGN); + signal(SIGVTALRM, SIG_IGN); } void switch_timers(int to_real) @@ -74,7 +79,7 @@ void uml_idle_timer(void) set_handler(SIGALRM, (__sighandler_t) alarm_handler, SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); - set_interval(ITIMER_REAL); + set_interval(0); } #endif @@ -94,8 +99,3 @@ void idle_sleep(int secs) ts.tv_nsec = 0; nanosleep(&ts, NULL); } - -void user_time_init(void) -{ - set_interval(ITIMER_VIRTUAL); -} diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c index 90b29ae9af4..1df231a2624 100644 --- a/arch/um/os-Linux/trap.c +++ b/arch/um/os-Linux/trap.c @@ -5,7 +5,6 @@ #include <stdlib.h> #include <signal.h> -#include <setjmp.h> #include "kern_util.h" #include "user_util.h" #include "os.h" diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c index 865f6a6a259..bbb73a65037 100644 --- a/arch/um/os-Linux/uaccess.c +++ b/arch/um/os-Linux/uaccess.c @@ -4,8 +4,7 @@ * Licensed under the GPL */ -#include <setjmp.h> -#include <string.h> +#include <stddef.h> #include "longjmp.h" unsigned long __do_user_copy(void *to, const void *from, int n, diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index c47a2a7ce70..3f5b1514e8a 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -7,7 +7,6 @@ #include <stdlib.h> #include <unistd.h> #include <limits.h> 
-#include <setjmp.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/utsname.h> @@ -107,11 +106,11 @@ int setjmp_wrapper(void (*proc)(void *, void *), ...) jmp_buf buf; int n; - n = sigsetjmp(buf, 1); + n = UML_SETJMP(&buf); if(n == 0){ va_start(args, proc); (*proc)(&buf, &args); } va_end(args); - return(n); + return n; } diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 374d61a1943..59cc7027575 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,5 +1,5 @@ obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o \ + ptrace_user.o setjmp.o signal.o sigcontext.o syscalls.o sysrq.o \ sys_call_table.o tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c index 41b0ab2fe83..f1bcd399ac9 100644 --- a/arch/um/sys-i386/bugs.c +++ b/arch/um/sys-i386/bugs.c @@ -13,6 +13,7 @@ #include "sysdep/ptrace.h" #include "task.h" #include "os.h" +#include "user_util.h" #define MAXTOKEN 64 @@ -104,17 +105,17 @@ int cpu_feature(char *what, char *buf, int len) static int check_cpu_flag(char *feature, int *have_it) { char buf[MAXTOKEN], c; - int fd, len = sizeof(buf)/sizeof(buf[0]); + int fd, len = ARRAY_SIZE(buf); printk("Checking for host processor %s support...", feature); fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); if(fd < 0){ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); - return(0); + return 0; } *have_it = 0; - if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) + if(!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf))) goto out; c = token(fd, buf, len - 1, ' '); @@ -138,7 +139,7 @@ static int check_cpu_flag(char *feature, int *have_it) if(*have_it == 0) printk("No\n"); else if(*have_it == 1) printk("Yes\n"); os_close_file(fd); - return(1); + return 1; } #if 0 /* This doesn't work in tt mode, plus it's causing compilation problems diff --git 
a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index fe0877b3509..69971b78bea 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -424,9 +424,8 @@ void ldt_get_host_info(void) size++; } - if(size < sizeof(dummy_list)/sizeof(dummy_list[0])) { + if(size < ARRAY_SIZE(dummy_list)) host_ldt_entries = dummy_list; - } else { size = (size + 1) * sizeof(dummy_list[0]); host_ldt_entries = (short *)kmalloc(size, GFP_KERNEL); diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c index 40aa8853144..5f3cc668582 100644 --- a/arch/um/sys-i386/ptrace_user.c +++ b/arch/um/sys-i386/ptrace_user.c @@ -15,6 +15,7 @@ #include "user.h" #include "os.h" #include "uml-config.h" +#include "user_util.h" int ptrace_getregs(long pid, unsigned long *regs_out) { @@ -51,7 +52,7 @@ static void write_debugregs(int pid, unsigned long *regs) int nregs, i; dummy = NULL; - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + nregs = ARRAY_SIZE(dummy->u_debugreg); for(i = 0; i < nregs; i++){ if((i == 4) || (i == 5)) continue; if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], @@ -68,7 +69,7 @@ static void read_debugregs(int pid, unsigned long *regs) int nregs, i; dummy = NULL; - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + nregs = ARRAY_SIZE(dummy->u_debugreg); for(i = 0; i < nregs; i++){ regs[i] = ptrace(PTRACE_PEEKUSR, pid, &dummy->u_debugreg[i], 0); diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S new file mode 100644 index 00000000000..b766792c993 --- /dev/null +++ b/arch/um/sys-i386/setjmp.S @@ -0,0 +1,58 @@ +# +# arch/i386/setjmp.S +# +# setjmp/longjmp for the i386 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %ebx +# %esp +# %ebp +# %esi +# %edi +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: +#ifdef _REGPARM + movl %eax,%edx +#else + movl 4(%esp),%edx +#endif + popl %ecx # Return address, and adjust the stack + xorl 
%eax,%eax # Return value + movl %ebx,(%edx) + movl %esp,4(%edx) # Post-return %esp! + pushl %ecx # Make the call/return stack happy + movl %ebp,8(%edx) + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: +#ifdef _REGPARM + xchgl %eax,%edx +#else + movl 4(%esp),%edx # jmp_ptr address + movl 8(%esp),%eax # Return value +#endif + movl (%edx),%ebx + movl 4(%edx),%esp + movl 8(%edx),%ebp + movl 12(%edx),%esi + movl 16(%edx),%edi + jmp *20(%edx) + + .size longjmp,.-longjmp diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index c19794d435d..f41768b8e25 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -5,8 +5,8 @@ # obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ - sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o \ - tls.o + setjmp.o sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o \ + ksyms.o tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o obj-$(CONFIG_MODULES) += um_module.o diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S new file mode 100644 index 00000000000..45f547b4043 --- /dev/null +++ b/arch/um/sys-x86_64/setjmp.S @@ -0,0 +1,54 @@ +# +# arch/x86_64/setjmp.S +# +# setjmp/longjmp for the x86-64 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %rbx +# %rsp (post-return) +# %rbp +# %r12 +# %r13 +# %r14 +# %r15 +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: + pop %rsi # Return address, and adjust the stack + xorl %eax,%eax # Return value + movq %rbx,(%rdi) + movq %rsp,8(%rdi) # Post-return %rsp! 
+ push %rsi # Make the call/return stack happy + movq %rbp,16(%rdi) + movq %r12,24(%rdi) + movq %r13,32(%rdi) + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: + movl %esi,%eax # Return value (int) + movq (%rdi),%rbx + movq 8(%rdi),%rsp + movq 16(%rdi),%rbp + movq 24(%rdi),%r12 + movq 32(%rdi),%r13 + movq 40(%rdi),%r14 + movq 48(%rdi),%r15 + jmp *56(%rdi) + + .size longjmp,.-longjmp diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6cd4878625f..581ce9af0ec 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,10 @@ config X86 bool default y +config ZONE_DMA32 + bool + default y + config LOCKDEP_SUPPORT bool default y diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index d6d7f731f6f..708a3cd9a27 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/mm.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -297,6 +298,53 @@ void __init e820_reserve_resources(void) } } +/* Mark pages corresponding to given address range as nosave */ +static void __init +e820_mark_nosave_range(unsigned long start, unsigned long end) +{ + unsigned long pfn, max_pfn; + + if (start >= end) + return; + + printk("Nosave address range: %016lx - %016lx\n", start, end); + max_pfn = end >> PAGE_SHIFT; + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) + if (pfn_valid(pfn)) + SetPageNosave(pfn_to_page(pfn)); +} + +/* + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for software + * suspend and suspend to RAM. + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. 
+ */ +void __init e820_mark_nosave_regions(void) +{ + int i; + unsigned long paddr; + + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (paddr < ei->addr) + e820_mark_nosave_range(paddr, + round_up(ei->addr, PAGE_SIZE)); + + paddr = round_down(ei->addr + ei->size, PAGE_SIZE); + if (ei->type != E820_RAM) + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), + paddr); + + if (paddr >= (end_pfn << PAGE_SHIFT)) + break; + } +} + /* * Add a memory region to the kernel e820 map. */ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 34afad70482..4b39f0da17f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -689,6 +689,7 @@ void __init setup_arch(char **cmdline_p) */ probe_roms(); e820_reserve_resources(); + e820_mark_nosave_regions(); request_resource(&iomem_resource, &video_ram_resource); diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 975380207b4..3ae9ffddddc 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -46,9 +46,10 @@ #include <linux/bootmem.h> #include <linux/thread_info.h> #include <linux/module.h> - #include <linux/delay.h> #include <linux/mc146818rtc.h> +#include <linux/smp.h> + #include <asm/mtrr.h> #include <asm/pgalloc.h> #include <asm/desc.h> diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 320b6fb00cc..bfbe00763c6 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S @@ -54,7 +54,7 @@ ENTRY(restore_image) movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on - movq pagedir_nosave(%rip), %rdx + movq restore_pblist(%rip), %rdx loop: testq %rdx, %rdx jz done diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7a9b1822418..7700e6cd2bd 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1148,23 +1148,25 @@ int hpet_rtc_timer_init(void) 
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -1179,10 +1181,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. 
+ */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ac8ea66ccb9..4198798e146 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -299,7 +299,7 @@ static int vmalloc_fault(unsigned long address) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ @@ -308,7 +308,7 @@ static int vmalloc_fault(unsigned long address) pud_ref = pud_offset(pgd_ref, address); if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) BUG(); pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); @@ -641,7 +641,7 @@ void vmalloc_sync_all(void) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } spin_unlock(&pgd_lock); set_bit(pgd_index(address), insync); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index d14fb2dfbfc..52fd42c40c8 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -536,7 +536,7 @@ int memory_add_physaddr_to_nid(u64 start) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdat = NODE_DATA(nid); - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; + struct zone *zone = 
pgdat->node_zones + ZONE_NORMAL; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index ab2ecccf779..ffa111eea9d 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -851,7 +851,7 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) * @ap: Port whose timings we are configuring * @adev: Drive in question * @udma: udma mode, 0 - 6 - * @is_ich: set if the chip is an ICH device + * @isich: set if the chip is an ICH device * * Set UDMA mode for device, in host controller PCI config space. * diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 27c22feebf3..8cd730fe5dd 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -484,7 +484,7 @@ static void nv_error_handler(struct ata_port *ap) static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version = 0; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; struct ata_probe_ent *probe_ent; int pci_dev_busy = 0; int rc; @@ -520,8 +520,8 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = -ENOMEM; - ppi = &nv_port_info[ent->driver_data]; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &nv_port_info[ent->driver_data]; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) goto err_out_regions; diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index 9b17375d805..18d49fff8dc 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -240,7 +240,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) struct ata_probe_ent *probe_ent = NULL; int rc; u32 genctl; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; int pci_dev_busy = 0; u8 pmr; u8 port2_start; @@ -265,8 +265,8 @@ static int sis_init_one 
(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out_regions; - ppi = &sis_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &sis_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index 8fc6e800011..dd76f37be18 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -185,7 +185,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; struct ata_probe_ent *probe_ent; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; int rc; unsigned int board_idx = (unsigned int) ent->driver_data; int pci_dev_busy = 0; @@ -211,8 +211,8 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out_regions; - ppi = &uli_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &uli_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 7f087aef99d..a72a2389a11 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -318,9 +318,10 @@ static void vt6421_init_addrs(struct ata_probe_ent *probe_ent, static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev) { struct ata_probe_ent *probe_ent; - struct ata_port_info *ppi = &vt6420_port_info; - - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + struct ata_port_info *ppi[2]; + + ppi[0] = ppi[1] = &vt6420_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) return NULL; diff --git a/drivers/atm/he.c 
b/drivers/atm/he.c index 41e052fecd7..f2511b42dba 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -454,7 +454,7 @@ rate_to_atmf(unsigned rate) /* cps to atm forum format */ return (NONZERO | (exp << 9) | (rate & 0x1ff)); } -static void __init +static void __devinit he_init_rx_lbfp0(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -485,7 +485,7 @@ he_init_rx_lbfp0(struct he_dev *he_dev) he_writel(he_dev, he_dev->r0_numbuffs, RLBF0_C); } -static void __init +static void __devinit he_init_rx_lbfp1(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -516,7 +516,7 @@ he_init_rx_lbfp1(struct he_dev *he_dev) he_writel(he_dev, he_dev->r1_numbuffs, RLBF1_C); } -static void __init +static void __devinit he_init_tx_lbfp(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -546,7 +546,7 @@ he_init_tx_lbfp(struct he_dev *he_dev) he_writel(he_dev, lbufd_index - 1, TLBF_T); } -static int __init +static int __devinit he_init_tpdrq(struct he_dev *he_dev) { he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev, @@ -568,7 +568,7 @@ he_init_tpdrq(struct he_dev *he_dev) return 0; } -static void __init +static void __devinit he_init_cs_block(struct he_dev *he_dev) { unsigned clock, rate, delta; @@ -664,7 +664,7 @@ he_init_cs_block(struct he_dev *he_dev) } -static int __init +static int __devinit he_init_cs_block_rcm(struct he_dev *he_dev) { unsigned (*rategrid)[16][16]; @@ -785,7 +785,7 @@ he_init_cs_block_rcm(struct he_dev *he_dev) return 0; } -static int __init +static int __devinit he_init_group(struct he_dev *he_dev, int group) { int i; @@ -955,7 +955,7 @@ he_init_group(struct he_dev *he_dev, int group) return 0; } -static int __init +static int __devinit he_init_irq(struct he_dev *he_dev) { int i; diff --git a/drivers/base/node.c b/drivers/base/node.c index e9b0957f15d..001e6f6b9c1 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -54,10 +54,12 @@ 
static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d MemUsed: %8lu kB\n" "Node %d Active: %8lu kB\n" "Node %d Inactive: %8lu kB\n" +#ifdef CONFIG_HIGHMEM "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" "Node %d LowTotal: %8lu kB\n" "Node %d LowFree: %8lu kB\n" +#endif "Node %d Dirty: %8lu kB\n" "Node %d Writeback: %8lu kB\n" "Node %d FilePages: %8lu kB\n" @@ -66,16 +68,20 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" - "Node %d Slab: %8lu kB\n", + "Node %d Slab: %8lu kB\n" + "Node %d SReclaimable: %8lu kB\n" + "Node %d SUnreclaim: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), nid, K(active), nid, K(inactive), +#ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), nid, K(i.totalram - i.totalhigh), nid, K(i.freeram - i.freehigh), +#endif nid, K(node_page_state(nid, NR_FILE_DIRTY)), nid, K(node_page_state(nid, NR_WRITEBACK)), nid, K(node_page_state(nid, NR_FILE_PAGES)), @@ -84,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(nid, NR_SLAB))); + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + + node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 6e6a7c7a7ef..ab6429b4a84 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -209,11 +209,12 @@ static const unsigned char days_in_mo[] = */ static inline unsigned char rtc_is_updating(void) { + unsigned long flags; unsigned char uip; - spin_lock_irq(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); uip = 
(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return uip; } diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 71f27e955d8..c7854ea57b5 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -476,13 +476,13 @@ static int auide_dma_lostirq(ide_drive_t *drive) return 0; } -static void auide_ddma_tx_callback(int irq, void *param, struct pt_regs *regs) +static void auide_ddma_tx_callback(int irq, void *param) { _auide_hwif *ahwif = (_auide_hwif*)param; ahwif->drive->waiting_for_dma = 0; } -static void auide_ddma_rx_callback(int irq, void *param, struct pt_regs *regs) +static void auide_ddma_rx_callback(int irq, void *param) { _auide_hwif *ahwif = (_auide_hwif*)param; ahwif->drive->waiting_for_dma = 0; diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c index 88bf2af2a0e..edd7b83c346 100644 --- a/drivers/media/video/videodev.c +++ b/drivers/media/video/videodev.c @@ -836,7 +836,7 @@ static int __video_do_ioctl(struct inode *inode, struct file *file, break; } - if (index<=0 || index >= vfd->tvnormsize) { + if (index < 0 || index >= vfd->tvnormsize) { ret=-EINVAL; break; } diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c index fb606165af3..61268da1395 100644 --- a/drivers/mmc/au1xmmc.c +++ b/drivers/mmc/au1xmmc.c @@ -731,7 +731,7 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) } } -static void au1xmmc_dma_callback(int irq, void *dev_id, struct pt_regs *regs) +static void au1xmmc_dma_callback(int irq, void *dev_id) { struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id; diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c index 77670741e10..feb42db10ee 100644 --- a/drivers/net/sunlance.c +++ b/drivers/net/sunlance.c @@ -1323,9 +1323,9 @@ static const struct ethtool_ops sparc_lance_ethtool_ops = { .get_link = sparc_lance_get_link, }; -static int __init 
sparc_lance_probe_one(struct sbus_dev *sdev, - struct sbus_dma *ledma, - struct sbus_dev *lebuffer) +static int __devinit sparc_lance_probe_one(struct sbus_dev *sdev, + struct sbus_dma *ledma, + struct sbus_dev *lebuffer) { static unsigned version_printed; struct net_device *dev; @@ -1515,7 +1515,7 @@ fail: } /* On 4m, find the associated dma for the lance chip */ -static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev) +static struct sbus_dma * __devinit find_ledma(struct sbus_dev *sdev) { struct sbus_dma *p; @@ -1533,7 +1533,7 @@ static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev) /* Find all the lance cards on the system and initialize them */ static struct sbus_dev sun4_sdev; -static int __init sparc_lance_init(void) +static int __devinit sparc_lance_init(void) { if ((idprom->id_machtype == (SM_SUN4|SM_4_330)) || (idprom->id_machtype == (SM_SUN4|SM_4_470))) { diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 372e47f7d59..5f7ba1adb30 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -1929,6 +1929,13 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port) mutex_lock(&state->mutex); +#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND + if (uart_console(port)) { + mutex_unlock(&state->mutex); + return 0; + } +#endif + if (state->info && state->info->flags & UIF_INITIALIZED) { const struct uart_ops *ops = port->ops; @@ -1967,6 +1974,13 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) mutex_lock(&state->mutex); +#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND + if (uart_console(port)) { + mutex_unlock(&state->mutex); + return 0; + } +#endif + uart_change_pm(state, 0); /* diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c index 4f78f234473..c151dcf6878 100644 --- a/drivers/video/fbsysfs.c +++ b/drivers/video/fbsysfs.c @@ -397,6 +397,12 @@ static ssize_t store_bl_curve(struct class_device *class_device, u8 tmp_curve[FB_BACKLIGHT_LEVELS]; unsigned int i; + /* 
Some drivers don't use framebuffer_alloc(), but those also + * don't have backlights. + */ + if (!fb_info || !fb_info->bl_dev) + return -ENODEV; + if (count != (FB_BACKLIGHT_LEVELS / 8 * 24)) return -EINVAL; @@ -430,6 +436,12 @@ static ssize_t show_bl_curve(struct class_device *class_device, char *buf) ssize_t len = 0; unsigned int i; + /* Some drivers don't use framebuffer_alloc(), but those also + * don't have backlights. + */ + if (!fb_info || !fb_info->bl_dev) + return -ENODEV; + mutex_lock(&fb_info->bl_mutex); for (i = 0; i < FB_BACKLIGHT_LEVELS; i += 8) len += snprintf(&buf[len], PAGE_SIZE, diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8dbd44f10e9..d96e5c14a9c 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, if (!do_now) { /* Too young to die */ - if (time_after(ino->last_used + timeout, now)) + if (!timeout || time_after(ino->last_used + timeout, now)) return 0; /* update last_used here :- @@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, struct dentry *root = dget(sb->s_root); int do_now = how & AUTOFS_EXP_IMMEDIATE; - if (!sbi->exp_timeout || !root) + if (!root) return NULL; now = jiffies; @@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; - if ( !sbi->exp_timeout || !root ) + if (!root) return NULL; now = jiffies; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 672a3b90bc5..64802aabd1a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1262,7 +1262,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs) return; } -static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) +static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) { phdr->p_type = PT_NOTE; phdr->p_offset = offset; @@ -1428,7 +1428,7 @@ static int elf_core_dump(long signr, struct 
pt_regs *regs, struct file *file) int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - off_t offset = 0, dataoff; + loff_t offset = 0, dataoff; unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; int numnote; struct memelfnote *notes = NULL; @@ -1661,11 +1661,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) ELF_CORE_WRITE_EXTRA_DATA; #endif - if ((off_t)file->f_pos != offset) { + if (file->f_pos != offset) { /* Sanity check */ printk(KERN_WARNING - "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", - (off_t)file->f_pos, offset); + "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n", + file->f_pos, offset); } end_coredump: diff --git a/fs/buffer.c b/fs/buffer.c index 71649ef9b65..3b6d701073e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page) spin_lock(&mapping->private_lock); ret = drop_buffers(page, &buffers_to_free); + spin_unlock(&mapping->private_lock); if (ret) { /* * If the filesystem writes its buffers by hand (eg ext3) @@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page) */ clear_page_dirty(page); } - spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 42da6078431..32a8caf0c41 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal, return (ret == -EIO); } +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) +{ + int i; + + for (i = 0; i < bufs; i++) { + wbuf[i]->b_end_io = end_buffer_write_sync; + /* We use-up our safety reference in submit_bh() */ + submit_bh(WRITE, wbuf[i]); + } +} + +/* + * Submit all the data buffers to disk + */ +static void journal_submit_data_buffers(journal_t *journal, + transaction_t *commit_transaction) +{ + struct journal_head *jh; + struct buffer_head *bh; + int locked; + int bufs = 0; + struct 
buffer_head **wbuf = journal->j_wbuf; + + /* + * Whenever we unlock the journal and sleep, things can get added + * onto ->t_sync_datalist, so we have to keep looping back to + * write_out_data until we *know* that the list is empty. + * + * Cleanup any flushed data buffers from the data list. Even in + * abort mode, we want to flush this out as soon as possible. + */ +write_out_data: + cond_resched(); + spin_lock(&journal->j_list_lock); + + while (commit_transaction->t_sync_datalist) { + jh = commit_transaction->t_sync_datalist; + bh = jh2bh(jh); + locked = 0; + + /* Get reference just to make sure buffer does not disappear + * when we are forced to drop various locks */ + get_bh(bh); + /* If the buffer is dirty, we need to submit IO and hence + * we need the buffer lock. We try to lock the buffer without + * blocking. If we fail, we need to drop j_list_lock and do + * blocking lock_buffer(). + */ + if (buffer_dirty(bh)) { + if (test_set_buffer_locked(bh)) { + BUFFER_TRACE(bh, "needs blocking lock"); + spin_unlock(&journal->j_list_lock); + /* Write out all data to prevent deadlocks */ + journal_do_submit_data(wbuf, bufs); + bufs = 0; + lock_buffer(bh); + spin_lock(&journal->j_list_lock); + } + locked = 1; + } + /* We have to get bh_state lock. Again out of order, sigh. */ + if (!inverted_lock(journal, bh)) { + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + } + /* Someone already cleaned up the buffer? 
*/ + if (!buffer_jbd(bh) + || jh->b_transaction != commit_transaction + || jh->b_jlist != BJ_SyncData) { + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + BUFFER_TRACE(bh, "already cleaned up"); + put_bh(bh); + continue; + } + if (locked && test_clear_buffer_dirty(bh)) { + BUFFER_TRACE(bh, "needs writeout, adding to array"); + wbuf[bufs++] = bh; + __journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); + if (bufs == journal->j_wbufsize) { + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); + bufs = 0; + goto write_out_data; + } + } + else { + BUFFER_TRACE(bh, "writeout complete: unfile"); + __journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + journal_remove_journal_head(bh); + /* Once for our safety reference, once for + * journal_remove_journal_head() */ + put_bh(bh); + put_bh(bh); + } + + if (lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); + goto write_out_data; + } + } + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); +} + /* * journal_commit_transaction * @@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = 0; - /* - * Whenever we unlock the journal and sleep, things can get added - * onto ->t_sync_datalist, so we have to keep looping back to - * write_out_data until we *know* that the list is empty. - */ - bufs = 0; - /* - * Cleanup any flushed data buffers from the data list. Even in - * abort mode, we want to flush this out as soon as possible. 
- */ -write_out_data: - cond_resched(); - spin_lock(&journal->j_list_lock); - - while (commit_transaction->t_sync_datalist) { - struct buffer_head *bh; - - jh = commit_transaction->t_sync_datalist; - commit_transaction->t_sync_datalist = jh->b_tnext; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - BUFFER_TRACE(bh, "locked"); - if (!inverted_lock(journal, bh)) - goto write_out_data; - __journal_temp_unlink_buffer(jh); - __journal_file_buffer(jh, commit_transaction, - BJ_Locked); - jbd_unlock_bh_state(bh); - if (lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } else { - if (buffer_dirty(bh)) { - BUFFER_TRACE(bh, "start journal writeout"); - get_bh(bh); - wbuf[bufs++] = bh; - if (bufs == journal->j_wbufsize) { - jbd_debug(2, "submit %d writes\n", - bufs); - spin_unlock(&journal->j_list_lock); - ll_rw_block(SWRITE, bufs, wbuf); - journal_brelse_array(wbuf, bufs); - bufs = 0; - goto write_out_data; - } - } else { - BUFFER_TRACE(bh, "writeout complete: unfile"); - if (!inverted_lock(journal, bh)) - goto write_out_data; - __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - if (lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } - } - } - - if (bufs) { - spin_unlock(&journal->j_list_lock); - ll_rw_block(SWRITE, bufs, wbuf); - journal_brelse_array(wbuf, bufs); - spin_lock(&journal->j_list_lock); - } + journal_submit_data_buffers(journal, commit_transaction); /* * Wait for all previously submitted IO to complete. 
*/ + spin_lock(&journal->j_list_lock); while (commit_transaction->t_locked_list) { struct buffer_head *bh; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 94215622544..5bbd6089605 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SwapCached: %8lu kB\n" "Active: %8lu kB\n" "Inactive: %8lu kB\n" +#ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" "LowTotal: %8lu kB\n" "LowFree: %8lu kB\n" +#endif "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n" "Dirty: %8lu kB\n" @@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" "Slab: %8lu kB\n" + "SReclaimable: %8lu kB\n" + "SUnreclaim: %8lu kB\n" "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" @@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(total_swapcache_pages), K(active), K(inactive), +#ifdef CONFIG_HIGHMEM K(i.totalhigh), K(i.freehigh), K(i.totalram-i.totalhigh), K(i.freeram-i.freehigh), +#endif K(i.totalswap), K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), - K(global_page_state(NR_SLAB)), + K(global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_SLAB_RECLAIMABLE)), + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index 64d0ab98fcd..8af56ce346a 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -75,6 +75,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) +#define pgd_page(pgd) 
(pfn_to_page(pgd_val(pgd) >> 32)) #define pte_pfn(pte) (pte_val(pte) >> 32) #define mk_pte(page, pgprot) \ diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 93eaa58b796..49ac9bee7ce 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -230,16 +230,17 @@ extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp) extern inline unsigned long -pmd_page_kernel(pmd_t pmd) +pmd_page_vaddr(pmd_t pmd) { return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET; } #ifndef CONFIG_DISCONTIGMEM #define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) +#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32)) #endif -extern inline unsigned long pgd_page(pgd_t pgd) +extern inline unsigned long pgd_page_vaddr(pgd_t pgd) { return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } extern inline int pte_none(pte_t pte) { return !pte_val(pte); } @@ -293,13 +294,13 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + return (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); } /* Find an entry in the third-level page table.. 
*/ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) { - return (pte_t *) pmd_page_kernel(*dir) + return (pte_t *) pmd_page_vaddr(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); } diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index 8d3919c6458..4d10d319fa3 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -224,9 +224,9 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte_at((mm),(addr),(ptep), __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) @@ -291,7 +291,7 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); clean_pmd_entry(pmdp); \ } while (0) -static inline pte_t *pmd_page_kernel(pmd_t pmd) +static inline pte_t *pmd_page_vaddr(pmd_t pmd) { unsigned long ptr; diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h index 19ac9101a6b..63a8881fae1 100644 --- a/include/asm-arm26/pgtable.h +++ b/include/asm-arm26/pgtable.h @@ -186,12 +186,12 @@ extern struct page *empty_zero_page; * return a pointer to memory (no special alignment) */ #define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT)) -#define pmd_page_kernel(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) +#define pmd_page_vaddr(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) -#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 
+#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) -#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) +#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild new file mode 100644 index 00000000000..8770e73ce93 --- /dev/null +++ b/include/asm-avr32/Kbuild @@ -0,0 +1,3 @@ +include include/asm-generic/Kbuild.asm + +headers-y += cachectl.h diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h new file mode 100644 index 00000000000..50bf6e31a14 --- /dev/null +++ b/include/asm-avr32/a.out.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_A_OUT_H +#define __ASM_AVR32_A_OUT_H + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +#define STACK_TOP TASK_SIZE + +#endif + +#endif /* __ASM_AVR32_A_OUT_H */ diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h new file mode 100644 index 00000000000..366794858ec --- /dev/null +++ b/include/asm-avr32/addrspace.h @@ -0,0 +1,43 @@ +/* + * Defitions for the address spaces of the AVR32 CPUs. 
Heavily based on + * include/asm-sh/addrspace.h + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ADDRSPACE_H +#define __ASM_AVR32_ADDRSPACE_H + +#ifdef CONFIG_MMU + +/* Memory segments when segmentation is enabled */ +#define P0SEG 0x00000000 +#define P1SEG 0x80000000 +#define P2SEG 0xa0000000 +#define P3SEG 0xc0000000 +#define P4SEG 0xe0000000 + +/* Returns the privileged segment base of a given address */ +#define PXSEG(a) (((unsigned long)(a)) & 0xe0000000) + +/* Returns the physical address of a PnSEG (n=1,2) address */ +#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff) + +/* + * Map an address to a certain privileged segment + */ +#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P1SEG)) +#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P2SEG)) +#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P3SEG)) +#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P4SEG)) + +#endif /* CONFIG_MMU */ + +#endif /* __ASM_AVR32_ADDRSPACE_H */ diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h new file mode 100644 index 00000000000..ce1150d4438 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h @@ -0,0 +1,36 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Peripheral Data Controller (PDC) registers. + * Based on AT91RM9200 datasheet revision E. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_PDC_H +#define AT91RM9200_PDC_H + +#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */ +#define AT91_PDC_RCR 0x104 /* Receive Counter Register */ +#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */ +#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */ +#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */ +#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */ +#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */ +#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */ + +#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */ +#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */ +#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */ +#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */ +#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */ + +#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h new file mode 100644 index 00000000000..79f851e31b9 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h @@ -0,0 +1,123 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * USART registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef AT91RM9200_USART_H +#define AT91RM9200_USART_H + +#define AT91_US_CR 0x00 /* Control Register */ +#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */ +#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */ +#define AT91_US_RXEN (1 << 4) /* Receiver Enable */ +#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */ +#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */ +#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */ +#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */ +#define AT91_US_STTBRK (1 << 9) /* Start Break */ +#define AT91_US_STPBRK (1 << 10) /* Stop Break */ +#define AT91_US_STTTO (1 << 11) /* Start Time-out */ +#define AT91_US_SENDA (1 << 12) /* Send Address */ +#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */ +#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */ +#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */ +#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */ +#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */ +#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */ +#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */ + +#define AT91_US_MR 0x04 /* Mode Register */ +#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */ +#define AT91_US_USMODE_NORMAL 0 +#define AT91_US_USMODE_RS485 1 +#define AT91_US_USMODE_HWHS 2 +#define AT91_US_USMODE_MODEM 3 +#define AT91_US_USMODE_ISO7816_T0 4 +#define AT91_US_USMODE_ISO7816_T1 6 +#define AT91_US_USMODE_IRDA 8 +#define AT91_US_USCLKS (3 << 4) /* Clock Selection */ +#define AT91_US_CHRL (3 << 6) /* Character Length */ +#define AT91_US_CHRL_5 (0 << 6) +#define AT91_US_CHRL_6 (1 << 6) +#define AT91_US_CHRL_7 (2 << 6) +#define AT91_US_CHRL_8 (3 << 6) +#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */ +#define AT91_US_PAR (7 << 9) /* Parity Type */ +#define AT91_US_PAR_EVEN (0 << 9) +#define AT91_US_PAR_ODD (1 << 9) +#define AT91_US_PAR_SPACE (2 << 9) +#define AT91_US_PAR_MARK (3 << 9) +#define AT91_US_PAR_NONE (4 
<< 9) +#define AT91_US_PAR_MULTI_DROP (6 << 9) +#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */ +#define AT91_US_NBSTOP_1 (0 << 12) +#define AT91_US_NBSTOP_1_5 (1 << 12) +#define AT91_US_NBSTOP_2 (2 << 12) +#define AT91_US_CHMODE (3 << 14) /* Channel Mode */ +#define AT91_US_CHMODE_NORMAL (0 << 14) +#define AT91_US_CHMODE_ECHO (1 << 14) +#define AT91_US_CHMODE_LOC_LOOP (2 << 14) +#define AT91_US_CHMODE_REM_LOOP (3 << 14) +#define AT91_US_MSBF (1 << 16) /* Bit Order */ +#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */ +#define AT91_US_CLKO (1 << 18) /* Clock Output Select */ +#define AT91_US_OVER (1 << 19) /* Oversampling Mode */ +#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */ +#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */ +#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */ +#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */ + +#define AT91_US_IER 0x08 /* Interrupt Enable Register */ +#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */ +#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */ +#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */ +#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */ +#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */ +#define AT91_US_OVRE (1 << 5) /* Overrun Error */ +#define AT91_US_FRAME (1 << 6) /* Framing Error */ +#define AT91_US_PARE (1 << 7) /* Parity Error */ +#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */ +#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */ +#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */ +#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */ +#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */ +#define AT91_US_NACK (1 << 13) /* Non Acknowledge */ +#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */ +#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */ +#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect 
Input Change */ +#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */ +#define AT91_US_RI (1 << 20) /* RI */ +#define AT91_US_DSR (1 << 21) /* DSR */ +#define AT91_US_DCD (1 << 22) /* DCD */ +#define AT91_US_CTS (1 << 23) /* CTS */ + +#define AT91_US_IDR 0x0c /* Interrupt Disable Register */ +#define AT91_US_IMR 0x10 /* Interrupt Mask Register */ +#define AT91_US_CSR 0x14 /* Channel Status Register */ +#define AT91_US_RHR 0x18 /* Receiver Holding Register */ +#define AT91_US_THR 0x1c /* Transmitter Holding Register */ + +#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */ +#define AT91_US_CD (0xffff << 0) /* Clock Divider */ + +#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */ +#define AT91_US_TO (0xffff << 0) /* Time-out Value */ + +#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */ +#define AT91_US_TG (0xff << 0) /* Timeguard Value */ + +#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */ +#define AT91_US_NER 0x44 /* Number of Errors Register */ +#define AT91_US_IF 0x4c /* IrDA Filter Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h new file mode 100644 index 00000000000..39368e18ab2 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/board.h @@ -0,0 +1,35 @@ +/* + * Platform data definitions. + */ +#ifndef __ASM_ARCH_BOARD_H +#define __ASM_ARCH_BOARD_H + +#include <linux/types.h> + +/* Add basic devices: system manager, interrupt controller, portmuxes, etc. 
*/ +void at32_add_system_devices(void); + +#define AT91_NR_UART 4 +extern struct platform_device *at91_default_console_device; + +struct platform_device *at32_add_device_usart(unsigned int id); + +struct eth_platform_data { + u8 valid; + u8 mii_phy_addr; + u8 is_rmii; + u8 hw_addr[6]; +}; +struct platform_device * +at32_add_device_eth(unsigned int id, struct eth_platform_data *data); + +struct platform_device *at32_add_device_spi(unsigned int id); + +struct lcdc_platform_data { + unsigned long fbmem_start; + unsigned long fbmem_size; +}; +struct platform_device * +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data); + +#endif /* __ASM_ARCH_BOARD_H */ diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h new file mode 100644 index 00000000000..43722634e06 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/init.h @@ -0,0 +1,21 @@ +/* + * AT32AP platform initialization calls. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32AP_INIT_H__ +#define __ASM_AVR32_AT32AP_INIT_H__ + +void setup_platform(void); + +/* Called by setup_platform */ +void at32_clock_init(void); +void at32_portmux_init(void); + +void at32_setup_serial_console(unsigned int usart_id); + +#endif /* __ASM_AVR32_AT32AP_INIT_H__ */ diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h new file mode 100644 index 00000000000..4d50421262a --- /dev/null +++ b/include/asm-avr32/arch-at32ap/portmux.h @@ -0,0 +1,16 @@ +/* + * AT32 portmux interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_AT32_PORTMUX_H__ +#define __ASM_AVR32_AT32_PORTMUX_H__ + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id); + +#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */ diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h new file mode 100644 index 00000000000..265a9ead20b --- /dev/null +++ b/include/asm-avr32/arch-at32ap/sm.h @@ -0,0 +1,27 @@ +/* + * AT32 System Manager interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32_SM_H__ +#define __ASM_AVR32_AT32_SM_H__ + +struct irq_chip; +struct platform_device; + +struct at32_sm { + spinlock_t lock; + void __iomem *regs; + struct irq_chip *eim_chip; + unsigned int eim_first_irq; + struct platform_device *pdev; +}; + +extern struct platform_device at32_sm_device; +extern struct at32_sm system_manager; + +#endif /* __ASM_AVR32_AT32_SM_H__ */ diff --git a/include/asm-avr32/arch-at32ap/smc.h b/include/asm-avr32/arch-at32ap/smc.h new file mode 100644 index 00000000000..3732b328303 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/smc.h @@ -0,0 +1,60 @@ +/* + * Static Memory Controller for AT32 chips + * + * Copyright (C) 2006 Atmel Corporation + * + * Inspired by the OMAP2 General-Purpose Memory Controller interface + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ARCH_AT32AP_SMC_H +#define __ARCH_AT32AP_SMC_H + +/* + * All timing parameters are in nanoseconds. 
+ */ +struct smc_config { + /* Delay from address valid to assertion of given strobe */ + u16 ncs_read_setup; + u16 nrd_setup; + u16 ncs_write_setup; + u16 nwe_setup; + + /* Pulse length of given strobe */ + u16 ncs_read_pulse; + u16 nrd_pulse; + u16 ncs_write_pulse; + u16 nwe_pulse; + + /* Total cycle length of given operation */ + u16 read_cycle; + u16 write_cycle; + + /* Bus width in bytes */ + u8 bus_width; + + /* + * 0: Data is sampled on rising edge of NCS + * 1: Data is sampled on rising edge of NRD + */ + unsigned int nrd_controlled:1; + + /* + * 0: Data is driven on falling edge of NCS + * 1: Data is driven on falling edge of NWR + */ + unsigned int nwe_controlled:1; + + /* + * 0: Byte select access type + * 1: Byte write access type + */ + unsigned int byte_write:1; +}; + +extern int smc_set_configuration(int cs, const struct smc_config *config); +extern struct smc_config *smc_get_configuration(int cs); + +#endif /* __ARCH_AT32AP_SMC_H */ diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h new file mode 100644 index 00000000000..515c7618952 --- /dev/null +++ b/include/asm-avr32/asm.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_ASM_H__ +#define __ASM_AVR32_ASM_H__ + +#include <asm/sysreg.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +#define mask_interrupts ssrf SR_GM_BIT +#define mask_exceptions ssrf SR_EM_BIT +#define unmask_interrupts csrf SR_GM_BIT +#define unmask_exceptions csrf SR_EM_BIT + +#ifdef CONFIG_FRAME_POINTER + .macro save_fp + st.w --sp, r7 + .endm + .macro restore_fp + ld.w r7, sp++ + .endm + .macro zero_fp + mov r7, 0 + .endm +#else + .macro save_fp + .endm + .macro restore_fp + .endm + .macro zero_fp + .endm +#endif + .macro get_thread_info reg + mov \reg, sp + andl \reg, ~(THREAD_SIZE - 1) & 0xffff + .endm + + /* Save and restore registers */ + .macro save_min sr, tmp=lr + pushm lr + mfsr \tmp, \sr + zero_fp + st.w --sp, \tmp + .endm + + .macro restore_min sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm lr + .endm + + .macro save_half sr, tmp=lr + save_fp + pushm r8-r9,r10,r11,r12,lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_half sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm r8-r9,r10,r11,r12,lr + restore_fp + .endm + + .macro save_full_user sr, tmp=lr + stmts --sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + st.w --sp, lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_full_user sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + ld.w lr, sp++ + ldmts sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + .endm + + /* uaccess macros */ + .macro branch_if_kernel scratch, label + get_thread_info \scratch + ld.w \scratch, \scratch[TI_flags] + bld \scratch, TIF_USERSPACE + brcc \label + .endm + + .macro ret_if_privileged scratch, addr, size, ret + sub \scratch, \size, 1 + add \scratch, \addr + retcs \ret + retmi \ret + .endm + +#endif /* __ASM_AVR32_ASM_H__ */ diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h new file mode 100644 index 00000000000..e0b9c44c126 --- /dev/null +++ b/include/asm-avr32/atomic.h @@ -0,0 +1,201 @@ +/* + * Atomic 
operations that C can't guarantee us. Useful for + * resource counting etc. + * + * But use these as seldom as possible since they are slower than + * regular operations. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ATOMIC_H +#define __ASM_AVR32_ATOMIC_H + +#include <asm/system.h> + +typedef struct { volatile int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v, i) (((v)->counter) = i) + +/* + * atomic_sub_return - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Returns the resulting value. + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_return */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc"); + + return result; +} + +/* + * atomic_add_return - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Returns the resulting value. + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int result; + + if (__builtin_constant_p(i)) + result = atomic_sub_return(-i, v); + else + asm volatile( + "/* atomic_add_return */\n" + "1: ssrf 5\n" + " ld.w %0, %1\n" + " add %0, %3\n" + " stcond %2, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(i) + : "cc", "memory"); + + return result; +} + +/* + * atomic_sub_unless - sub unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to subtract from v... + * @u: ...unless v is equal to u.
+ * + * If the atomic value v is not equal to u, this function subtracts a + * from v, and returns non zero. If v is equal to u then it returns + * zero. This is done as an atomic operation. +*/ +static inline int atomic_sub_unless(atomic_t *v, int a, int u) +{ + int tmp, result = 0; + + asm volatile( + "/* atomic_sub_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " sub %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(a), "ir"(u) + : "cc", "memory"); + + return result; +} + +/* + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * If the atomic value v is not equal to u, this function adds a to v, + * and returns non zero. If v is equal to u then it returns zero. This + * is done as an atomic operation. +*/ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int tmp, result; + + if (__builtin_constant_p(a)) + result = atomic_sub_unless(v, -a, u); + else { + result = 0; + asm volatile( + "/* atomic_add_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " add %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(a), "ir"(u) + : "cc", "memory"); + } + + return result; +} + +/* + * atomic_sub_if_positive - conditionally subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically test @v and subtract @i if @v is greater or equal than @i. + * The function returns the old value of @v minus @i. 
+ */ +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_if_positive */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " brlt 1f\n" + " stcond %1, %0\n" + " brne 1b\n" + "1:" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc", "memory"); + + return result; +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) + +#define atomic_sub(i, v) (void)atomic_sub_return(i, v) +#define atomic_add(i, v) (void)atomic_add_return(i, v) +#define atomic_dec(v) atomic_sub(1, (v)) +#define atomic_inc(v) atomic_add(1, (v)) + +#define atomic_dec_return(v) atomic_sub_return(1, v) +#define atomic_inc_return(v) atomic_add_return(1, v) + +#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) +#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) + +#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0) +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include <asm-generic/atomic.h> + +#endif /* __ASM_AVR32_ATOMIC_H */ diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h new file mode 100644 index 00000000000..d5dd435bf8f --- /dev/null +++ b/include/asm-avr32/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_AVR32_AUXVEC_H +#define __ASM_AVR32_AUXVEC_H + +#endif /* __ASM_AVR32_AUXVEC_H */ diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h new file mode 100644 index 00000000000..5299f8c8e11 --- /dev/null +++ b/include/asm-avr32/bitops.h @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_BITOPS_H +#define __ASM_AVR32_BITOPS_H + +#include <asm/byteorder.h> +#include <asm/system.h> + +/* + * clear_bit() doesn't provide any barrier for the compiler + */ +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/* + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " or %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. 
+ */ +static inline void clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " cbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " andn %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp; + + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " eor %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); +} + +/* + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
+ */ +static inline int test_and_set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " sbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " or %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " cbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " andn %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
+ */ +static inline int test_and_change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " eor %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + + return (old & mask) != 0; +} + +#include <asm-generic/bitops/non-atomic.h> + +/* Find First bit Set */ +static inline unsigned long __ffs(unsigned long word) +{ + unsigned long result; + + asm("brev %1\n\t" + "clz %0,%1" + : "=r"(result), "=&r"(word) + : "1"(word)); + return result; +} + +/* Find First Zero */ +static inline unsigned long ffz(unsigned long word) +{ + return __ffs(~word); +} + +/* Find Last bit Set */ +static inline int fls(unsigned long word) +{ + unsigned long result; + + asm("clz %0,%1" : "=r"(result) : "r"(word)); + return 32 - result; +} + +unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + * + * The difference is that bit numbering starts at 1, and if no bit is set, + * the function returns 0. 
+ */ +static inline int ffs(unsigned long word) +{ + if(word == 0) + return 0; + return __ffs(word) + 1; +} + +#include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/hweight.h> + +#include <asm-generic/bitops/ext2-non-atomic.h> +#include <asm-generic/bitops/ext2-atomic.h> +#include <asm-generic/bitops/minix-le.h> + +#endif /* __ASM_AVR32_BITOPS_H */ diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h new file mode 100644 index 00000000000..521766bc936 --- /dev/null +++ b/include/asm-avr32/bug.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_BUG_H +#define __ASM_AVR32_BUG_H + +#ifdef CONFIG_BUG + +/* + * According to our Chief Architect, this compact opcode is very + * unlikely to ever be implemented. + */ +#define AVR32_BUG_OPCODE 0x5df0 + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + ".hword %1\n\t" \ + ".long %2" \ + : \ + : "n"(AVR32_BUG_OPCODE), \ + "i"(__LINE__), "X"(__FILE__)); \ + } while (0) + +#else + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + : : "n"(AVR32_BUG_OPCODE)); \ + } while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#define HAVE_ARCH_BUG + +#endif /* CONFIG_BUG */ + +#include <asm-generic/bug.h> + +#endif /* __ASM_AVR32_BUG_H */ diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h new file mode 100644 index 00000000000..7635e770622 --- /dev/null +++ b/include/asm-avr32/bugs.h @@ -0,0 +1,15 @@ +/* + * This is included by init/main.c to check for architecture-dependent bugs. 
+ * + * Needs: + * void check_bugs(void); + */ +#ifndef __ASM_AVR32_BUGS_H +#define __ASM_AVR32_BUGS_H + +static void __init check_bugs(void) +{ + cpu_data->loops_per_jiffy = loops_per_jiffy; +} + +#endif /* __ASM_AVR32_BUGS_H */ diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h new file mode 100644 index 00000000000..402ff4125cd --- /dev/null +++ b/include/asm-avr32/byteorder.h @@ -0,0 +1,25 @@ +/* + * AVR32 endian-conversion functions. + */ +#ifndef __ASM_AVR32_BYTEORDER_H +#define __ASM_AVR32_BYTEORDER_H + +#include <asm/types.h> +#include <linux/compiler.h> + +#ifdef __CHECKER__ +extern unsigned long __builtin_bswap_32(unsigned long x); +extern unsigned short __builtin_bswap_16(unsigned short x); +#endif + +#define __arch__swab32(x) __builtin_bswap_32(x) +#define __arch__swab16(x) __builtin_bswap_16(x) + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +# define __BYTEORDER_HAS_U64__ +# define __SWAB_64_THRU_32__ +#endif + +#include <linux/byteorder/big_endian.h> + +#endif /* __ASM_AVR32_BYTEORDER_H */ diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h new file mode 100644 index 00000000000..dabb955f3c0 --- /dev/null +++ b/include/asm-avr32/cache.h @@ -0,0 +1,29 @@ +#ifndef __ASM_AVR32_CACHE_H +#define __ASM_AVR32_CACHE_H + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#ifndef __ASSEMBLER__ +struct cache_info { + unsigned int ways; + unsigned int sets; + unsigned int linesz; +}; +#endif /* __ASSEMBLER__ */ + +/* Cache operation constants */ +#define ICACHE_FLUSH 0x00 +#define ICACHE_INVALIDATE 0x01 +#define ICACHE_LOCK 0x02 +#define ICACHE_UNLOCK 0x03 +#define ICACHE_PREFETCH 0x04 + +#define DCACHE_FLUSH 0x08 +#define DCACHE_LOCK 0x09 +#define DCACHE_UNLOCK 0x0a +#define DCACHE_INVALIDATE 0x0b +#define DCACHE_CLEAN 0x0c +#define DCACHE_CLEAN_INVAL 0x0d + +#endif /* __ASM_AVR32_CACHE_H */ diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h new file mode 100644 index
00000000000..4faf1ce6006 --- /dev/null +++ b/include/asm-avr32/cachectl.h @@ -0,0 +1,11 @@ +#ifndef __ASM_AVR32_CACHECTL_H +#define __ASM_AVR32_CACHECTL_H + +/* + * Operations that can be performed through the cacheflush system call + */ + +/* Clean the data cache, then invalidate the icache */ +#define CACHE_IFLUSH 0 + +#endif /* __ASM_AVR32_CACHECTL_H */ diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h new file mode 100644 index 00000000000..f1bf1708980 --- /dev/null +++ b/include/asm-avr32/cacheflush.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CACHEFLUSH_H +#define __ASM_AVR32_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include <linux/mm.h> + +#define CACHE_OP_ICACHE_INVALIDATE 0x01 +#define CACHE_OP_DCACHE_INVALIDATE 0x0b +#define CACHE_OP_DCACHE_CLEAN 0x0c +#define CACHE_OP_DCACHE_CLEAN_INVAL 0x0d + +/* + * Invalidate any cacheline containing virtual address vaddr without + * writing anything back to memory. + * + * Note that this function may corrupt unrelated data structures when + * applied on buffers that are not cacheline aligned in both ends. + */ +static inline void invalidate_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory. + */ +static inline void clean_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory and then invalidate it. 
+ */ +static inline void flush_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL) + : "memory"); +} + +/* + * Invalidate any instruction cacheline containing virtual address + * vaddr. + */ +static inline void invalidate_icache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE) + : "memory"); +} + +/* + * Applies the above functions on all lines that are touched by the + * specified virtual address range. + */ +void invalidate_dcache_region(void *start, size_t len); +void clean_dcache_region(void *start, size_t len); +void flush_dcache_region(void *start, size_t len); +void invalidate_icache_region(void *start, size_t len); + +/* + * Make sure any pending writes are completed before continuing. + */ +#define flush_write_buffer() asm volatile("sync 0" : : : "memory") + +/* + * The following functions are called when a virtual mapping changes. + * We do not need to flush anything in this case. + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +/* + * I think we need to implement this one to be able to reliably + * execute pages from RAMDISK. However, if we implement the + * flush_dcache_*() functions, it might not be needed anymore. + * + * #define flush_icache_page(vma, page) do { } while (0) + */ +extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); + +/* + * These are (I think) related to D-cache aliasing. We might need to + * do something here, but only for certain configurations. No such + * configurations exist at this time. 
+ */ +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(page) do { } while (0) +#define flush_dcache_mmap_unlock(page) do { } while (0) + +/* + * These are for I/D cache coherency. In this case, we do need to + * flush with all configurations. + */ +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, + unsigned long addr, int len); + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) do { \ + memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ +} while(0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_AVR32_CACHEFLUSH_H */ diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h new file mode 100644 index 00000000000..41b7af09edc --- /dev/null +++ b/include/asm-avr32/checksum.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CHECKSUM_H +#define __ASM_AVR32_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +unsigned int csum_partial(const unsigned char * buff, int len, + unsigned int sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. 
+ * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +unsigned int csum_partial_copy_generic(const char *src, char *dst, int len, + int sum, int *src_err_ptr, + int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * verify_area(). + */ +static inline +unsigned int csum_partial_copy_nocheck(const char *src, char *dst, + int len, int sum) +{ + return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); +} + +static inline +unsigned int csum_partial_copy_from_user (const char __user *src, char *dst, + int len, int sum, int *err_ptr) +{ + return csum_partial_copy_generic((const char __force *)src, dst, len, + sum, err_ptr, NULL); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ */ +static inline unsigned short ip_fast_csum(unsigned char *iph, + unsigned int ihl) +{ + unsigned int sum, tmp; + + __asm__ __volatile__( + " ld.w %0, %1++\n" + " ld.w %3, %1++\n" + " sub %2, 4\n" + " add %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " acr %0\n" + "1: ld.w %3, %1++\n" + " add %0, %3\n" + " acr %0\n" + " sub %2, 1\n" + " brne 1b\n" + " lsl %3, %0, 16\n" + " andl %0, 0\n" + " mov %2, 0xffff\n" + " add %0, %3\n" + " adc %0, %0, %2\n" + " com %0\n" + " lsr %0, 16\n" + : "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp) + : "1"(iph), "2"(ihl) + : "memory", "cc"); + return sum; +} + +/* + * Fold a partial checksum + */ + +static inline unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + asm(" bfextu %1, %0, 0, 16\n" + " lsr %0, 16\n" + " add %0, %1\n" + " bfextu %1, %0, 16, 16\n" + " add %0, %1" + : "=&r"(sum), "=&r"(tmp) + : "0"(sum)); + + return ~sum; +} + +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + asm(" add %0, %1\n" + " adc %0, %0, %2\n" + " adc %0, %0, %3\n" + " acr %0" + : "=r"(sum) + : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)), + "0"(sum) + : "cc"); + + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#endif /* __ASM_AVR32_CHECKSUM_H */ diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h new 
file mode 100644 index 00000000000..e87e0f81cbe --- /dev/null +++ b/include/asm-avr32/cputime.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_CPUTIME_H +#define __ASM_AVR32_CPUTIME_H + +#include <asm-generic/cputime.h> + +#endif /* __ASM_AVR32_CPUTIME_H */ diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h new file mode 100644 index 00000000000..c7b0549eab8 --- /dev/null +++ b/include/asm-avr32/current.h @@ -0,0 +1,15 @@ +#ifndef __ASM_AVR32_CURRENT_H +#define __ASM_AVR32_CURRENT_H + +#include <linux/thread_info.h> + +struct task_struct; + +inline static struct task_struct * get_current(void) +{ + return current_thread_info()->task; +} + +#define current get_current() + +#endif /* __ASM_AVR32_CURRENT_H */ diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h new file mode 100644 index 00000000000..cc3b2e3343b --- /dev/null +++ b/include/asm-avr32/delay.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_DELAY_H +#define __ASM_AVR32_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/avr32/lib/delay.c + */ + +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __const_udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + __udelay(n)) + +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? 
__bad_ndelay() : __const_udelay((n) * 5ul)) : \ + __ndelay(n)) + +#endif /* __ASM_AVR32_DELAY_H */ diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h new file mode 100644 index 00000000000..d7ddd4fdeca --- /dev/null +++ b/include/asm-avr32/div64.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_DIV64_H +#define __ASM_AVR32_DIV64_H + +#include <asm-generic/div64.h> + +#endif /* __ASM_AVR32_DIV64_H */ diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h new file mode 100644 index 00000000000..4c40cb41cdf --- /dev/null +++ b/include/asm-avr32/dma-mapping.h @@ -0,0 +1,320 @@ +#ifndef __ASM_AVR32_DMA_MAPPING_H +#define __ASM_AVR32_DMA_MAPPING_H + +#include <linux/mm.h> +#include <linux/device.h> +#include <asm/scatterlist.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> +#include <asm/io.h> + +extern void dma_cache_sync(void *vaddr, size_t size, int direction); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +static inline int dma_supported(struct device *dev, u64 mask) +{ + /* Fix when needed. I really don't know of any limitations */ + return 1; +} + +static inline int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + return 0; +} + +/** + * dma_alloc_coherent - allocate consistent memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, unbuffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. 
+ */ +extern void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); + +/** + * dma_free_coherent - free memory allocated by dma_alloc_coherent + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: size of memory originally requested in dma_alloc_coherent + * @cpu_addr: CPU-view address returned from dma_alloc_coherent + * @handle: device-view address returned from dma_alloc_coherent + * + * Free (and unmap) a DMA buffer previously allocated by + * dma_alloc_coherent(). + * + * References to memory and mappings associated with cpu_addr/handle + * during and after this call executing are illegal. + */ +extern void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle); + +/** + * dma_alloc_writecombine - allocate write-combining memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, buffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. + */ +extern void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); + +/** + * dma_free_writecombine - free memory allocated by dma_alloc_writecombine + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: size of memory originally requested in dma_alloc_writecombine + * @cpu_addr: CPU-view address returned from dma_alloc_writecombine + * @handle: device-view address returned from dma_alloc_writecombine + * + * Free (and unmap) a DMA buffer previously allocated by + * dma_alloc_writecombine(). + * + * References to memory and mappings associated with cpu_addr/handle + * during and after this call executing are illegal.
+ */ +extern void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle); + +/** + * dma_map_single - map a single buffer for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @cpu_addr: CPU direct mapped address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_single() or dma_sync_single(). + */ +static inline dma_addr_t +dma_map_single(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + dma_cache_sync(cpu_addr, size, direction); + return virt_to_bus(cpu_addr); +} + +/** + * dma_unmap_single - unmap a single buffer previously mapped + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Unmap a single streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_single() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + +} + +/** + * dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. 
The CPU + * can regain ownership by calling dma_unmap_page() or dma_sync_single(). + */ +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + return dma_map_single(dev, page_address(page) + offset, + size, direction); +} + +/** + * dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Unmap a single streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_single() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + dma_unmap_single(dev, dma_address, size, direction); +} + +/** + * dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. 
+ * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + char *virt; + + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + virt = page_address(sg[i].page) + sg[i].offset; + dma_cache_sync(virt, sg[i].length, direction); + } + + return nents; +} + +/** + * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Unmap a set of streaming mode DMA translations. + * Again, CPU read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + +} + +/** + * dma_sync_single_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a single streaming mode DMA + * translation after a transfer. + * + * If you perform a dma_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the DMA mapping, + * you must call this function before doing so. At the next point you + * give the DMA address back to the card, you must first perform a + * dma_sync_single_for_device, and then the device again owns the + * buffer. 
+ */ +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +/** + * dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a set of streaming + * mode DMA translations after a transfer. + * + * The same as dma_sync_single_for_* but for a scatter-gather list, + * same rules and usage. + */ +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +/* Now for the API extensions over the pci_ one */ + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +static inline int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +static inline int dma_get_cache_alignment(void) +{ + return boot_cpu_data.dcache.linesz; +} + +#endif /* __ASM_AVR32_DMA_MAPPING_H */ diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h new file mode 100644 index 00000000000..9e91205590a --- /dev/null +++ b/include/asm-avr32/dma.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_DMA_H 
+#define __ASM_AVR32_DMA_H + +/* The maximum address that we can perform a DMA transfer to on this platform. + * Not really applicable to AVR32, but some functions need it. */ +#define MAX_DMA_ADDRESS 0xffffffff + +#endif /* __ASM_AVR32_DMA_H */ diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h new file mode 100644 index 00000000000..d334b4994d2 --- /dev/null +++ b/include/asm-avr32/elf.h @@ -0,0 +1,110 @@ +#ifndef __ASM_AVR32_ELF_H +#define __ASM_AVR32_ELF_H + +/* AVR32 relocation numbers */ +#define R_AVR32_NONE 0 +#define R_AVR32_32 1 +#define R_AVR32_16 2 +#define R_AVR32_8 3 +#define R_AVR32_32_PCREL 4 +#define R_AVR32_16_PCREL 5 +#define R_AVR32_8_PCREL 6 +#define R_AVR32_DIFF32 7 +#define R_AVR32_DIFF16 8 +#define R_AVR32_DIFF8 9 +#define R_AVR32_GOT32 10 +#define R_AVR32_GOT16 11 +#define R_AVR32_GOT8 12 +#define R_AVR32_21S 13 +#define R_AVR32_16U 14 +#define R_AVR32_16S 15 +#define R_AVR32_8S 16 +#define R_AVR32_8S_EXT 17 +#define R_AVR32_22H_PCREL 18 +#define R_AVR32_18W_PCREL 19 +#define R_AVR32_16B_PCREL 20 +#define R_AVR32_16N_PCREL 21 +#define R_AVR32_14UW_PCREL 22 +#define R_AVR32_11H_PCREL 23 +#define R_AVR32_10UW_PCREL 24 +#define R_AVR32_9H_PCREL 25 +#define R_AVR32_9UW_PCREL 26 +#define R_AVR32_HI16 27 +#define R_AVR32_LO16 28 +#define R_AVR32_GOTPC 29 +#define R_AVR32_GOTCALL 30 +#define R_AVR32_LDA_GOT 31 +#define R_AVR32_GOT21S 32 +#define R_AVR32_GOT18SW 33 +#define R_AVR32_GOT16S 34 +#define R_AVR32_GOT7UW 35 +#define R_AVR32_32_CPENT 36 +#define R_AVR32_CPCALL 37 +#define R_AVR32_16_CP 38 +#define R_AVR32_9W_CP 39 +#define R_AVR32_RELATIVE 40 +#define R_AVR32_GLOB_DAT 41 +#define R_AVR32_JMP_SLOT 42 +#define R_AVR32_ALIGN 43 + +/* + * ELF register definitions.. 
+ */ + +#include <asm/ptrace.h> +#include <asm/user.h> + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_fpu_struct elf_fpregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 ) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#ifdef __LITTLE_ENDIAN__ +#define ELF_DATA ELFDATA2LSB +#else +#define ELF_DATA ELFDATA2MSB +#endif +#define ELF_ARCH EM_AVR32 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... 
*/ + +#define ELF_PLATFORM (NULL) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +#endif + +#endif /* __ASM_AVR32_ELF_H */ diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h new file mode 100644 index 00000000000..3e7e014776b --- /dev/null +++ b/include/asm-avr32/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_EMERGENCY_RESTART_H +#define __ASM_AVR32_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */ diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h new file mode 100644 index 00000000000..558a7249f06 --- /dev/null +++ b/include/asm-avr32/errno.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_ERRNO_H +#define __ASM_AVR32_ERRNO_H + +#include <asm-generic/errno.h> + +#endif /* __ASM_AVR32_ERRNO_H */ diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h new file mode 100644 index 00000000000..14c0c4402b1 --- /dev/null +++ b/include/asm-avr32/fcntl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FCNTL_H +#define __ASM_AVR32_FCNTL_H + +#include <asm-generic/fcntl.h> + +#endif /* __ASM_AVR32_FCNTL_H */ diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h new file mode 100644 index 00000000000..10419f14a68 --- /dev/null +++ b/include/asm-avr32/futex.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FUTEX_H +#define __ASM_AVR32_FUTEX_H + +#include <asm-generic/futex.h> + +#endif /* __ASM_AVR32_FUTEX_H */ diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h new file mode 100644 index 00000000000..267354356f6 --- /dev/null +++ b/include/asm-avr32/hardirq.h @@ -0,0 +1,34 @@ +#ifndef __ASM_AVR32_HARDIRQ_H +#define __ASM_AVR32_HARDIRQ_H + +#include <linux/threads.h> +#include <asm/irq.h> + +#ifndef __ASSEMBLY__ + +#include <linux/cache.h> + +/* entry.S is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + 
+void ack_bad_irq(unsigned int irq); + +/* Standard mappings for irq_cpustat_t above */ +#include <linux/irq_cpustat.h> + +#endif /* __ASSEMBLY__ */ + +#define HARDIRQ_BITS 12 + +/* + * The hardirq mask has to be large enough to have + * space for potentially all IRQ sources in the system + * nesting on a single CPU: + */ +#if (1 << HARDIRQ_BITS) < NR_IRQS +# error HARDIRQ_BITS is too low! +#endif + +#endif /* __ASM_AVR32_HARDIRQ_H */ diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h new file mode 100644 index 00000000000..218b0a6bfd1 --- /dev/null +++ b/include/asm-avr32/hw_irq.h @@ -0,0 +1,9 @@ +#ifndef __ASM_AVR32_HW_IRQ_H +#define __ASM_AVR32_HW_IRQ_H + +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) +{ + /* Nothing to do */ +} + +#endif /* __ASM_AVR32_HW_IRQ_H */ diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h new file mode 100644 index 00000000000..1ac9ca75e8f --- /dev/null +++ b/include/asm-avr32/intc.h @@ -0,0 +1,128 @@ +#ifndef __ASM_AVR32_INTC_H +#define __ASM_AVR32_INTC_H + +#include <linux/sysdev.h> +#include <linux/interrupt.h> + +struct irq_controller; +struct irqaction; +struct pt_regs; + +struct platform_device; + +/* Information about the internal interrupt controller */ +struct intc_device { + /* ioremapped address of configuration block */ + void __iomem *regs; + + /* the physical device */ + struct platform_device *pdev; + + /* Number of interrupt lines per group. */ + unsigned int irqs_per_group; + + /* The highest group ID + 1 */ + unsigned int nr_groups; + + /* + * Bitfield indicating which groups are actually in use. The + * size of the array is + * ceil(group_max / (8 * sizeof(unsigned int))). + */ + unsigned int group_mask[]; +}; + +struct irq_controller_class { + /* + * A short name identifying this kind of controller. + */ + const char *typename; + /* + * Handle the IRQ. Must do any necessary acking and masking. 
+ */ + irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs); + /* + * Register a new IRQ handler. + */ + int (*setup)(struct irq_controller *ctrl, unsigned int irq, + struct irqaction *action); + /* + * Unregister an IRQ handler. + */ + void (*free)(struct irq_controller *ctrl, unsigned int irq, + void *dev_id); + /* + * Mask the IRQ in the interrupt controller. + */ + void (*mask)(struct irq_controller *ctrl, unsigned int irq); + /* + * Unmask the IRQ in the interrupt controller. + */ + void (*unmask)(struct irq_controller *ctrl, unsigned int irq); + /* + * Set the type of the IRQ. See below for possible types. + * Return -EINVAL if a given type is not supported + */ + int (*set_type)(struct irq_controller *ctrl, unsigned int irq, + unsigned int type); + /* + * Return the IRQ type currently set + */ + unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq); +}; + +struct irq_controller { + struct irq_controller_class *class; + unsigned int irq_group; + unsigned int first_irq; + unsigned int nr_irqs; + struct list_head list; +}; + +struct intc_group_desc { + struct irq_controller *ctrl; + irqreturn_t (*handle)(int, void *, struct pt_regs *); + unsigned long flags; + void *dev_id; + const char *devname; +}; + +/* + * The internal interrupt controller. Defined in board/part-specific + * devices.c. + * TODO: Should probably be defined per-cpu. + */ +extern struct intc_device intc; + +extern int request_internal_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, void *dev_id); +extern void free_internal_irq(unsigned int irq); + +/* Only used by time_init() */ +extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc); + +/* + * Set interrupt priority for a given group. `group' can be found by + * using irq_to_group(irq). Priority can be from 0 (lowest) to 3 + * (highest).
Higher-priority interrupts will preempt lower-priority + * interrupts (unless interrupts are masked globally). + * + * This function does not check for conflicts within a group. + */ +extern int intc_set_priority(unsigned int group, + unsigned int priority); + +/* + * Returns a bitmask of pending interrupts in a group. + */ +extern unsigned long intc_get_pending(unsigned int group); + +/* + * Register a new external interrupt controller. Returns the first + * external IRQ number that is assigned to the new controller. + */ +extern int intc_register_controller(struct irq_controller *ctrl); + +#endif /* __ASM_AVR32_INTC_H */ diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h new file mode 100644 index 00000000000..2fc8f111dce --- /dev/null +++ b/include/asm-avr32/io.h @@ -0,0 +1,253 @@ +#ifndef __ASM_AVR32_IO_H +#define __ASM_AVR32_IO_H + +#include <linux/string.h> + +#ifdef __KERNEL__ + +#include <asm/addrspace.h> +#include <asm/byteorder.h> + +/* virt_to_phys will only work when address is in P1 or P2 */ +static __inline__ unsigned long virt_to_phys(volatile void *address) +{ + return PHYSADDR(address); +} + +static __inline__ void * phys_to_virt(unsigned long address) +{ + return (void *)P1SEGADDR(address); +} + +#define cached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define uncached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define phys_to_cached(addr) ((void *)P1SEGADDR(addr)) +#define phys_to_uncached(addr) ((void *)P2SEGADDR(addr)) + +/* + * Generic IO read/write. These perform native-endian accesses. Note + * that some architectures will want to re-define __raw_{read,write}w. 
+ */ +extern void __raw_writesb(unsigned int addr, const void *data, int bytelen); +extern void __raw_writesw(unsigned int addr, const void *data, int wordlen); +extern void __raw_writesl(unsigned int addr, const void *data, int longlen); + +extern void __raw_readsb(unsigned int addr, void *data, int bytelen); +extern void __raw_readsw(unsigned int addr, void *data, int wordlen); +extern void __raw_readsl(unsigned int addr, void *data, int longlen); + +static inline void writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *)addr = b; +} +static inline void writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *)addr = b; +} +static inline void writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *)addr = b; +} +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +static inline unsigned char readb(const volatile void __iomem *addr) +{ + return *(const volatile unsigned char __force *)addr; +} +static inline unsigned short readw(const volatile void __iomem *addr) +{ + return *(const volatile unsigned short __force *)addr; +} +static inline unsigned int readl(const volatile void __iomem *addr) +{ + return *(const volatile unsigned int __force *)addr; +} +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writesb(p, d, l) __raw_writesb((unsigned int)p, d, l) +#define writesw(p, d, l) __raw_writesw((unsigned int)p, d, l) +#define writesl(p, d, l) __raw_writesl((unsigned int)p, d, l) + +#define readsb(p, d, l) __raw_readsb((unsigned int)p, d, l) +#define readsw(p, d, l) __raw_readsw((unsigned int)p, d, l) +#define readsl(p, d, l) __raw_readsl((unsigned int)p, d, l) + +/* + * These two are only here because ALSA _thinks_ it needs them... 
+ */ +static inline void memcpy_fromio(void * to, const volatile void __iomem *from, + unsigned long count) +{ + char *p = to; + while (count) { + count--; + *p = readb(from); + p++; + from++; + } +} + +static inline void memcpy_toio(volatile void __iomem *to, const void * from, + unsigned long count) +{ + const char *p = from; + while (count) { + count--; + writeb(*p, to); + p++; + to++; + } +} + +static inline void memset_io(volatile void __iomem *addr, unsigned char val, + unsigned long count) +{ + memset((void __force *)addr, val, count); +} + +/* + * Bad read/write accesses... + */ +extern void __readwrite_bug(const char *fn); + +#define IO_SPACE_LIMIT 0xffffffff + +/* Convert I/O port address to virtual address */ +#define __io(p) ((void __iomem *)phys_to_uncached(p)) + +/* + * IO port access primitives + * ------------------------- + * + * The AVR32 doesn't have special IO access instructions; all IO is memory + * mapped. Note that these are defined to perform little endian accesses + * only. Their primary purpose is to access PCI and ISA peripherals. + * + * Note that for a big endian machine, this implies that the following + * big endian mode connectivity is in place. + * + * The machine specific io.h include defines __io to translate an "IO" + * address to a memory address. + * + * Note that we prevent GCC re-ordering or caching values in expressions + * by introducing sequence points into the in*() definitions. Note that + * __raw_* do not guarantee this behaviour. + * + * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space. 
+ */ +#define outb(v, p) __raw_writeb(v, __io(p)) +#define outw(v, p) __raw_writew(cpu_to_le16(v), __io(p)) +#define outl(v, p) __raw_writel(cpu_to_le32(v), __io(p)) + +#define inb(p) __raw_readb(__io(p)) +#define inw(p) le16_to_cpu(__raw_readw(__io(p))) +#define inl(p) le32_to_cpu(__raw_readl(__io(p))) + +static inline void __outsb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outb(*(u8 *)addr, port); + addr++; + } +} + +static inline void __insb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u8 *)addr = inb(port); + addr++; + } +} + +static inline void __outsw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outw(*(u16 *)addr, port); + addr += 2; + } +} + +static inline void __insw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u16 *)addr = inw(port); + addr += 2; + } +} + +static inline void __outsl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outl(*(u32 *)addr, port); + addr += 4; + } +} + +static inline void __insl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u32 *)addr = inl(port); + addr += 4; + } +} + +#define outsb(port, addr, count) __outsb(port, addr, count) +#define insb(port, addr, count) __insb(port, addr, count) +#define outsw(port, addr, count) __outsw(port, addr, count) +#define insw(port, addr, count) __insw(port, addr, count) +#define outsl(port, addr, count) __outsl(port, addr, count) +#define insl(port, addr, count) __insl(port, addr, count) + +extern void __iomem *__ioremap(unsigned long offset, size_t size, + unsigned long flags); +extern void __iounmap(void __iomem *addr); + +/* + * ioremap - map bus memory into CPU space + * @offset bus address of the memory + * @size size of the resource to map + * + * ioremap performs a platform specific sequence of operations to make + * bus memory CPU accessible via the readb/.../writel functions and + * the other 
mmio helpers. The returned address is not guaranteed to + * be usable directly as a virtual address. + */ +#define ioremap(offset, size) \ + __ioremap((offset), (size), 0) + +#define iounmap(addr) \ + __iounmap(addr) + +#define cached(addr) P1SEGADDR(addr) +#define uncached(addr) P2SEGADDR(addr) + +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys +#define bus_to_page phys_to_page + +#define dma_cache_wback_inv(_start, _size) \ + flush_dcache_region(_start, _size) +#define dma_cache_inv(_start, _size) \ + invalidate_dcache_region(_start, _size) +#define dma_cache_wback(_start, _size) \ + clean_dcache_region(_start, _size) + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_IO_H */ diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h new file mode 100644 index 00000000000..c8472c1398e --- /dev/null +++ b/include/asm-avr32/ioctl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_IOCTL_H +#define __ASM_AVR32_IOCTL_H + +#include <asm-generic/ioctl.h> + +#endif /* __ASM_AVR32_IOCTL_H */ diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h new file mode 100644 index 00000000000..0500426b718 --- /dev/null +++ b/include/asm-avr32/ioctls.h @@ -0,0 +1,83 @@ +#ifndef __ASM_AVR32_IOCTLS_H +#define __ASM_AVR32_IOCTLS_H + +#include <asm/ioctl.h> + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define 
TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 
16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* __ASM_AVR32_IOCTLS_H */ diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h new file mode 100644 index 00000000000..1552c9698f5 --- /dev/null +++ b/include/asm-avr32/ipcbuf.h @@ -0,0 +1,29 @@ +#ifndef __ASM_AVR32_IPCBUF_H +#define __ASM_AVR32_IPCBUF_H + +/* +* The user_ipc_perm structure for AVR32 architecture. +* Note extra padding because this structure is passed back and forth +* between kernel and user space. +* +* Pad space is left for: +* - 32-bit mode_t and seq +* - 2 miscellaneous 32-bit values +*/ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_AVR32_IPCBUF_H */ diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h new file mode 100644 index 00000000000..f7e725707dd --- /dev/null +++ b/include/asm-avr32/irq.h @@ -0,0 +1,10 @@ +#ifndef __ASM_AVR32_IRQ_H +#define __ASM_AVR32_IRQ_H + +#define NR_INTERNAL_IRQS 64 +#define NR_EXTERNAL_IRQS 64 +#define NR_IRQS (NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS) + +#define irq_canonicalize(i) (i) + +#endif /* __ASM_AVR32_IRQ_H */ diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h new file mode 100644 index 00000000000..93570daac38 --- /dev/null +++ b/include/asm-avr32/irqflags.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#ifndef __ASM_AVR32_IRQFLAGS_H +#define __ASM_AVR32_IRQFLAGS_H + +#include <asm/sysreg.h> + +static inline unsigned long __raw_local_save_flags(void) +{ + return sysreg_read(SR); +} + +#define raw_local_save_flags(x) \ + do { (x) = __raw_local_save_flags(); } while (0) + +/* + * This will restore ALL status register flags, not only the interrupt + * mask flag. + * + * The empty asm statement informs the compiler of this fact while + * also serving as a barrier. + */ +static inline void raw_local_irq_restore(unsigned long flags) +{ + sysreg_write(SR, flags); + asm volatile("" : : : "memory", "cc"); +} + +static inline void raw_local_irq_disable(void) +{ + asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return (flags & SYSREG_BIT(GM)) != 0; +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASM_AVR32_IRQFLAGS_H */ diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h new file mode 100644 index 00000000000..f583b643ffb --- /dev/null +++ b/include/asm-avr32/kdebug.h @@ -0,0 +1,38 @@ +#ifndef __ASM_AVR32_KDEBUG_H +#define __ASM_AVR32_KDEBUG_H + +#include <linux/notifier.h> + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + int trapnr; +}; + +int register_die_notifier(struct notifier_block *nb); +int unregister_die_notifier(struct notifier_block *nb); +int register_page_fault_notifier(struct notifier_block *nb); +int unregister_page_fault_notifier(struct notifier_block *nb); 
+extern struct atomic_notifier_head avr32_die_chain; + +/* Grossly misnamed. */ +enum die_val { + DIE_FAULT, + DIE_BREAKPOINT, + DIE_SSTEP, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + + return atomic_notifier_call_chain(&avr32_die_chain, val, &args); +} + +#endif /* __ASM_AVR32_KDEBUG_H */ diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h new file mode 100644 index 00000000000..b7f5c687010 --- /dev/null +++ b/include/asm-avr32/kmap_types.h @@ -0,0 +1,30 @@ +#ifndef __ASM_AVR32_KMAP_TYPES_H +#define __ASM_AVR32_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_PTE2, +D(10) KM_IRQ0, +D(11) KM_IRQ1, +D(12) KM_SOFTIRQ0, +D(13) KM_SOFTIRQ1, +D(14) KM_TYPE_NR +}; + +#undef D + +#endif /* __ASM_AVR32_KMAP_TYPES_H */ diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h new file mode 100644 index 00000000000..09a5cbe2f89 --- /dev/null +++ b/include/asm-avr32/kprobes.h @@ -0,0 +1,34 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_KPROBES_H +#define __ASM_AVR32_KPROBES_H + +#include <linux/types.h> + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ +#define MAX_INSN_SIZE 2 + +#define ARCH_INACTIVE_KPROBE_COUNT 1 + +#define arch_remove_kprobe(p) do { } while (0) + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t insn[MAX_INSN_SIZE]; +}; + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* __ASM_AVR32_KPROBES_H */ diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h new file mode 100644 index 00000000000..f7b285e910d --- /dev/null +++ b/include/asm-avr32/linkage.h @@ -0,0 +1,7 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .balign 2 +#define __ALIGN_STR ".balign 2" + +#endif /* __ASM_LINKAGE_H */ diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h new file mode 100644 index 00000000000..1c1619694da --- /dev/null +++ b/include/asm-avr32/local.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_LOCAL_H +#define __ASM_AVR32_LOCAL_H + +#include <asm-generic/local.h> + +#endif /* __ASM_AVR32_LOCAL_H */ diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h new file mode 100644 index 00000000000..1290bb32802 --- /dev/null +++ b/include/asm-avr32/mach/serial_at91.h @@ -0,0 +1,33 @@ +/* + * linux/include/asm-arm/mach/serial_at91.h + * + * Based on serial_sa1100.h by Nicolas Pitre + * + * Copyright (C) 2002 ATMEL Rousset + * + * Low level machine dependent UART functions. + */ + +struct uart_port; + +/* + * This is a temporary structure for registering these + * functions; it is intended to be discarded after boot. 
+ */ +struct at91_port_fns { + void (*set_mctrl)(struct uart_port *, u_int); + u_int (*get_mctrl)(struct uart_port *); + void (*enable_ms)(struct uart_port *); + void (*pm)(struct uart_port *, u_int, u_int); + int (*set_wake)(struct uart_port *, u_int); + int (*open)(struct uart_port *); + void (*close)(struct uart_port *); +}; + +#if defined(CONFIG_SERIAL_AT91) +void at91_register_uart_fns(struct at91_port_fns *fns); +#else +#define at91_register_uart_fns(fns) do { } while (0) +#endif + + diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h new file mode 100644 index 00000000000..648f91e7187 --- /dev/null +++ b/include/asm-avr32/mman.h @@ -0,0 +1,17 @@ +#ifndef __ASM_AVR32_MMAN_H__ +#define __ASM_AVR32_MMAN_H__ + +#include <asm-generic/mman.h> + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __ASM_AVR32_MMAN_H__ */ diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h new file mode 100644 index 00000000000..60c2d2650d3 --- /dev/null +++ b/include/asm-avr32/mmu.h @@ -0,0 +1,10 @@ +#ifndef __ASM_AVR32_MMU_H +#define __ASM_AVR32_MMU_H + +/* Default "unsigned long" context */ +typedef unsigned long mm_context_t; + +#define MMU_ITLB_ENTRIES 64 +#define MMU_DTLB_ENTRIES 64 + +#endif /* __ASM_AVR32_MMU_H */ diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h new file mode 100644 index 00000000000..31add1ae808 --- /dev/null +++ b/include/asm-avr32/mmu_context.h @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * ASID handling taken from 
SH implementation. + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 Paul Mundt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_MMU_CONTEXT_H +#define __ASM_AVR32_MMU_CONTEXT_H + +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> +#include <asm/sysreg.h> + +/* + * The MMU "context" consists of two things: + * (a) TLB cache version + * (b) ASID (Address Space IDentifier) + */ +#define MMU_CONTEXT_ASID_MASK 0x000000ff +#define MMU_CONTEXT_VERSION_MASK 0xffffff00 +#define MMU_CONTEXT_FIRST_VERSION 0x00000100 +#define NO_CONTEXT 0 + +#define MMU_NO_ASID 0x100 + +/* Virtual Page Number mask */ +#define MMU_VPN_MASK 0xfffff000 + +/* Cache of MMU context last used */ +extern unsigned long mmu_context_cache; + +/* + * Get MMU context if needed + */ +static inline void +get_mmu_context(struct mm_struct *mm) +{ + unsigned long mc = mmu_context_cache; + + if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) + /* It's up to date, do nothing */ + return; + + /* It's old, we need to get new context with new version */ + mc = ++mmu_context_cache; + if (!(mc & MMU_CONTEXT_ASID_MASK)) { + /* + * We have exhausted all ASIDs of this version. + * Flush the TLB and start new cycle. + */ + flush_tlb_all(); + /* + * Fix version. Note that we avoid version #0 + * to distinguish NO_CONTEXT. + */ + if (!mc) + mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; + } + mm->context = mc; +} + +/* + * Initialize the context related info for a new mm_struct + * instance. + */ +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + mm->context = NO_CONTEXT; + return 0; +} + +/* + * Destroy context related info for an mm_struct that is about + * to be put to rest. 
+ */ +static inline void destroy_context(struct mm_struct *mm) +{ + /* Do nothing */ +} + +static inline void set_asid(unsigned long asid) +{ + /* XXX: We're destroying TLBEHI[8:31] */ + sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK); + cpu_sync_pipeline(); +} + +static inline unsigned long get_asid(void) +{ + unsigned long asid; + + asid = sysreg_read(TLBEHI); + return asid & MMU_CONTEXT_ASID_MASK; +} + +static inline void activate_context(struct mm_struct *mm) +{ + get_mmu_context(mm); + set_asid(mm->context & MMU_CONTEXT_ASID_MASK); +} + +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + if (likely(prev != next)) { + unsigned long __pgdir = (unsigned long)next->pgd; + + sysreg_write(PTBR, __pgdir); + activate_context(next); + } +} + +#define deactivate_mm(tsk,mm) do { } while(0) + +#define activate_mm(prev, next) switch_mm((prev), (next), NULL) + +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + + +static inline void enable_mmu(void) +{ + sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S) + | SYSREG_BIT(E) + | SYSREG_BIT(MMUCR_I))); + nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); + + if (mmu_context_cache == NO_CONTEXT) + mmu_context_cache = MMU_CONTEXT_FIRST_VERSION; + + set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK); +} + +static inline void disable_mmu(void) +{ + sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S)); +} + +#endif /* __ASM_AVR32_MMU_CONTEXT_H */ diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h new file mode 100644 index 00000000000..451444538a1 --- /dev/null +++ b/include/asm-avr32/module.h @@ -0,0 +1,28 @@ +#ifndef __ASM_AVR32_MODULE_H +#define __ASM_AVR32_MODULE_H + +struct mod_arch_syminfo { + unsigned long got_offset; + int got_initialized; +}; + +struct mod_arch_specific { + /* Starting offset of got in the module core memory. */ + unsigned long got_offset; + /* Size of the got. 
*/ + unsigned long got_size; + /* Number of symbols in syminfo. */ + int nsyms; + /* Additional symbol information (got offsets). */ + struct mod_arch_syminfo *syminfo; +}; + +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr + +#define MODULE_PROC_FAMILY "AVR32v1" + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY + +#endif /* __ASM_AVR32_MODULE_H */ diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h new file mode 100644 index 00000000000..ac18bc4da7f --- /dev/null +++ b/include/asm-avr32/msgbuf.h @@ -0,0 +1,31 @@ +#ifndef __ASM_AVR32_MSGBUF_H +#define __ASM_AVR32_MSGBUF_H + +/* + * The msqid64_ds structure for the AVR32 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + unsigned long __unused1; + __kernel_time_t msg_rtime; /* last msgrcv time */ + unsigned long __unused2; + __kernel_time_t msg_ctime; /* last change time */ + unsigned long __unused3; + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* __ASM_AVR32_MSGBUF_H */ diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h new file mode 100644 index 00000000000..458c1f7fbc1 --- /dev/null +++ b/include/asm-avr32/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. 
+ * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h new file mode 100644 index 00000000000..f0a26de06ca --- /dev/null +++ b/include/asm-avr32/namei.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NAMEI_H +#define __ASM_AVR32_NAMEI_H + +/* This dummy routine may be changed to something useful */ +#define __emul_prefix() NULL + +#endif /* __ASM_AVR32_NAMEI_H */ diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h new file mode 100644 index 00000000000..0b864d7ce33 --- /dev/null +++ b/include/asm-avr32/numnodes.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NUMNODES_H +#define __ASM_AVR32_NUMNODES_H + +/* Max 4 nodes */ +#define NODES_SHIFT 2 + +#endif /* __ASM_AVR32_NUMNODES_H */ diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h new file mode 100644 index 00000000000..46f73180a12 --- /dev/null +++ b/include/asm-avr32/ocd.h @@ -0,0 +1,78 @@ +/* + * AVR32 OCD Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_OCD_H +#define __ASM_AVR32_OCD_H + +/* Debug Registers */ +#define DBGREG_DID 0 +#define DBGREG_DC 8 +#define DBGREG_DS 16 +#define DBGREG_RWCS 28 +#define DBGREG_RWA 36 +#define DBGREG_RWD 40 +#define DBGREG_WT 44 +#define DBGREG_DTC 52 +#define DBGREG_DTSA0 56 +#define DBGREG_DTSA1 60 +#define DBGREG_DTEA0 72 +#define DBGREG_DTEA1 76 +#define DBGREG_BWC0A 88 +#define DBGREG_BWC0B 92 +#define DBGREG_BWC1A 96 +#define DBGREG_BWC1B 100 +#define DBGREG_BWC2A 104 +#define DBGREG_BWC2B 108 +#define DBGREG_BWC3A 112 +#define DBGREG_BWC3B 116 +#define DBGREG_BWA0A 120 +#define DBGREG_BWA0B 124 +#define DBGREG_BWA1A 128 +#define DBGREG_BWA1B 132 +#define DBGREG_BWA2A 136 +#define DBGREG_BWA2B 140 +#define DBGREG_BWA3A 144 +#define DBGREG_BWA3B 148 +#define DBGREG_BWD3A 152 /* was 153; every offset in this table is a multiple of 4 */ +#define DBGREG_BWD3B 156 + +#define DBGREG_PID 284 + +#define SABAH_OCD 0x01 +#define SABAH_ICACHE 0x02 +#define SABAH_MEM_CACHED 0x04 +#define SABAH_MEM_UNCACHED 0x05 + +/* Fields in the Development Control register */ +#define DC_SS_BIT 8 + +#define DC_SS (1 << DC_SS_BIT) +#define DC_DBE (1 << 13) +#define DC_RID (1 << 27) +#define DC_ORP (1 << 28) +#define DC_MM (1 << 29) +#define DC_RES (1 << 30) + +/* Fields in the Development Status register */ +#define DS_SSS (1 << 0) +#define DS_SWB (1 << 1) +#define DS_HWB (1 << 2) +#define DS_BP_SHIFT 8 +#define DS_BP_MASK (0xff << DS_BP_SHIFT) + +#define __mfdr(addr) \ +({ \ + register unsigned long value; \ + asm volatile("mfdr %0, %1" : "=r"(value) : "i"(addr)); \ + value; \ +}) +#define __mtdr(addr, value) \ + asm volatile("mtdr %0, %1" : : "i"(addr), "r"(value)) + +#endif /* __ASM_AVR32_OCD_H */ diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h new file mode 100644 index 00000000000..0f630b3e993 --- /dev/null +++ b/include/asm-avr32/page.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the 
GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PAGE_H +#define __ASM_AVR32_PAGE_H + +#ifdef __KERNEL__ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PTE_MASK PAGE_MASK + +#ifndef __ASSEMBLY__ + +#include <asm/addrspace.h> + +extern void clear_page(void *to); +extern void copy_page(void *to, void *from); + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) + +/* FIXME: These should be removed soon */ +extern unsigned long memory_start, memory_end; + +/* Pure 2^n version of get_order */ +static inline int get_order(unsigned long size) +{ + unsigned lz; + + size = (size - 1) >> PAGE_SHIFT; + asm("clz %0, %1" : "=r"(lz) : "r"(size)); + return 32 - lz; +} + +#endif /* !__ASSEMBLY__ */ + +/* Align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) + +/* + * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff + * permanently to the physical addresses 0x00000000 -> 0x1fffffff when + * segmentation is enabled. We want to make use of this in order to + * minimize TLB pressure. + */ +#define PAGE_OFFSET (0x80000000UL) + +/* + * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure + * is a good idea. 
Anyway, we can't simply subtract PAGE_OFFSET here + * in that case, so we'll have to mask out the three most significant + * bits of the address instead... + * + * What's the difference between __pa() and virt_to_phys() anyway? + */ +#define __pa(x) PHYSADDR(x) +#define __va(x) ((void *)(P1SEGADDR(x))) + +#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT) + +#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) /* (phys) is parenthesized: pte_page() passes an '&' expression, and '>>' binds tighter than '&' */ +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +#ifndef CONFIG_NEED_MULTIPLE_NODES + +#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT) + +#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET) +#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* + * Memory above this physical address will be considered highmem. 
+ */ +#define HIGHMEM_START 0x20000000UL + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_PAGE_H */ diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h new file mode 100644 index 00000000000..34bc8d4c3b2 --- /dev/null +++ b/include/asm-avr32/param.h @@ -0,0 +1,23 @@ +#ifndef __ASM_AVR32_PARAM_H +#define __ASM_AVR32_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ +# define USER_HZ 100 /* User interfaces are in "ticks" */ +# define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ +#endif + +#ifndef HZ +# define HZ 100 +#endif + +/* TODO: Should be configurable */ +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +# define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 + +#endif /* __ASM_AVR32_PARAM_H */ diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h new file mode 100644 index 00000000000..0f5f134b896 --- /dev/null +++ b/include/asm-avr32/pci.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_PCI_H__ +#define __ASM_AVR32_PCI_H__ + +/* We don't support PCI yet, but some drivers require this file anyway */ + +#define PCI_DMA_BUS_IS_PHYS (1) + +#endif /* __ASM_AVR32_PCI_H__ */ diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h new file mode 100644 index 00000000000..69227b4cd0d --- /dev/null +++ b/include/asm-avr32/percpu.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_PERCPU_H +#define __ASM_AVR32_PERCPU_H + +#include <asm-generic/percpu.h> + +#endif /* __ASM_AVR32_PERCPU_H */ diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h new file mode 100644 index 00000000000..7492cfb92ce --- /dev/null +++ b/include/asm-avr32/pgalloc.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_PGALLOC_H +#define __ASM_AVR32_PGALLOC_H + +#include <asm/processor.h> +#include <linux/threads.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) + +static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + struct page *pte) +{ + set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte))); +} + +/* + * Allocate and free page tables + */ +static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) +{ + unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t)); + pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL); + + if (pgd) + memset(pgd, 0, pgd_size); + + return pgd; +} + +static inline void pgd_free(pgd_t *pgd) +{ + kfree(pgd); +} + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + pte_t *pte; + + do { + pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + if (pte) + clear_page(pte); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + struct page *pte; + + do { + pte = alloc_pages(GFP_KERNEL, 0); + if (pte) + clear_page(page_address(pte)); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline void pte_free_kernel(pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct page *pte) +{ + __free_page(pte); +} + +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) + +#define check_pgt_cache() do { } while(0) + +#endif /* __ASM_AVR32_PGALLOC_H */ diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h new file mode 100644 index 00000000000..425dd567b5b --- /dev/null +++ b/include/asm-avr32/pgtable-2level.h @@ -0,0 +1,47 
@@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ASM_AVR32_PGTABLE_2LEVEL_H + +#include <asm-generic/pgtable-nopmd.h> + +/* + * Traditional 2-level paging structure + */ +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +#define PTRS_PER_PTE 1024 + +#ifndef __ASSEMBLY__ +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval) + +/* + * (pmds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) + +#define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT))) +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */ diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h new file mode 100644 index 00000000000..6b8ca9db2bd --- /dev/null +++ b/include/asm-avr32/pgtable.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_PGTABLE_H +#define __ASM_AVR32_PGTABLE_H + +#include <asm/addrspace.h> + +#ifndef __ASSEMBLY__ +#include <linux/sched.h> + +#endif /* !__ASSEMBLY__ */ + +/* + * Use two-level page tables just as the i386 (without PAE) + */ +#include <asm/pgtable-2level.h> + +/* + * The following code might need some cleanup when the values are + * final... + */ +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0 + +#define PTE_PHYS_MASK 0x1ffff000 + +#ifndef __ASSEMBLY__ +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init(void); + +/* + * ZERO_PAGE is a global shared page that is always zero: used for + * zero-mapped memory areas etc. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8 MiB value just means that there will be a 8 MiB "hole" + * after the uncached physical memory (P2 segment) until the vmalloc + * area starts. That means that any out-of-bounds memory accesses will + * hopefully be caught; we don't know if the end of the P1/P2 segments + * are actually used for anything, but it is anyway safer to let the + * MMU catch these kinds of errors than to rely on the memory bus. + * + * A "hole" of the same size is added to the end of the P3 segment as + * well. It might seem wasteful to use 16 MiB of virtual address space + * on this, but we do have 512 MiB of it... + * + * The vmalloc() routines leave a hole of 4 KiB between each vmalloced + * area for the same reason. + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) +#define VMALLOC_START (P3SEG + VMALLOC_OFFSET) +#define VMALLOC_END (P4SEG - VMALLOC_OFFSET) +#endif /* !__ASSEMBLY__ */ + +/* + * Page flags. 
Some of these flags are not directly supported by + * hardware, so we have to emulate them. + */ +#define _TLBEHI_BIT_VALID 9 +#define _TLBEHI_VALID (1 << _TLBEHI_BIT_VALID) + +#define _PAGE_BIT_WT 0 /* W-bit : write-through */ +#define _PAGE_BIT_DIRTY 1 /* D-bit : page changed */ +#define _PAGE_BIT_SZ0 2 /* SZ0-bit : Size of page */ +#define _PAGE_BIT_SZ1 3 /* SZ1-bit : Size of page */ +#define _PAGE_BIT_EXECUTE 4 /* X-bit : execute access allowed */ +#define _PAGE_BIT_RW 5 /* AP0-bit : write access allowed */ +#define _PAGE_BIT_USER 6 /* AP1-bit : user space access allowed */ +#define _PAGE_BIT_BUFFER 7 /* B-bit : bufferable */ +#define _PAGE_BIT_GLOBAL 8 /* G-bit : global (ignore ASID) */ +#define _PAGE_BIT_CACHABLE 9 /* C-bit : cachable */ + +/* If we drop support for 1K pages, we get two extra bits */ +#define _PAGE_BIT_PRESENT 10 +#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */ + +/* The following flags are only valid when !PRESENT */ +#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */ + +#define _PAGE_WT (1 << _PAGE_BIT_WT) +#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY) +#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE) +#define _PAGE_RW (1 << _PAGE_BIT_RW) +#define _PAGE_USER (1 << _PAGE_BIT_USER) +#define _PAGE_BUFFER (1 << _PAGE_BIT_BUFFER) +#define _PAGE_GLOBAL (1 << _PAGE_BIT_GLOBAL) +#define _PAGE_CACHABLE (1 << _PAGE_BIT_CACHABLE) + +/* Software flags */ +#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED) +#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT) +#define _PAGE_FILE (1 << _PAGE_BIT_FILE) + +/* + * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is + * usually called _PAGE_PROTNONE on other architectures. + * + * XXX: Find out if _PAGE_PROTNONE is equivalent with !_PAGE_USER. 
If + * so, we can encode all possible page sizes (although we can't really + * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED + * bits) + * + */ +#define _PAGE_TYPE_MASK ((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1)) +#define _PAGE_TYPE_NONE (0 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_SMALL (1 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_MEDIUM (2 << _PAGE_BIT_SZ0) +#define _PAGE_TYPE_LARGE (3 << _PAGE_BIT_SZ0) + +/* + * Mask which drop software flags. We currently can't handle more than + * 512 MiB of physical memory, so we can use bits 29-31 for other + * stuff. With a fixed 4K page size, we can use bits 10-11 as well as + * bits 2-3 (SZ) + */ +#define _PAGE_FLAGS_HARDWARE_MASK 0xfffff3ff + +#define _PAGE_FLAGS_CACHE_MASK (_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT) + +/* TODO: Check for saneness */ +/* User-mode page table flags (to be set in a pgd or pmd entry) */ +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Kernel-mode page table flags */ +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Flags that may be modified by software */ +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \ + | _PAGE_FLAGS_CACHE_MASK) + +#define _PAGE_FLAGS_READ (_PAGE_CACHABLE | _PAGE_BUFFER) +#define _PAGE_FLAGS_WRITE (_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY) + +#define _PAGE_NORMAL(x) __pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL \ + | _PAGE_ACCESSED) + +#define PAGE_NONE (_PAGE_ACCESSED | _PAGE_TYPE_NONE) +#define PAGE_READ (_PAGE_FLAGS_READ | _PAGE_USER) +#define PAGE_EXEC (_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER) +#define PAGE_WRITE (_PAGE_FLAGS_WRITE | _PAGE_USER) +#define PAGE_KERNEL _PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL) +#define PAGE_KERNEL_RO _PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL) + +#define _PAGE_P(x) _PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY)) +#define _PAGE_S(x) 
_PAGE_NORMAL(x) + +#define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ) + +#ifndef __ASSEMBLY__ +/* + * The hardware supports flags for write- and execute access. Read is + * always allowed if the page is loaded into the TLB, so the "-w-", + * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx", + * respectively. + * + * The "---" case is handled by software; the page will simply not be + * loaded into the TLB if the page type is _PAGE_TYPE_NONE. + */ + +#define __P000 __pgprot(PAGE_NONE) +#define __P001 _PAGE_P(PAGE_READ) +#define __P010 _PAGE_P(PAGE_WRITE) +#define __P011 _PAGE_P(PAGE_WRITE | PAGE_READ) +#define __P100 _PAGE_P(PAGE_EXEC) +#define __P101 _PAGE_P(PAGE_EXEC | PAGE_READ) +#define __P110 _PAGE_P(PAGE_EXEC | PAGE_WRITE) +#define __P111 _PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define __S000 __pgprot(PAGE_NONE) +#define __S001 _PAGE_S(PAGE_READ) +#define __S010 _PAGE_S(PAGE_WRITE) +#define __S011 _PAGE_S(PAGE_WRITE | PAGE_READ) +#define __S100 _PAGE_S(PAGE_EXEC) +#define __S101 _PAGE_S(PAGE_EXEC | PAGE_READ) +#define __S110 _PAGE_S(PAGE_EXEC | PAGE_WRITE) +#define __S111 _PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) + +#define pte_clear(mm,addr,xp) \ + do { \ + set_pte_at(mm, addr, xp, __pte(0)); \ + } while (0) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} +static inline int pte_write(pte_t pte) +{ + return pte_val(pte) & _PAGE_RW; +} +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXECUTE; +} +static inline int pte_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_DIRTY; +} +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + +/* + * The following only work if pte_present() is not true. 
+ */ +static inline int pte_file(pte_t pte) +{ + return pte_val(pte) & _PAGE_FILE; +} + +/* Mutator functions for PTE bits */ +static inline pte_t pte_rdprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); + return pte; +} +static inline pte_t pte_wrprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); + return pte; +} +static inline pte_t pte_exprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkclean(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkold(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); + return pte; +} +static inline pte_t pte_mkread(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); + return pte; +} +static inline pte_t pte_mkwrite(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); + return pte; +} +static inline pte_t pte_mkexec(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkdirty(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkyoung(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); + return pte; +} + +#define pmd_none(x) (!pmd_val(x)) +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) \ + != _KERNPG_TABLE) + +/* + * Permanent address of a page. We don't support highmem, so this is + * trivial. 
+ */ +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK) + +/* + * Mark the prot value as uncacheable and unbufferable + */ +#define pgprot_noncached(prot) \ + __pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE)) + +/* + * Mark the prot value as uncacheable but bufferable + */ +#define pgprot_writecombine(prot) \ + __pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * extern pte_t mk_pte(struct page *page, pgprot_t pgprot) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) + | pgprot_val(newprot))); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +#define pmd_page(pmd) (phys_to_page(pmd_val(pmd))) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) +#define pgd_offset_current(address) \ + ((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* Find an entry in the third-level page table.. 
*/ +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) +#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +struct vm_area_struct; +extern void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte); + +/* + * Encode and decode a swap entry + * + * Constraints: + * _PAGE_FILE at bit 0 + * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE) + * _PAGE_PRESENT at bit 10 + * + * We encode the type into bits 4-9 and offset into bits 11-31. This + * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per + * device, and 64 possible types. + * + * NOTE: We should set ZEROs at the position of _PAGE_PRESENT + * and _PAGE_PROTNONE bits + */ +#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_offset(x) ((x).val >> 11) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +/* + * Encode and decode a nonlinear file mapping entry. We have to + * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't + * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?) 
+ */ +#define PTE_FILE_MAX_BITS 30 +#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \ + | ((pte_val(pte) >> 11) << 9)) +#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \ + | (((off) >> 9) << 11) \ + | _PAGE_FILE) }) + +typedef pte_t *pte_addr_t; + +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define MK_IOSPACE_PFN(space, pfn) (pfn) +#define GET_IOSPACE(pfn) 0 +#define GET_PFN(pfn) (pfn) + +/* No page table caches to initialize (?) */ +#define pgtable_cache_init() do { } while(0) + +#include <asm-generic/pgtable.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_H */ diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h new file mode 100644 index 00000000000..736e29755df --- /dev/null +++ b/include/asm-avr32/poll.h @@ -0,0 +1,27 @@ +#ifndef __ASM_AVR32_POLL_H +#define __ASM_AVR32_POLL_H + +/* These are specified by iBCS2 */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +/* The rest seem to be more-or-less nonstandard. Check them! */ +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 +#define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif /* __ASM_AVR32_POLL_H */ diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h new file mode 100644 index 00000000000..2831b039b34 --- /dev/null +++ b/include/asm-avr32/posix_types.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_POSIX_TYPES_H +#define __ASM_AVR32_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef unsigned short __kernel_old_dev_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { +#if defined(__KERNEL__) || defined(__USE_ALL) + int val[2]; +#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */ + int __val[2]; +#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */ +} __kernel_fsid_t; + +#if defined(__KERNEL__) + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + + +#undef 
__FD_ISSET +static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* defined(__KERNEL__) */ + +#endif /* __ASM_AVR32_POSIX_TYPES_H */ diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h new file mode 100644 index 00000000000..f6913778a45 --- /dev/null +++ b/include/asm-avr32/processor.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_PROCESSOR_H +#define __ASM_AVR32_PROCESSOR_H + +#include <asm/page.h> +#include <asm/cache.h> + +#define TASK_SIZE 0x80000000 + +#ifndef __ASSEMBLY__ + +static inline void *current_text_addr(void) +{ + register void *pc asm("pc"); + return pc; +} + +enum arch_type { + ARCH_AVR32A, + ARCH_AVR32B, + ARCH_MAX +}; + +enum cpu_type { + CPU_MORGAN, + CPU_AT32AP, + CPU_MAX +}; + +enum tlb_config { + TLB_NONE, + TLB_SPLIT, + TLB_UNIFIED, + TLB_INVALID +}; + +struct avr32_cpuinfo { + struct clk *clk; + unsigned long loops_per_jiffy; + enum arch_type arch_type; + enum cpu_type cpu_type; + unsigned short arch_revision; + unsigned short cpu_revision; + enum tlb_config tlb_config; + + struct cache_info icache; + struct cache_info dcache; +}; + +extern struct avr32_cpuinfo boot_cpu_data; + +#ifdef CONFIG_SMP +extern struct avr32_cpuinfo cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define cpu_relax() barrier() +#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") + +struct cpu_context { + unsigned long sr; + unsigned long pc; + unsigned long ksp; /* Kernel stack pointer */ + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +/* This struct contains the CPU context as stored by switch_to() */ +struct thread_struct { + struct cpu_context cpu_context; + unsigned long single_step_addr; + u16 single_step_insn; +}; + +#define INIT_THREAD { \ + .cpu_context = { \ + .ksp = sizeof(init_stack) + (long)&init_stack, \ + }, \ +} + +/* + * Do necessary setup to start up a newly executed thread. 
+ */ +#define start_thread(regs, new_pc, new_sp) \ + do { \ + set_fs(USER_DS); \ + memset(regs, 0, sizeof(*regs)); \ + regs->sr = MODE_USER; \ + regs->pc = new_pc & ~1; \ + regs->sp = new_sp; \ + } while(0) + +struct task_struct; + +/* Free all resources held by a thread */ +extern void release_thread(struct task_struct *); + +/* Create a kernel thread without removing it from tasklists */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while(0) + +/* Return saved PC of a blocked thread */ +#define thread_saved_pc(tsk) ((tsk)->thread.cpu_context.pc) + +struct pt_regs; +void show_trace(struct task_struct *task, unsigned long *stack, + struct pt_regs *regs); + +extern unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) ((tsk)->thread.cpu_context.pc) +#define KSTK_ESP(tsk) ((tsk)->thread.cpu_context.ksp) + +#define ARCH_HAS_PREFETCH + +static inline void prefetch(const void *x) +{ + const char *c = x; + asm volatile("pref %0" : : "r"(c)); +} +#define PREFETCH_STRIDE L1_CACHE_BYTES + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PROCESSOR_H */ diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h new file mode 100644 index 00000000000..60f0f19a81f --- /dev/null +++ b/include/asm-avr32/ptrace.h @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_PTRACE_H +#define __ASM_AVR32_PTRACE_H + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +/* + * Status Register bits + */ +#define SR_H 0x40000000 +#define SR_R 0x20000000 +#define SR_J 0x10000000 +#define SR_DM 0x08000000 +#define SR_D 0x04000000 +#define MODE_NMI 0x01c00000 +#define MODE_EXCEPTION 0x01800000 +#define MODE_INT3 0x01400000 +#define MODE_INT2 0x01000000 +#define MODE_INT1 0x00c00000 +#define MODE_INT0 0x00800000 +#define MODE_SUPERVISOR 0x00400000 +#define MODE_USER 0x00000000 +#define MODE_MASK 0x01c00000 +#define SR_EM 0x00200000 +#define SR_I3M 0x00100000 +#define SR_I2M 0x00080000 +#define SR_I1M 0x00040000 +#define SR_I0M 0x00020000 +#define SR_GM 0x00010000 + +#define SR_H_BIT 30 +#define SR_R_BIT 29 +#define SR_J_BIT 28 +#define SR_DM_BIT 27 +#define SR_D_BIT 26 +#define MODE_SHIFT 22 +#define SR_EM_BIT 21 +#define SR_I3M_BIT 20 +#define SR_I2M_BIT 19 +#define SR_I1M_BIT 18 +#define SR_I0M_BIT 17 +#define SR_GM_BIT 16 + +/* The user-visible part */ +#define SR_L 0x00000020 +#define SR_Q 0x00000010 +#define SR_V 0x00000008 +#define SR_N 0x00000004 +#define SR_Z 0x00000002 +#define SR_C 0x00000001 + +#define SR_L_BIT 5 +#define SR_Q_BIT 4 +#define SR_V_BIT 3 +#define SR_N_BIT 2 +#define SR_Z_BIT 1 +#define SR_C_BIT 0 + +/* + * The order is defined by the stmts instruction. r0 is stored first, + * so it gets the highest address. + * + * Registers 0-12 are general-purpose registers (r12 is normally used for + * the function return value). 
+ * Register 13 is the stack pointer + * Register 14 is the link register + * Register 15 is the program counter (retrieved from the RAR sysreg) + */ +#define FRAME_SIZE_FULL 72 +#define REG_R12_ORIG 68 +#define REG_R0 64 +#define REG_R1 60 +#define REG_R2 56 +#define REG_R3 52 +#define REG_R4 48 +#define REG_R5 44 +#define REG_R6 40 +#define REG_R7 36 +#define REG_R8 32 +#define REG_R9 28 +#define REG_R10 24 +#define REG_R11 20 +#define REG_R12 16 +#define REG_SP 12 +#define REG_LR 8 + +#define FRAME_SIZE_MIN 8 +#define REG_PC 4 +#define REG_SR 0 + +#ifndef __ASSEMBLY__ +struct pt_regs { + /* These are always saved */ + unsigned long sr; + unsigned long pc; + + /* These are sometimes saved */ + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; + + /* Only saved on system call */ + unsigned long r12_orig; +}; + +#ifdef __KERNEL__ +# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER) +extern void show_regs (struct pt_regs *); + +static __inline__ int valid_user_regs(struct pt_regs *regs) +{ + /* + * Some of the Java bits might be acceptable if/when we + * implement some support for that stuff... + */ + if ((regs->sr & 0xffff0000) == 0) + return 1; + + /* + * Force status register flags to be sane and report this + * illegal behaviour... + */ + regs->sr &= 0x0000ffff; + return 0; +} + +#define instruction_pointer(regs) ((regs)->pc) + +#define profile_pc(regs) instruction_pointer(regs) + +#endif /* __KERNEL__ */ + +#endif /* ! 
__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PTRACE_H */ diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h new file mode 100644 index 00000000000..c6dd101472b --- /dev/null +++ b/include/asm-avr32/resource.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_RESOURCE_H +#define __ASM_AVR32_RESOURCE_H + +#include <asm-generic/resource.h> + +#endif /* __ASM_AVR32_RESOURCE_H */ diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h new file mode 100644 index 00000000000..bfe7d753423 --- /dev/null +++ b/include/asm-avr32/scatterlist.h @@ -0,0 +1,21 @@ +#ifndef __ASM_AVR32_SCATTERLIST_H +#define __ASM_AVR32_SCATTERLIST_H + +struct scatterlist { + struct page *page; + unsigned int offset; + dma_addr_t dma_address; + unsigned int length; +}; + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + +#define ISA_DMA_THRESHOLD (0xffffffff) + +#endif /* __ASM_AVR32_SCATTERLIST_H */ diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h new file mode 100644 index 00000000000..aa14252e418 --- /dev/null +++ b/include/asm-avr32/sections.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SECTIONS_H +#define __ASM_AVR32_SECTIONS_H + +#include <asm-generic/sections.h> + +#endif /* __ASM_AVR32_SECTIONS_H */ diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h new file mode 100644 index 00000000000..ef99ddccc10 --- /dev/null +++ b/include/asm-avr32/semaphore.h @@ -0,0 +1,109 @@ +/* + * SMP- and interrupt-safe semaphores. 
+ * + * Copyright (C) 2006 Atmel Corporation + * + * Based on include/asm-i386/semaphore.h + * Copyright (C) 1996 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SEMAPHORE_H +#define __ASM_AVR32_SEMAPHORE_H + +#include <linux/linkage.h> + +#include <asm/system.h> +#include <asm/atomic.h> +#include <linux/wait.h> +#include <linux/rwsem.h> + +struct semaphore { + atomic_t count; + int sleepers; + wait_queue_head_t wait; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = ATOMIC_INIT(n), \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static inline void sema_init (struct semaphore *sem, int val) +{ + atomic_set(&sem->count, val); + sem->sleepers = 0; + init_waitqueue_head(&sem->wait); +} + +static inline void init_MUTEX (struct semaphore *sem) +{ + sema_init(sem, 1); +} + +static inline void init_MUTEX_LOCKED (struct semaphore *sem) +{ + sema_init(sem, 0); +} + +void __down(struct semaphore * sem); +int __down_interruptible(struct semaphore * sem); +void __up(struct semaphore * sem); + +/* + * This is ugly, but we want the default case to fall through. + * On i386, "__down_failed" is a special asm handler that calls the C + * routine that waits (arch/i386/kernel/semaphore.c); here we call __down(). + */ +static inline void down(struct semaphore * sem) +{ + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + __down (sem); +} + +/* + * Interruptible try to acquire a semaphore. If we obtained + * it, return zero.
If we were interrupted, returns -EINTR + */ +static inline int down_interruptible(struct semaphore * sem) +{ + int ret = 0; + + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + ret = __down_interruptible (sem); + return ret; +} + +/* + * Non-blockingly attempt to down() a semaphore. + * Returns zero if we acquired it + */ +static inline int down_trylock(struct semaphore * sem) +{ + return atomic_dec_if_positive(&sem->count) < 0; +} + +/* + * Note! This is subtle. We jump to wake people up only if + * the semaphore was negative (== somebody was waiting on it). + * The default case (no contention) will result in NO + * jumps for both down() and up(). + */ +static inline void up(struct semaphore * sem) +{ + if (unlikely(atomic_inc_return (&sem->count) <= 0)) + __up (sem); +} + +#endif /*__ASM_AVR32_SEMAPHORE_H */ diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h new file mode 100644 index 00000000000..e472216e0c9 --- /dev/null +++ b/include/asm-avr32/sembuf.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_SEMBUF_H +#define __ASM_AVR32_SEMBUF_H + +/* +* The semid64_ds structure for AVR32 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + unsigned long __unused1; + __kernel_time_t sem_ctime; /* last change time */ + unsigned long __unused2; + unsigned long sem_nsems; /* no. 
of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_AVR32_SEMBUF_H */ diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h new file mode 100644 index 00000000000..10193da4113 --- /dev/null +++ b/include/asm-avr32/setup.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/include/asm-arm/setup.h + * Copyright (C) 1997-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SETUP_H__ +#define __ASM_AVR32_SETUP_H__ + +#define COMMAND_LINE_SIZE 256 + +/* Magic number indicating that a tag table is present */ +#define ATAG_MAGIC 0xa2a25441 + +#ifndef __ASSEMBLY__ + +/* + * Generic memory range, used by several tags. + * + * addr is always physical. + * size is measured in bytes. + * next is for use by the OS, e.g. for grouping regions into + * linked lists. + */ +struct tag_mem_range { + u32 addr; + u32 size; + struct tag_mem_range * next; +}; + +/* The list ends with an ATAG_NONE node. */ +#define ATAG_NONE 0x00000000 + +struct tag_header { + u32 size; + u32 tag; +}; + +/* The list must start with an ATAG_CORE node */ +#define ATAG_CORE 0x54410001 + +struct tag_core { + u32 flags; + u32 pagesize; + u32 rootdev; +}; + +/* it is allowed to have multiple ATAG_MEM nodes */ +#define ATAG_MEM 0x54410002 +/* ATAG_MEM uses tag_mem_range */ + +/* command line: \0 terminated string */ +#define ATAG_CMDLINE 0x54410003 + +struct tag_cmdline { + char cmdline[1]; /* this is the minimum size */ +}; + +/* Ramdisk image (may be compressed) */ +#define ATAG_RDIMG 0x54410004 +/* ATAG_RDIMG uses tag_mem_range */ + +/* Information about various clocks present in the system */ +#define ATAG_CLOCK 0x54410005 + +struct tag_clock { + u32 clock_id; /* Which clock are we talking about?
*/ + u32 clock_flags; /* Special features */ + u64 clock_hz; /* Clock speed in Hz */ +}; + +/* The clock types we know about */ +#define CLOCK_BOOTCPU 0 + +/* Memory reserved for the system (e.g. the bootloader) */ +#define ATAG_RSVD_MEM 0x54410006 +/* ATAG_RSVD_MEM uses tag_mem_range */ + +/* Ethernet information */ + +#define ATAG_ETHERNET 0x54410007 + +struct tag_ethernet { + u8 mac_index; + u8 mii_phy_addr; + u8 hw_address[6]; +}; + +#define ETH_INVALID_PHY 0xff + +struct tag { + struct tag_header hdr; + union { + struct tag_core core; + struct tag_mem_range mem_range; + struct tag_cmdline cmdline; + struct tag_clock clock; + struct tag_ethernet ethernet; + } u; +}; + +struct tagtable { + u32 tag; + int (*parse)(struct tag *); +}; + +#define __tag __attribute_used__ __attribute__((__section__(".taglist"))) +#define __tagtable(tag, fn) \ + static struct tagtable __tagtable_##fn __tag = { tag, fn } + +#define tag_member_present(tag,member) \ + ((unsigned long)(&((struct tag *)0L)->member + 1) \ + <= (tag)->hdr.size * 4) + +#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size)) +#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2) + +#define for_each_tag(t,base) \ + for (t = base; t->hdr.size; t = tag_next(t)) + +extern struct tag_mem_range *mem_phys; +extern struct tag_mem_range *mem_reserved; +extern struct tag_mem_range *mem_ramdisk; + +extern struct tag *bootloader_tags; + +extern void setup_bootmem(void); +extern void setup_processor(void); +extern void board_setup_fbmem(unsigned long fbmem_start, + unsigned long fbmem_size); + +/* Chip-specific hook to enable the use of SDRAM */ +void chip_enable_sdram(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_SETUP_H__ */ diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h new file mode 100644 index 00000000000..c62fba41739 --- /dev/null +++ b/include/asm-avr32/shmbuf.h @@ -0,0 +1,42 @@ +#ifndef __ASM_AVR32_SHMBUF_H +#define __ASM_AVR32_SHMBUF_H + +/* 
+ * The shmid64_ds structure for AVR32 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + unsigned long __unused1; + __kernel_time_t shm_dtime; /* last detach time */ + unsigned long __unused2; + __kernel_time_t shm_ctime; /* last change time */ + unsigned long __unused3; + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_AVR32_SHMBUF_H */ diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h new file mode 100644 index 00000000000..3681266c77f --- /dev/null +++ b/include/asm-avr32/shmparam.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SHMPARAM_H +#define __ASM_AVR32_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* __ASM_AVR32_SHMPARAM_H */ diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h new file mode 100644 index 00000000000..e04062b5f39 --- /dev/null +++ b/include/asm-avr32/sigcontext.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#ifndef __ASM_AVR32_SIGCONTEXT_H +#define __ASM_AVR32_SIGCONTEXT_H + +struct sigcontext { + unsigned long oldmask; + + /* CPU registers */ + unsigned long sr; + unsigned long pc; + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +#endif /* __ASM_AVR32_SIGCONTEXT_H */ diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h new file mode 100644 index 00000000000..5ee93f40a8a --- /dev/null +++ b/include/asm-avr32/siginfo.h @@ -0,0 +1,6 @@ +#ifndef _AVR32_SIGINFO_H +#define _AVR32_SIGINFO_H + +#include <asm-generic/siginfo.h> + +#endif diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h new file mode 100644 index 00000000000..caffefeeba1 --- /dev/null +++ b/include/asm-avr32/signal.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SIGNAL_H +#define __ASM_AVR32_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 32 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. 
*/ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + +/* + * SA_FLAGS values: + * + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_SIGINFO deliver the signal with SIGINFO structs + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NODEFER prevents the current signal from being masked in the handler. + * SA_RESETHAND clears the handler when the signal is delivered. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_RESTORER 0x04000000 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal.h> + +#ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +#include <asm/sigcontext.h> +#undef __HAVE_ARCH_SIG_BITOPS + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h new file mode 100644 index 00000000000..543229de817 --- /dev/null +++ b/include/asm-avr32/socket.h @@ -0,0 +1,53 @@ +#ifndef __ASM_AVR32_SOCKET_H +#define __ASM_AVR32_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define 
SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 + +#endif /* __ASM_AVR32_SOCKET_H */ diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h new file mode 100644 index 00000000000..84f3d65b3b3 --- /dev/null +++ b/include/asm-avr32/sockios.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_SOCKIOS_H +#define __ASM_AVR32_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp */ + +#endif /* __ASM_AVR32_SOCKIOS_H */ diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h new file mode 100644 index 00000000000..e72881e1023 --- /dev/null +++ b/include/asm-avr32/stat.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_STAT_H +#define __ASM_AVR32_STAT_H + +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +#define STAT_HAVE_NSEC 1 + +struct stat64 { + unsigned long long st_dev; + + unsigned long long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned long long st_rdev; + + long long st_size; + unsigned long __pad1; /* align 64-bit st_blocks */ + unsigned long st_blksize; + + unsigned long long st_blocks; /* Number 512-byte blocks allocated. 
*/ + + unsigned long st_atime; + unsigned long st_atime_nsec; + + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_AVR32_STAT_H */ diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h new file mode 100644 index 00000000000..2961bd18c50 --- /dev/null +++ b/include/asm-avr32/statfs.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_STATFS_H +#define __ASM_AVR32_STATFS_H + +#include <asm-generic/statfs.h> + +#endif /* __ASM_AVR32_STATFS_H */ diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h new file mode 100644 index 00000000000..c91a623cd58 --- /dev/null +++ b/include/asm-avr32/string.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_STRING_H +#define __ASM_AVR32_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *b, int c, size_t len); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *to, const void *from, size_t len); + +#endif /* __ASM_AVR32_STRING_H */ diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h new file mode 100644 index 00000000000..f91975f330f --- /dev/null +++ b/include/asm-avr32/sysreg.h @@ -0,0 +1,332 @@ +/* + * AVR32 System Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_SYSREG_H__ +#define __ASM_AVR32_SYSREG_H__ + +/* sysreg register offsets */ +#define SYSREG_SR 0x0000 +#define SYSREG_EVBA 0x0004 +#define SYSREG_ACBA 0x0008 +#define SYSREG_CPUCR 0x000c +#define SYSREG_ECR 0x0010 +#define SYSREG_RSR_SUP 0x0014 +#define SYSREG_RSR_INT0 0x0018 +#define SYSREG_RSR_INT1 0x001c +#define SYSREG_RSR_INT2 0x0020 +#define SYSREG_RSR_INT3 0x0024 +#define SYSREG_RSR_EX 0x0028 +#define SYSREG_RSR_NMI 0x002c +#define SYSREG_RSR_DBG 0x0030 +#define SYSREG_RAR_SUP 0x0034 +#define SYSREG_RAR_INT0 0x0038 +#define SYSREG_RAR_INT1 0x003c +#define SYSREG_RAR_INT2 0x0040 +#define SYSREG_RAR_INT3 0x0044 +#define SYSREG_RAR_EX 0x0048 +#define SYSREG_RAR_NMI 0x004c +#define SYSREG_RAR_DBG 0x0050 +#define SYSREG_JECR 0x0054 +#define SYSREG_JOSP 0x0058 +#define SYSREG_JAVA_LV0 0x005c +#define SYSREG_JAVA_LV1 0x0060 +#define SYSREG_JAVA_LV2 0x0064 +#define SYSREG_JAVA_LV3 0x0068 +#define SYSREG_JAVA_LV4 0x006c +#define SYSREG_JAVA_LV5 0x0070 +#define SYSREG_JAVA_LV6 0x0074 +#define SYSREG_JAVA_LV7 0x0078 +#define SYSREG_JTBA 0x007c +#define SYSREG_JBCR 0x0080 +#define SYSREG_CONFIG0 0x0100 +#define SYSREG_CONFIG1 0x0104 +#define SYSREG_COUNT 0x0108 +#define SYSREG_COMPARE 0x010c +#define SYSREG_TLBEHI 0x0110 +#define SYSREG_TLBELO 0x0114 +#define SYSREG_PTBR 0x0118 +#define SYSREG_TLBEAR 0x011c +#define SYSREG_MMUCR 0x0120 +#define SYSREG_TLBARLO 0x0124 +#define SYSREG_TLBARHI 0x0128 +#define SYSREG_PCCNT 0x012c +#define SYSREG_PCNT0 0x0130 +#define SYSREG_PCNT1 0x0134 +#define SYSREG_PCCR 0x0138 +#define SYSREG_BEAR 0x013c + +/* Bitfields in SR */ +#define SYSREG_SR_C_OFFSET 0 +#define SYSREG_SR_C_SIZE 1 +#define SYSREG_Z_OFFSET 1 +#define SYSREG_Z_SIZE 1 +#define SYSREG_SR_N_OFFSET 2 +#define SYSREG_SR_N_SIZE 1 +#define SYSREG_SR_V_OFFSET 3 +#define SYSREG_SR_V_SIZE 1 +#define SYSREG_Q_OFFSET 4 +#define SYSREG_Q_SIZE 1 +#define SYSREG_GM_OFFSET 16 +#define SYSREG_GM_SIZE 1 +#define SYSREG_I0M_OFFSET 17 +#define SYSREG_I0M_SIZE 1 
+#define SYSREG_I1M_OFFSET 18 +#define SYSREG_I1M_SIZE 1 +#define SYSREG_I2M_OFFSET 19 +#define SYSREG_I2M_SIZE 1 +#define SYSREG_I3M_OFFSET 20 +#define SYSREG_I3M_SIZE 1 +#define SYSREG_EM_OFFSET 21 +#define SYSREG_EM_SIZE 1 +#define SYSREG_M0_OFFSET 22 +#define SYSREG_M0_SIZE 1 +#define SYSREG_M1_OFFSET 23 +#define SYSREG_M1_SIZE 1 +#define SYSREG_M2_OFFSET 24 +#define SYSREG_M2_SIZE 1 +#define SYSREG_SR_D_OFFSET 26 +#define SYSREG_SR_D_SIZE 1 +#define SYSREG_DM_OFFSET 27 +#define SYSREG_DM_SIZE 1 +#define SYSREG_SR_J_OFFSET 28 +#define SYSREG_SR_J_SIZE 1 +#define SYSREG_R_OFFSET 29 +#define SYSREG_R_SIZE 1 +#define SYSREG_H_OFFSET 30 +#define SYSREG_H_SIZE 1 + +/* Bitfields in EVBA */ + +/* Bitfields in ACBA */ + +/* Bitfields in CPUCR */ +#define SYSREG_BI_OFFSET 0 +#define SYSREG_BI_SIZE 1 +#define SYSREG_BE_OFFSET 1 +#define SYSREG_BE_SIZE 1 +#define SYSREG_FE_OFFSET 2 +#define SYSREG_FE_SIZE 1 +#define SYSREG_RE_OFFSET 3 +#define SYSREG_RE_SIZE 1 +#define SYSREG_IBE_OFFSET 4 +#define SYSREG_IBE_SIZE 1 +#define SYSREG_IEE_OFFSET 5 +#define SYSREG_IEE_SIZE 1 + +/* Bitfields in ECR */ +#define SYSREG_ECR_OFFSET 0 +#define SYSREG_ECR_SIZE 32 + +/* Bitfields in RSR_SUP */ + +/* Bitfields in RSR_INT0 */ + +/* Bitfields in RSR_INT1 */ + +/* Bitfields in RSR_INT2 */ + +/* Bitfields in RSR_INT3 */ + +/* Bitfields in RSR_EX */ + +/* Bitfields in RSR_NMI */ + +/* Bitfields in RSR_DBG */ + +/* Bitfields in RAR_SUP */ + +/* Bitfields in RAR_INT0 */ + +/* Bitfields in RAR_INT1 */ + +/* Bitfields in RAR_INT2 */ + +/* Bitfields in RAR_INT3 */ + +/* Bitfields in RAR_EX */ + +/* Bitfields in RAR_NMI */ + +/* Bitfields in RAR_DBG */ + +/* Bitfields in JECR */ + +/* Bitfields in JOSP */ + +/* Bitfields in JAVA_LV0 */ + +/* Bitfields in JAVA_LV1 */ + +/* Bitfields in JAVA_LV2 */ + +/* Bitfields in JAVA_LV3 */ + +/* Bitfields in JAVA_LV4 */ + +/* Bitfields in JAVA_LV5 */ + +/* Bitfields in JAVA_LV6 */ + +/* Bitfields in JAVA_LV7 */ + +/* Bitfields in JTBA */ + +/* Bitfields in 
JBCR */ + +/* Bitfields in CONFIG0 */ +#define SYSREG_CONFIG0_D_OFFSET 1 +#define SYSREG_CONFIG0_D_SIZE 1 +#define SYSREG_CONFIG0_S_OFFSET 2 +#define SYSREG_CONFIG0_S_SIZE 1 +#define SYSREG_O_OFFSET 3 +#define SYSREG_O_SIZE 1 +#define SYSREG_P_OFFSET 4 +#define SYSREG_P_SIZE 1 +#define SYSREG_CONFIG0_J_OFFSET 5 +#define SYSREG_CONFIG0_J_SIZE 1 +#define SYSREG_F_OFFSET 6 +#define SYSREG_F_SIZE 1 +#define SYSREG_MMUT_OFFSET 7 +#define SYSREG_MMUT_SIZE 3 +#define SYSREG_AR_OFFSET 10 +#define SYSREG_AR_SIZE 3 +#define SYSREG_AT_OFFSET 13 +#define SYSREG_AT_SIZE 3 +#define SYSREG_PROCESSORREVISION_OFFSET 16 +#define SYSREG_PROCESSORREVISION_SIZE 8 +#define SYSREG_PROCESSORID_OFFSET 24 +#define SYSREG_PROCESSORID_SIZE 8 + +/* Bitfields in CONFIG1 */ +#define SYSREG_DASS_OFFSET 0 +#define SYSREG_DASS_SIZE 3 +#define SYSREG_DLSZ_OFFSET 3 +#define SYSREG_DLSZ_SIZE 3 +#define SYSREG_DSET_OFFSET 6 +#define SYSREG_DSET_SIZE 4 +#define SYSREG_IASS_OFFSET 10 +#define SYSREG_IASS_SIZE 2 +#define SYSREG_ILSZ_OFFSET 13 +#define SYSREG_ILSZ_SIZE 3 +#define SYSREG_ISET_OFFSET 16 +#define SYSREG_ISET_SIZE 4 +#define SYSREG_DMMUSZ_OFFSET 20 +#define SYSREG_DMMUSZ_SIZE 6 +#define SYSREG_IMMUSZ_OFFSET 26 +#define SYSREG_IMMUSZ_SIZE 6 + +/* Bitfields in COUNT */ + +/* Bitfields in COMPARE */ + +/* Bitfields in TLBEHI */ +#define SYSREG_ASID_OFFSET 0 +#define SYSREG_ASID_SIZE 8 +#define SYSREG_TLBEHI_I_OFFSET 8 +#define SYSREG_TLBEHI_I_SIZE 1 +#define SYSREG_TLBEHI_V_OFFSET 9 +#define SYSREG_TLBEHI_V_SIZE 1 +#define SYSREG_VPN_OFFSET 10 +#define SYSREG_VPN_SIZE 22 + +/* Bitfields in TLBELO */ +#define SYSREG_W_OFFSET 0 +#define SYSREG_W_SIZE 1 +#define SYSREG_TLBELO_D_OFFSET 1 +#define SYSREG_TLBELO_D_SIZE 1 +#define SYSREG_SZ_OFFSET 2 +#define SYSREG_SZ_SIZE 2 +#define SYSREG_AP_OFFSET 4 +#define SYSREG_AP_SIZE 3 +#define SYSREG_B_OFFSET 7 +#define SYSREG_B_SIZE 1 +#define SYSREG_G_OFFSET 8 +#define SYSREG_G_SIZE 1 +#define SYSREG_TLBELO_C_OFFSET 9 +#define SYSREG_TLBELO_C_SIZE 1 +#define 
SYSREG_PFN_OFFSET 10 +#define SYSREG_PFN_SIZE 22 + +/* Bitfields in PTBR */ + +/* Bitfields in TLBEAR */ + +/* Bitfields in MMUCR */ +#define SYSREG_E_OFFSET 0 +#define SYSREG_E_SIZE 1 +#define SYSREG_M_OFFSET 1 +#define SYSREG_M_SIZE 1 +#define SYSREG_MMUCR_I_OFFSET 2 +#define SYSREG_MMUCR_I_SIZE 1 +#define SYSREG_MMUCR_N_OFFSET 3 +#define SYSREG_MMUCR_N_SIZE 1 +#define SYSREG_MMUCR_S_OFFSET 4 +#define SYSREG_MMUCR_S_SIZE 1 +#define SYSREG_DLA_OFFSET 8 +#define SYSREG_DLA_SIZE 6 +#define SYSREG_DRP_OFFSET 14 +#define SYSREG_DRP_SIZE 6 +#define SYSREG_ILA_OFFSET 20 +#define SYSREG_ILA_SIZE 6 +#define SYSREG_IRP_OFFSET 26 +#define SYSREG_IRP_SIZE 6 + +/* Bitfields in TLBARLO */ + +/* Bitfields in TLBARHI */ + +/* Bitfields in PCCNT */ + +/* Bitfields in PCNT0 */ + +/* Bitfields in PCNT1 */ + +/* Bitfields in PCCR */ + +/* Bitfields in BEAR */ + +/* Constants for ECR */ +#define ECR_UNRECOVERABLE 0 +#define ECR_TLB_MULTIPLE 1 +#define ECR_BUS_ERROR_WRITE 2 +#define ECR_BUS_ERROR_READ 3 +#define ECR_NMI 4 +#define ECR_ADDR_ALIGN_X 5 +#define ECR_PROTECTION_X 6 +#define ECR_DEBUG 7 +#define ECR_ILLEGAL_OPCODE 8 +#define ECR_UNIMPL_INSTRUCTION 9 +#define ECR_PRIVILEGE_VIOLATION 10 +#define ECR_FPE 11 +#define ECR_COPROC_ABSENT 12 +#define ECR_ADDR_ALIGN_R 13 +#define ECR_ADDR_ALIGN_W 14 +#define ECR_PROTECTION_R 15 +#define ECR_PROTECTION_W 16 +#define ECR_DTLB_MODIFIED 17 +#define ECR_TLB_MISS_X 20 +#define ECR_TLB_MISS_R 24 +#define ECR_TLB_MISS_W 28 + +/* Bit manipulation macros */ +#define SYSREG_BIT(name) (1 << SYSREG_##name##_OFFSET) +#define SYSREG_BF(name,value) (((value) & ((1 << SYSREG_##name##_SIZE) - 1)) << SYSREG_##name##_OFFSET) +#define SYSREG_BFEXT(name,value) (((value) >> SYSREG_##name##_OFFSET) & ((1 << SYSREG_##name##_SIZE) - 1)) +#define SYSREG_BFINS(name,value,old) (((old) & ~(((1 << SYSREG_##name##_SIZE) - 1) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value)) + +#ifdef __CHECKER__ +extern unsigned long __builtin_mfsr(unsigned long reg); +extern 
void __builtin_mtsr(unsigned long reg, unsigned long value); +#endif + +/* Register access macros */ +#define sysreg_read(reg) __builtin_mfsr(SYSREG_##reg) +#define sysreg_write(reg, value) __builtin_mtsr(SYSREG_##reg, value) + +#endif /* __ASM_AVR32_SYSREG_H__ */ diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h new file mode 100644 index 00000000000..ac596058697 --- /dev/null +++ b/include/asm-avr32/system.h @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SYSTEM_H +#define __ASM_AVR32_SYSTEM_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +#define xchg(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) + +#define nop() asm volatile("nop") + +#define mb() asm volatile("" : : : "memory") +#define rmb() mb() +#define wmb() asm volatile("sync 0" : : : "memory") +#define read_barrier_depends() do { } while(0) +#define set_mb(var, value) do { var = value; mb(); } while(0) + +/* + * Help PathFinder and other Nexus-compliant debuggers keep track of + * the current PID by emitting an Ownership Trace Message each time we + * switch task. + */ +#ifdef CONFIG_OWNERSHIP_TRACE +#include <asm/ocd.h> +#define finish_arch_switch(prev) \ + do { \ + __mtdr(DBGREG_PID, prev->pid); \ + __mtdr(DBGREG_PID, current->pid); \ + } while(0) +#endif + +/* + * switch_to(prev, next, last) should switch from task `prev' to task + * `next'. `prev' will never be the same as `next'. + * + * We just delegate everything to the __switch_to assembly function, + * which is implemented in arch/avr32/kernel/switch_to.S + * + * mb() tells GCC not to cache `current' across this call. 
+ */ +struct cpu_context; +struct task_struct; +extern struct task_struct *__switch_to(struct task_struct *, + struct cpu_context *, + struct cpu_context *); +#define switch_to(prev, next, last) \ + do { \ + last = __switch_to(prev, &prev->thread.cpu_context + 1, \ + &next->thread.cpu_context); \ + } while (0) + +#ifdef CONFIG_SMP +# error "The AVR32 port does not support SMP" +#else +# define smp_mb() barrier() +# define smp_rmb() barrier() +# define smp_wmb() barrier() +# define smp_read_barrier_depends() do { } while(0) +#endif + +#include <linux/irqflags.h> + +extern void __xchg_called_with_bad_pointer(void); + +#ifdef __CHECKER__ +extern unsigned long __builtin_xchg(void *ptr, unsigned long x); +#endif + +#define xchg_u32(val, m) __builtin_xchg((void *)(m), (val)) + +static inline unsigned long __xchg(unsigned long x, + volatile void *ptr, + int size) +{ + switch(size) { + case 4: + return xchg_u32(x, ptr); + default: + __xchg_called_with_bad_pointer(); + return x; + } +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 ret; + + asm volatile( + "1: ssrf 5\n" + " ld.w %[ret], %[m]\n" + " cp.w %[ret], %[old]\n" + " brne 2f\n" + " stcond %[m], %[new]\n" + " brne 1b\n" + "2:\n" + : [ret] "=&r"(ret), [m] "=m"(*m) + : "m"(*m), [old] "ir"(old), [new] "r"(new) + : "memory", "cc"); + return ret; +} + +extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg().
*/ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, old, new) \ + ((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +struct pt_regs; +extern void __die(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); +extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); + +#define die(msg, regs, err) \ + __die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) +#define die_if_kernel(msg, regs, err) \ + __die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) + +#define arch_align_stack(x) (x) + +#endif /* __ASM_AVR32_SYSTEM_H */ diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h new file mode 100644 index 00000000000..9dc6eacafa3 --- /dev/null +++ b/include/asm-avr32/termbits.h @@ -0,0 +1,173 @@ +#ifndef __ASM_AVR32_TERMBITS_H +#define __ASM_AVR32_TERMBITS_H + +#include <linux/posix_types.h> + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define 
VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define 
B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* __ASM_AVR32_TERMBITS_H */ diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h new file mode 100644 index 00000000000..615bc0639e5 --- /dev/null +++ b/include/asm-avr32/termios.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_TERMIOS_H +#define __ASM_AVR32_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +/* line disciplines */ +#define N_TTY 0 +#define N_SLIP 1 +#define N_MOUSE 2 +#define N_PPP 3 +#define N_STRIP 4 +#define N_AX25 5 +#define N_X25 6 /* X.25 async */ +#define N_6PACK 7 +#define N_MASC 8 /* Reserved for Mobitex module <kaz@cafe.net> */ +#define N_R3964 9 /* Reserved for Simatic R3964 module */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus <Dave@mvhi.com> */ +#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */ +#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */ +#define N_HDLC 13 /* synchronous HDLC */ +#define N_SYNC_PPP 14 /* synchronous PPP */ +#define N_HCI 15 /* Bluetooth HCI UART */ + +#ifdef __KERNEL__ +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 swtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^O werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +#include
<asm-generic/termios.h> + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_TERMIOS_H */ diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h new file mode 100644 index 00000000000..d1f5b35ebd5 --- /dev/null +++ b/include/asm-avr32/thread_info.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_THREAD_INFO_H +#define __ASM_AVR32_THREAD_INFO_H + +#include <asm/page.h> + +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +struct task_struct; +struct exec_domain; + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 cpu; + __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + struct restart_block restart_block; + __u8 supervisor_stack[0]; +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .restart_block = { \ + .fn = do_no_restart_syscall \ + } \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* + * Get the thread information struct from C. 
+ * We do the usual trick and use the lower end of the stack for this + */ +static inline struct thread_info *current_thread_info(void) +{ + unsigned long addr = ~(THREAD_SIZE - 1); + + asm("and %0, sp" : "=r"(addr) : "0"(addr)); + return (struct thread_info *)addr; +} + +/* thread information allocation: alloc and free must use the same page order */ +#define alloc_thread_info(ti) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) +#define free_thread_info(ti) free_pages((unsigned long)(ti), THREAD_SIZE_ORDER) +#define get_thread_info(ti) get_task_struct((ti)->task) +#define put_thread_info(ti) put_task_struct((ti)->task) + +#endif /* !__ASSEMBLY__ */ + +#define PREEMPT_ACTIVE 0x40000000 + +/* + * Thread information flags + * - these are process state flags that various assembly files may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_BREAKPOINT 5 /* true if we should break after return */ +#define TIF_SINGLE_STEP 6 /* single step after next break */ +#define TIF_MEMDIE 7 +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal */ +#define TIF_USERSPACE 31 /* true if FS sets userspace */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT) +#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) + +/* XXX: These two masks must never span more than 16 bits!
*/ +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000013e +/* work to do on any return to userspace */ +#define _TIF_ALLWORK_MASK 0x0000013f +/* work to do on return from debug mode */ +#define _TIF_DBGWORK_MASK 0x0000017e + +#endif /* __ASM_AVR32_THREAD_INFO_H */ diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h new file mode 100644 index 00000000000..5e44ecb3ce0 --- /dev/null +++ b/include/asm-avr32/timex.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TIMEX_H +#define __ASM_AVR32_TIMEX_H + +/* + * This is the frequency of the timer used for Linux's timer interrupt. + * The value should be defined as accurately as possible or under certain + * circumstances Linux timekeeping might become inaccurate or fail. + * + * For many systems the exact clockrate of the timer isn't known but due to + * the way this value is used we can get away with a wrong value as long + * as this value is: + * + * - a multiple of HZ + * - a divisor of the actual rate + * + * 500000 is a good such cheat value. + * + * The obscure number 1193182 is the same as used by the original i8254 + * timer in legacy PC hardware; the chip is never found in AVR32 systems.
+ */ +#define CLOCK_TICK_RATE 500000 /* Underlying HZ */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles (void) +{ + return 0; +} + +extern int read_current_timer(unsigned long *timer_value); +#define ARCH_HAS_READ_CURRENT_TIMER 1 + +#endif /* __ASM_AVR32_TIMEX_H */ diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h new file mode 100644 index 00000000000..5c55f9ce7c7 --- /dev/null +++ b/include/asm-avr32/tlb.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TLB_H +#define __ASM_AVR32_TLB_H + +#define tlb_start_vma(tlb, vma) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end) + +#define tlb_end_vma(tlb, vma) \ + flush_tlb_range(vma, vma->vm_start, vma->vm_end) + +#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0) + +/* + * Flush whole TLB for MM + */ +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +/* + * For debugging purposes + */ +extern void show_dtlb_entry(unsigned int index); +extern void dump_dtlb(void); + +#endif /* __ASM_AVR32_TLB_H */ diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h new file mode 100644 index 00000000000..730e268f81f --- /dev/null +++ b/include/asm-avr32/tlbflush.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_TLBFLUSH_H +#define __ASM_AVR32_TLBFLUSH_H + +#include <asm/mmu.h> + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes' TLB entries + * - flush_tlb_mm(mm) flushes the specified mm context TLBs + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + */ +extern void flush_tlb(void); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void __flush_tlb_page(unsigned long asid, unsigned long page); + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* Nothing to do */ +} + +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +#endif /* __ASM_AVR32_TLBFLUSH_H */ diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h new file mode 100644 index 00000000000..5b766cbb480 --- /dev/null +++ b/include/asm-avr32/topology.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_TOPOLOGY_H +#define __ASM_AVR32_TOPOLOGY_H + +#include <asm-generic/topology.h> + +#endif /* __ASM_AVR32_TOPOLOGY_H */ diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h new file mode 100644 index 00000000000..6a8fb944f41 --- /dev/null +++ b/include/asm-avr32/traps.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_TRAPS_H +#define __ASM_AVR32_TRAPS_H + +#include <linux/list.h> + +struct undef_hook { + struct list_head node; + u32 insn_mask; + u32 insn_val; + int (*fn)(struct pt_regs *regs, u32 insn); +}; + +void register_undef_hook(struct undef_hook *hook); +void unregister_undef_hook(struct undef_hook *hook); + +#endif /* __ASM_AVR32_TRAPS_H */ diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h new file mode 100644 index 00000000000..3f47db9675a --- /dev/null +++ b/include/asm-avr32/types.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TYPES_H +#define __ASM_AVR32_TYPES_H + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#define BITS_PER_LONG 32 + +#ifndef __ASSEMBLY__ + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +/* Dma addresses are 32-bits wide. 
*/ + +typedef u32 dma_addr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + + +#endif /* __ASM_AVR32_TYPES_H */ diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h new file mode 100644 index 00000000000..821deb5a9d2 --- /dev/null +++ b/include/asm-avr32/uaccess.h @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UACCESS_H +#define __ASM_AVR32_UACCESS_H + +#include <linux/errno.h> +#include <linux/sched.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +typedef struct { + unsigned int is_user_space; +} mm_segment_t; + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons (Data Segment Register?), these macros are misnamed. + */ +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) +#define segment_eq(a,b) ((a).is_user_space == (b).is_user_space) + +#define USER_ADDR_LIMIT 0x80000000 + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(1) + +#define get_ds() (KERNEL_DS) + +static inline mm_segment_t get_fs(void) +{ + return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE)); +} + +static inline void set_fs(mm_segment_t s) +{ + if (s.is_user_space) + set_thread_flag(TIF_USERSPACE); + else + clear_thread_flag(TIF_USERSPACE); +} + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * We do the following checks: + * 1. Is the access from kernel space? + * 2. Does (addr + size) set the carry bit? + * 3. Is (addr + size) a negative number (i.e. >= 0x80000000)? 
+ * + * If yes on the first check, access is granted. + * If yes on any of the others, access is denied. + */ +#define __range_ok(addr, size) \ + (test_thread_flag(TIF_USERSPACE) \ + && (((unsigned long)(addr) >= 0x80000000) \ + || ((unsigned long)(size) > 0x80000000) \ + || (((unsigned long)(addr) + (unsigned long)(size)) > 0x80000000))) + +#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0)) + +static inline int +verify_area(int type, const void __user *addr, unsigned long size) +{ + return access_ok(type, addr, size) ? 0 : -EFAULT; +} + +/* Generic arbitrary sized copy. Return the number of bytes NOT copied */ +extern __kernel_size_t __copy_user(void *to, const void *from, + __kernel_size_t n); + +extern __kernel_size_t copy_to_user(void __user *to, const void *from, + __kernel_size_t n); +extern __kernel_size_t copy_from_user(void *to, const void __user *from, + __kernel_size_t n); + +static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, + __kernel_size_t n) +{ + return __copy_user((void __force *)to, from, n); +} +static inline __kernel_size_t __copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + return __copy_user(to, (const void __force *)from, n); +} + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +/* + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. 
+ */ +#define put_user(x,ptr) \ + __put_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ +#define __put_user(x,ptr) \ + __put_user_nocheck((x),(ptr),sizeof(*(ptr))) + +/* + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. 
+ * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + +extern int __get_user_bad(void); +extern int __put_user_bad(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + int __gu_err = 0; \ + \ + switch (size) { \ + case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break; \ + case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break; \ + case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break; \ + case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break; \ + default: __gu_err = __get_user_bad(); break; \ + } \ + \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + const typeof(*(ptr)) __user * __gu_addr = (ptr); \ + int __gu_err = 0; \ + \ + if (access_ok(VERIFY_READ, __gu_addr, size)) { \ + switch (size) { \ + case 1: \ + __get_user_asm("ub", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 2: \ + __get_user_asm("uh", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 4: \ + __get_user_asm("w", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 8: \ + __get_user_asm("d", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + default: \ + __gu_err = __get_user_bad(); \ + break; \ + } \ + } else { \ + __gu_err = -EFAULT; \ + } \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_asm(suffix, __gu_val, ptr, __gu_err) \ + asm volatile( \ + "1: ld." 
suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=r"(__gu_val) \ + : "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT)) + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + switch (size) { \ + case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break; \ + case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break; \ + case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break; \ + case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break; \ + default: __pu_err = __put_user_bad(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + typeof(*(ptr)) __user *__pu_addr = (ptr); \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \ + switch (size) { \ + case 1: \ + __put_user_asm("b", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 2: \ + __put_user_asm("h", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 4: \ + __put_user_asm("w", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 8: \ + __put_user_asm("d", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + default: \ + __pu_err = __put_user_bad(); \ + break; \ + } \ + } else { \ + __pu_err = -EFAULT; \ + } \ + __pu_err; \ +}) + +#define __put_user_asm(suffix, ptr, __pu_val, __gu_err) \ + asm volatile( \ + "1: st." 
suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=m"(*(ptr)) \ + : "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT)) + +extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size); +extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size); + +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern long __strncpy_from_user(char *dst, const char __user *src, long count); + +extern long strnlen_user(const char __user *__s, long __n); +extern long __strnlen_user(const char __user *__s, long __n); + +#define strlen_user(s) strnlen_user(s, ~0UL >> 1) + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +#endif /* __ASM_AVR32_UACCESS_H */ diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h new file mode 100644 index 00000000000..ac7259c2a79 --- /dev/null +++ b/include/asm-avr32/ucontext.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_UCONTEXT_H +#define __ASM_AVR32_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext * uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; +}; + +#endif /* __ASM_AVR32_UCONTEXT_H */ diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h new file mode 100644 index 00000000000..3042723fcbf --- /dev/null +++ b/include/asm-avr32/unaligned.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_UNALIGNED_H +#define __ASM_AVR32_UNALIGNED_H + +/* + * AVR32 can handle some unaligned accesses, depending on the + * implementation. The AVR32 AP implementation can handle unaligned + * words, but halfwords must be halfword-aligned, and doublewords must + * be word-aligned. + * + * TODO: Make all this CPU-specific and optimize. + */ + +#include <linux/string.h> + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ + +#define get_unaligned(ptr) \ + ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) + +#define put_unaligned(val, ptr) \ + ({ __typeof__(*(ptr)) __tmp = (val); \ + memmove((ptr), &__tmp, sizeof(*(ptr))); \ + (void)0; }) + +#endif /* __ASM_AVR32_UNALIGNED_H */ diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h new file mode 100644 index 00000000000..1f528f92690 --- /dev/null +++ b/include/asm-avr32/unistd.h @@ -0,0 +1,387 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UNISTD_H +#define __ASM_AVR32_UNISTD_H + +/* + * This file contains the system call numbers. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_umask 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_chown 16 +#define __NR_lchown 17 +#define __NR_lseek 18 +#define __NR__llseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount2 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 28 +#define __NR_utime 29 +#define __NR_stat 30 +#define __NR_fstat 31 +#define __NR_lstat 32 +#define __NR_access 33 +#define __NR_chroot 34 +#define __NR_sync 35 +#define __NR_fsync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_clone 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_getcwd 48 +#define __NR_geteuid 
49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_setfsuid 52 +#define __NR_setfsgid 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 56 +#define __NR_mremap 57 +#define __NR_setresuid 58 +#define __NR_getresuid 59 +#define __NR_setreuid 60 +#define __NR_setregid 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_rt_sigaction 67 +#define __NR_rt_sigreturn 68 +#define __NR_rt_sigprocmask 69 +#define __NR_rt_sigpending 70 +#define __NR_rt_sigtimedwait 71 +#define __NR_rt_sigqueueinfo 72 +#define __NR_rt_sigsuspend 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* SuS compliant getrlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_fchdir 84 +#define __NR_readlink 85 +#define __NR_pread 86 +#define __NR_pwrite 87 +#define __NR_swapon 88 +#define __NR_reboot 89 +#define __NR_mmap2 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_wait4 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_vhangup 101 +#define __NR_sigaltstack 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_swapoff 106 +#define __NR_sysinfo 107 +#define __NR_ipc 108 +#define __NR_sendfile 109 +#define __NR_setdomainname 110 +#define __NR_uname 111 +#define __NR_adjtimex 112 +#define __NR_mprotect 113 +#define __NR_vfork 114 +#define __NR_init_module 115 +#define __NR_delete_module 116 +#define __NR_quotactl 117 +#define __NR_getpgid 118 +#define __NR_bdflush 119 +#define __NR_sysfs 120 +#define __NR_personality 121 +#define __NR_afs_syscall 122 /* Syscall for Andrew File System 
*/ +#define __NR_getdents 123 +#define __NR_flock 124 +#define __NR_msync 125 +#define __NR_readv 126 +#define __NR_writev 127 +#define __NR_getsid 128 +#define __NR_fdatasync 129 +#define __NR__sysctl 130 +#define __NR_mlock 131 +#define __NR_munlock 132 +#define __NR_mlockall 133 +#define __NR_munlockall 134 +#define __NR_sched_setparam 135 +#define __NR_sched_getparam 136 +#define __NR_sched_setscheduler 137 +#define __NR_sched_getscheduler 138 +#define __NR_sched_yield 139 +#define __NR_sched_get_priority_max 140 +#define __NR_sched_get_priority_min 141 +#define __NR_sched_rr_get_interval 142 +#define __NR_nanosleep 143 +#define __NR_poll 144 +#define __NR_nfsservctl 145 +#define __NR_setresgid 146 +#define __NR_getresgid 147 +#define __NR_prctl 148 +#define __NR_socket 149 +#define __NR_bind 150 +#define __NR_connect 151 +#define __NR_listen 152 +#define __NR_accept 153 +#define __NR_getsockname 154 +#define __NR_getpeername 155 +#define __NR_socketpair 156 +#define __NR_send 157 +#define __NR_recv 158 +#define __NR_sendto 159 +#define __NR_recvfrom 160 +#define __NR_shutdown 161 +#define __NR_setsockopt 162 +#define __NR_getsockopt 163 +#define __NR_sendmsg 164 +#define __NR_recvmsg 165 +#define __NR_truncate64 166 +#define __NR_ftruncate64 167 +#define __NR_stat64 168 +#define __NR_lstat64 169 +#define __NR_fstat64 170 +#define __NR_pivot_root 171 +#define __NR_mincore 172 +#define __NR_madvise 173 +#define __NR_getdents64 174 +#define __NR_fcntl64 175 +#define __NR_gettid 176 +#define __NR_readahead 177 +#define __NR_setxattr 178 +#define __NR_lsetxattr 179 +#define __NR_fsetxattr 180 +#define __NR_getxattr 181 +#define __NR_lgetxattr 182 +#define __NR_fgetxattr 183 +#define __NR_listxattr 184 +#define __NR_llistxattr 185 +#define __NR_flistxattr 186 +#define __NR_removexattr 187 +#define __NR_lremovexattr 188 +#define __NR_fremovexattr 189 +#define __NR_tkill 190 +#define __NR_sendfile64 191 +#define __NR_futex 192 +#define __NR_sched_setaffinity 193 
+#define __NR_sched_getaffinity 194 +#define __NR_capget 195 +#define __NR_capset 196 +#define __NR_io_setup 197 +#define __NR_io_destroy 198 +#define __NR_io_getevents 199 +#define __NR_io_submit 200 +#define __NR_io_cancel 201 +#define __NR_fadvise64 202 +#define __NR_exit_group 203 +#define __NR_lookup_dcookie 204 +#define __NR_epoll_create 205 +#define __NR_epoll_ctl 206 +#define __NR_epoll_wait 207 +#define __NR_remap_file_pages 208 +#define __NR_set_tid_address 209 + +#define __NR_timer_create 210 +#define __NR_timer_settime 211 +#define __NR_timer_gettime 212 +#define __NR_timer_getoverrun 213 +#define __NR_timer_delete 214 +#define __NR_clock_settime 215 +#define __NR_clock_gettime 216 +#define __NR_clock_getres 217 +#define __NR_clock_nanosleep 218 +#define __NR_statfs64 219 +#define __NR_fstatfs64 220 +#define __NR_tgkill 221 + /* 222 reserved for tux */ +#define __NR_utimes 223 +#define __NR_fadvise64_64 224 + +#define __NR_cacheflush 225 + +#define __NR_vserver 226 +#define __NR_mq_open 227 +#define __NR_mq_unlink 228 +#define __NR_mq_timedsend 229 +#define __NR_mq_timedreceive 230 +#define __NR_mq_notify 231 +#define __NR_mq_getsetattr 232 +#define __NR_kexec_load 233 +#define __NR_waitid 234 +#define __NR_add_key 235 +#define __NR_request_key 236 +#define __NR_keyctl 237 +#define __NR_ioprio_set 238 +#define __NR_ioprio_get 239 +#define __NR_inotify_init 240 +#define __NR_inotify_add_watch 241 +#define __NR_inotify_rm_watch 242 +#define __NR_openat 243 +#define __NR_mkdirat 244 +#define __NR_mknodat 245 +#define __NR_fchownat 246 +#define __NR_futimesat 247 +#define __NR_fstatat64 248 +#define __NR_unlinkat 249 +#define __NR_renameat 250 +#define __NR_linkat 251 +#define __NR_symlinkat 252 +#define __NR_readlinkat 253 +#define __NR_fchmodat 254 +#define __NR_faccessat 255 +#define __NR_pselect6 256 +#define __NR_ppoll 257 +#define __NR_unshare 258 +#define __NR_set_robust_list 259 +#define __NR_get_robust_list 260 +#define __NR_splice 261 +#define 
__NR_sync_file_range 262 +#define __NR_tee 263 +#define __NR_vmsplice 264 + +#define NR_syscalls 265 + + +/* + * AVR32 calling convention for system calls: + * - System call number in r8 + * - Parameters in r12 and downwards to r9 as well as r6 and r5. + * - Return value in r12 + */ + +/* + * user-visible error numbers are in the range -1 - -124: see + * <asm-generic/errno.h> + */ + +#define __syscall_return(type, res) do { \ + if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + errno = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + +#ifdef __KERNEL__ +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +#endif + +#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__) + +#include <linux/types.h> +#include <linux/linkage.h> +#include <asm/signal.h> + +struct pt_regs; + +/* + * we need this inline - forking from kernel space will result + * in NO COPY ON WRITE (!!!), until an execve is executed. This + * is no problem, but for the stack. This is handled by not letting + * main() use the stack at all after fork(). Thus, no function + * calls - which means inline code for fork too, as otherwise we + * would use the stack upon exit from 'fork()'. + * + * Actually only pause and fork are needed inline, so that there + * won't be any messing with the stack from main(), but we define + * some others too. 
+ */ +static inline int execve(const char *file, char **argv, char **envp) +{ + register long scno asm("r8") = __NR_execve; + register long sc1 asm("r12") = (long)file; + register long sc2 asm("r11") = (long)argv; + register long sc3 asm("r10") = (long)envp; + int res; + + asm volatile("scall" + : "=r"(sc1) + : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3) + : "lr", "memory"); + res = sc1; + __syscall_return(int, res); +} + +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs); +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs); +asmlinkage int sys_pipe(unsigned long __user *filedes); +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset); +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len); +asmlinkage int sys_fork(struct pt_regs *regs); +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs); +asmlinkage int sys_vfork(struct pt_regs *regs); +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs); + +#endif + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); + +#endif /* __ASM_AVR32_UNISTD_H */ diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h new file mode 100644 index 00000000000..060fb3acee4 --- /dev/null +++ b/include/asm-avr32/user.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
version 2 as + * published by the Free Software Foundation. + * + * Note: We may not need these definitions for AVR32, as we don't + * support a.out. + */ +#ifndef __ASM_AVR32_USER_H +#define __ASM_AVR32_USER_H + +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/page.h> + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd). The file contents are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. 
+ */ + +struct user_fpu_struct { + /* We have no FPU (yet) */ +}; + +struct user { + struct pt_regs regs; /* entire machine state */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + struct regs * u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* __ASM_AVR32_USER_H */ diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h index 5d76c1c0d6c..c94a7107019 100644 --- a/include/asm-cris/pgtable.h +++ b/include/asm-cris/pgtable.h @@ -253,7 +253,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) { pmd_val(*pmdp) = _PAGE_TABLE | (unsigned long) ptep; } #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* to find an entry in a page-table-directory. 
*/ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -271,7 +271,7 @@ static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address) #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h index 980ae1b0cd2..1f70d47148b 100644 --- a/include/asm-frv/bitops.h +++ b/include/asm-frv/bitops.h @@ -157,23 +157,105 @@ static inline int __test_bit(int nr, const volatile void * addr) __constant_test_bit((nr),(addr)) : \ __test_bit((nr),(addr))) -#include <asm-generic/bitops/ffs.h> -#include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/find.h> -/* - * fls: find last bit set. +/** + * fls - find last bit set + * @x: the word to search + * + * This is defined the same way as ffs: + * - return 32..1 to indicate bit 31..0 most significant bit set + * - return 0 to indicate no bits set */ #define fls(x) \ ({ \ int bit; \ \ - asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x)); \ + asm(" subcc %1,gr0,gr0,icc0 \n" \ + " ckne icc0,cc4 \n" \ + " cscan.p %1,gr0,%0 ,cc4,#1 \n" \ + " csub %0,%0,%0 ,cc4,#0 \n" \ + " csub %2,%0,%0 ,cc4,#1 \n" \ + : "=&r"(bit) \ + : "r"(x), "r"(32) \ + : "icc0", "cc4" \ + ); \ \ - bit ? 
33 - bit : bit; \ + bit; \ }) -#include <asm-generic/bitops/fls64.h> +/** + * fls64 - find last bit set in a 64-bit value + * @n: the value to search + * + * This is defined the same way as ffs: + * - return 64..1 to indicate bit 63..0 most significant bit set + * - return 0 to indicate no bits set + */ +static inline __attribute__((const)) +int fls64(u64 n) +{ + union { + u64 ll; + struct { u32 h, l; }; + } _; + int bit, x, y; + + _.ll = n; + + asm(" subcc.p %3,gr0,gr0,icc0 \n" + " subcc %4,gr0,gr0,icc1 \n" + " ckne icc0,cc4 \n" + " ckne icc1,cc5 \n" + " norcr cc4,cc5,cc6 \n" + " csub.p %0,%0,%0 ,cc6,1 \n" + " orcr cc5,cc4,cc4 \n" + " andcr cc4,cc5,cc4 \n" + " cscan.p %3,gr0,%0 ,cc4,0 \n" + " setlos #64,%1 \n" + " cscan.p %4,gr0,%0 ,cc4,1 \n" + " setlos #32,%2 \n" + " csub.p %1,%0,%0 ,cc4,0 \n" + " csub %2,%0,%0 ,cc4,1 \n" + : "=&r"(bit), "=r"(x), "=r"(y) + : "0r"(_.h), "r"(_.l) + : "icc0", "icc1", "cc4", "cc5", "cc6" + ); + return bit; + +} + +/** + * ffs - find first bit set + * @x: the word to search + * + * - return 32..1 to indicate bit 31..0 least significant bit set + * - return 0 to indicate no bits set + */ +static inline __attribute__((const)) +int ffs(int x) +{ + /* Note: (x & -x) gives us a mask that is the least significant + * (rightmost) 1-bit of the value in x. 
+ */ + return fls(x & -x); +} + +/** + * __ffs - find first bit set + * @x: the word to search + * + * - return 31..0 to indicate bit 31..0 least significant bit set + * - if no bits are set in x, the result is undefined + */ +static inline __attribute__((const)) +int __ffs(unsigned long x) +{ + int bit; + asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x)); + return 31 - bit; +} + #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/hweight.h> diff --git a/include/asm-frv/cpu-irqs.h b/include/asm-frv/cpu-irqs.h index 5cd691e1f8c..478f3498fcf 100644 --- a/include/asm-frv/cpu-irqs.h +++ b/include/asm-frv/cpu-irqs.h @@ -14,36 +14,6 @@ #ifndef __ASSEMBLY__ -#include <asm/irq-routing.h> - -#define IRQ_BASE_CPU (NR_IRQ_ACTIONS_PER_GROUP * 0) - -/* IRQ IDs presented to drivers */ -enum { - IRQ_CPU__UNUSED = IRQ_BASE_CPU, - IRQ_CPU_UART0, - IRQ_CPU_UART1, - IRQ_CPU_TIMER0, - IRQ_CPU_TIMER1, - IRQ_CPU_TIMER2, - IRQ_CPU_DMA0, - IRQ_CPU_DMA1, - IRQ_CPU_DMA2, - IRQ_CPU_DMA3, - IRQ_CPU_DMA4, - IRQ_CPU_DMA5, - IRQ_CPU_DMA6, - IRQ_CPU_DMA7, - IRQ_CPU_EXTERNAL0, - IRQ_CPU_EXTERNAL1, - IRQ_CPU_EXTERNAL2, - IRQ_CPU_EXTERNAL3, - IRQ_CPU_EXTERNAL4, - IRQ_CPU_EXTERNAL5, - IRQ_CPU_EXTERNAL6, - IRQ_CPU_EXTERNAL7, -}; - /* IRQ to level mappings */ #define IRQ_GDBSTUB_LEVEL 15 #define IRQ_UART_LEVEL 13 @@ -82,6 +52,30 @@ enum { #define IRQ_XIRQ6_LEVEL 7 #define IRQ_XIRQ7_LEVEL 8 +/* IRQ IDs presented to drivers */ +#define IRQ_CPU__UNUSED IRQ_BASE_CPU +#define IRQ_CPU_UART0 (IRQ_BASE_CPU + IRQ_UART0_LEVEL) +#define IRQ_CPU_UART1 (IRQ_BASE_CPU + IRQ_UART1_LEVEL) +#define IRQ_CPU_TIMER0 (IRQ_BASE_CPU + IRQ_TIMER0_LEVEL) +#define IRQ_CPU_TIMER1 (IRQ_BASE_CPU + IRQ_TIMER1_LEVEL) +#define IRQ_CPU_TIMER2 (IRQ_BASE_CPU + IRQ_TIMER2_LEVEL) +#define IRQ_CPU_DMA0 (IRQ_BASE_CPU + IRQ_DMA0_LEVEL) +#define IRQ_CPU_DMA1 (IRQ_BASE_CPU + IRQ_DMA1_LEVEL) +#define IRQ_CPU_DMA2 (IRQ_BASE_CPU + IRQ_DMA2_LEVEL) +#define IRQ_CPU_DMA3 (IRQ_BASE_CPU + IRQ_DMA3_LEVEL) +#define IRQ_CPU_DMA4 
(IRQ_BASE_CPU + IRQ_DMA4_LEVEL) +#define IRQ_CPU_DMA5 (IRQ_BASE_CPU + IRQ_DMA5_LEVEL) +#define IRQ_CPU_DMA6 (IRQ_BASE_CPU + IRQ_DMA6_LEVEL) +#define IRQ_CPU_DMA7 (IRQ_BASE_CPU + IRQ_DMA7_LEVEL) +#define IRQ_CPU_EXTERNAL0 (IRQ_BASE_CPU + IRQ_XIRQ0_LEVEL) +#define IRQ_CPU_EXTERNAL1 (IRQ_BASE_CPU + IRQ_XIRQ1_LEVEL) +#define IRQ_CPU_EXTERNAL2 (IRQ_BASE_CPU + IRQ_XIRQ2_LEVEL) +#define IRQ_CPU_EXTERNAL3 (IRQ_BASE_CPU + IRQ_XIRQ3_LEVEL) +#define IRQ_CPU_EXTERNAL4 (IRQ_BASE_CPU + IRQ_XIRQ4_LEVEL) +#define IRQ_CPU_EXTERNAL5 (IRQ_BASE_CPU + IRQ_XIRQ5_LEVEL) +#define IRQ_CPU_EXTERNAL6 (IRQ_BASE_CPU + IRQ_XIRQ6_LEVEL) +#define IRQ_CPU_EXTERNAL7 (IRQ_BASE_CPU + IRQ_XIRQ7_LEVEL) + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_CPU_IRQS_H */ diff --git a/include/asm-frv/hardirq.h b/include/asm-frv/hardirq.h index 7581b5a7559..fc47515822a 100644 --- a/include/asm-frv/hardirq.h +++ b/include/asm-frv/hardirq.h @@ -26,5 +26,10 @@ typedef struct { #error SMP not available on FR-V #endif /* CONFIG_SMP */ +extern atomic_t irq_err_count; +static inline void ack_bad_irq(int irq) +{ + atomic_inc(&irq_err_count); +} #endif diff --git a/include/asm-frv/irq-routing.h b/include/asm-frv/irq-routing.h deleted file mode 100644 index ac3ab900a1d..00000000000 --- a/include/asm-frv/irq-routing.h +++ /dev/null @@ -1,70 +0,0 @@ -/* irq-routing.h: multiplexed IRQ routing - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _ASM_IRQ_ROUTING_H -#define _ASM_IRQ_ROUTING_H - -#ifndef __ASSEMBLY__ - -#include <linux/spinlock.h> -#include <asm/irq.h> - -struct irq_source; -struct irq_level; - -/* - * IRQ action distribution sets - */ -struct irq_group { - int first_irq; /* first IRQ distributed here */ - void (*control)(struct irq_group *group, int index, int on); - - struct irqaction *actions[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ action chains */ - struct irq_source *sources[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ sources */ - int disable_cnt[NR_IRQ_ACTIONS_PER_GROUP]; /* disable counts */ -}; - -/* - * IRQ source manager - */ -struct irq_source { - struct irq_source *next; - struct irq_level *level; - const char *muxname; - volatile void __iomem *muxdata; - unsigned long irqmask; - - void (*doirq)(struct irq_source *source); -}; - -/* - * IRQ level management (per CPU IRQ priority / entry vector) - */ -struct irq_level { - int usage; - int disable_count; - unsigned long flags; /* current IRQF_DISABLED and IRQF_SHARED settings */ - spinlock_t lock; - struct irq_source *sources; -}; - -extern struct irq_level frv_irq_levels[16]; -extern struct irq_group *irq_groups[NR_IRQ_GROUPS]; - -extern void frv_irq_route(struct irq_source *source, int irqlevel); -extern void frv_irq_route_external(struct irq_source *source, int irq); -extern void frv_irq_set_group(struct irq_group *group); -extern void distribute_irqs(struct irq_group *group, unsigned long irqmask); -extern void route_cpu_irqs(void); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IRQ_ROUTING_H */ diff --git a/include/asm-frv/irq.h b/include/asm-frv/irq.h index 58b619215a5..8fefd6b827a 100644 --- a/include/asm-frv/irq.h +++ b/include/asm-frv/irq.h @@ -1,6 +1,6 @@ /* irq.h: FRV IRQ definitions * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -12,32 +12,22 @@ #ifndef _ASM_IRQ_H_ #define _ASM_IRQ_H_ - -/* - * the system has an on-CPU PIC and another PIC on the FPGA and other PICs on other peripherals, - * so we do some routing in irq-routing.[ch] to reduce the number of false-positives seen by - * drivers - */ - /* this number is used when no interrupt has been assigned */ #define NO_IRQ (-1) -#define NR_IRQ_LOG2_ACTIONS_PER_GROUP 5 -#define NR_IRQ_ACTIONS_PER_GROUP (1 << NR_IRQ_LOG2_ACTIONS_PER_GROUP) -#define NR_IRQ_GROUPS 4 -#define NR_IRQS (NR_IRQ_ACTIONS_PER_GROUP * NR_IRQ_GROUPS) +#define NR_IRQS 48 +#define IRQ_BASE_CPU (0 * 16) +#define IRQ_BASE_FPGA (1 * 16) +#define IRQ_BASE_MB93493 (2 * 16) /* probe returns a 32-bit IRQ mask:-/ */ -#define MIN_PROBE_IRQ (NR_IRQS - 32) +#define MIN_PROBE_IRQ (NR_IRQS - 32) +#ifndef __ASSEMBLY__ static inline int irq_canonicalize(int irq) { return irq; } - -extern void disable_irq_nosync(unsigned int irq); -extern void disable_irq(unsigned int irq); -extern void enable_irq(unsigned int irq); - +#endif #endif /* _ASM_IRQ_H_ */ diff --git a/include/asm-frv/mb93091-fpga-irqs.h b/include/asm-frv/mb93091-fpga-irqs.h index 341bfc52a0e..19778c5ba9d 100644 --- a/include/asm-frv/mb93091-fpga-irqs.h +++ b/include/asm-frv/mb93091-fpga-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93091_FPGA_IRQS_H #define _ASM_MB93091_FPGA_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> +#include <asm/irq.h> -#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93093-fpga-irqs.h b/include/asm-frv/mb93093-fpga-irqs.h index 1e0f11c2fcd..590266b1a6d 100644 --- a/include/asm-frv/mb93093-fpga-irqs.h +++ b/include/asm-frv/mb93093-fpga-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93093_FPGA_IRQS_H #define _ASM_MB93093_FPGA_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> 
+#include <asm/irq.h> -#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93493-irqs.h b/include/asm-frv/mb93493-irqs.h index 15096e73132..82c7aeddd33 100644 --- a/include/asm-frv/mb93493-irqs.h +++ b/include/asm-frv/mb93493-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93493_IRQS_H #define _ASM_MB93493_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> +#include <asm/irq.h> -#define IRQ_BASE_MB93493 (NR_IRQ_ACTIONS_PER_GROUP * 2) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93493-regs.h b/include/asm-frv/mb93493-regs.h index c54aa9d1446..8a1f6aac8cf 100644 --- a/include/asm-frv/mb93493-regs.h +++ b/include/asm-frv/mb93493-regs.h @@ -15,6 +15,7 @@ #include <asm/mb-regs.h> #include <asm/mb93493-irqs.h> +#define __addr_MB93493(X) ((volatile unsigned long *)(__region_CS3 + (X))) #define __get_MB93493(X) ({ *(volatile unsigned long *)(__region_CS3 + (X)); }) #define __set_MB93493(X,V) \ @@ -26,6 +27,7 @@ do { \ #define __set_MB93493_STSR(X,V) __set_MB93493(0x3c0 + (X) * 4, (V)) #define MB93493_STSR_EN +#define __addr_MB93493_IQSR(X) __addr_MB93493(0x3d0 + (X) * 4) #define __get_MB93493_IQSR(X) __get_MB93493(0x3d0 + (X) * 4) #define __set_MB93493_IQSR(X,V) __set_MB93493(0x3d0 + (X) * 4, (V)) diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 7af7485e889..2fb3c6f05e0 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -217,7 +217,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) } #define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) /* * allocating and freeing a pud is trivial: the 1-entry pud is @@ -246,7 +246,7 @@ static inline void pud_clear(pud_t *pud) { } #define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval }) 
#define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) +#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) /* * (pmds are folded into pgds so this doesn't get actually called, @@ -362,7 +362,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) #define pmd_bad(x) (pmd_val(x) & xAMPRx_SS) #define pmd_clear(xp) do { __set_pmd(xp, 0); } while(0) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM @@ -458,7 +458,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 68c6fea994d..7b88d3931e3 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -21,6 +21,10 @@ #define pud_present(pud) 1 #define pud_ERROR(pud) do { } while (0) #define pud_clear(pud) pgd_clear(pud) +#define pud_val(pud) pgd_val(pud) +#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd) +#define pud_page(pud) pgd_page(pud) +#define pud_page_vaddr(pud) pgd_page_vaddr(pud) #undef pud_free_tlb #define pud_free_tlb(tlb, x) do { } while (0) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index e160e04290f..6d45ee5472a 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, 
__per_cpu_offset[cpu])) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) #define __get_cpu_var(var) per_cpu(var, smp_processor_id()) #define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index c8d53ba20e1..29ff5d84d8c 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -47,7 +47,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) #define __pmd(x) ((pmd_t) { __pud(x) } ) #define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) +#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 82e29f0ce46..56646450055 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -44,7 +44,7 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define __pud(x) ((pud_t) { __pgd(x) } ) #define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c2059a3a062..349260cd86e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1,6 +1,8 @@ #ifndef _ASM_GENERIC_PGTABLE_H #define _ASM_GENERIC_PGTABLE_H +#ifndef __ASSEMBLY__ + #ifndef __HAVE_ARCH_PTEP_ESTABLISH /* * Establish a new mapping: @@ -188,7 +190,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres }) #endif -#ifndef __ASSEMBLY__ /* * When walking page tables, we usually want to skip any p?d_none 
entries; * and any p?d_bad entries - reporting the error before resetting to none. diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db5a3732f10..253ae132827 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -194,3 +194,6 @@ .stab.index 0 : { *(.stab.index) } \ .stab.indexstr 0 : { *(.stab.indexstr) } \ .comment 0 : { *(.comment) } + +#define NOTES \ + .notes : { *(.note.*) } :note diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index b75a348d0c1..147e4ac1ebf 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += debugreg.h header-y += ldt.h +header-y += ptrace-abi.h header-y += ucontext.h unifdef-y += mtrr.h diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 9cf20cacf76..576ae01d71c 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -21,8 +21,7 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); WARN_ON(size == 0); flush_write_buffers(); return virt_to_phys(ptr); @@ -32,8 +31,7 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); } static inline int @@ -42,8 +40,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, { int i; - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); WARN_ON(nents == 0 || sg[0].length == 0); for (i = 0; i < nents; i++ ) { diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index a48cc3f7ccc..02428cb3662 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,7 +19,11 @@ * Leave one empty page between vmalloc'ed areas and * the 
start of the fixmap. */ -#define __FIXADDR_TOP 0xfffff000 +#ifndef CONFIG_COMPAT_VDSO +extern unsigned long __FIXADDR_TOP; +#else +#define __FIXADDR_TOP 0xfffff000 +#endif #ifndef __ASSEMBLY__ #include <linux/kernel.h> @@ -93,6 +97,7 @@ enum fixed_addresses { extern void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags); +extern void reserve_top_address(unsigned long reserve); #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 22cb07cc8f3..61b07332200 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void) } extern int early_pfn_to_nid(unsigned long pfn); +extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ + #define get_memcfg_numa get_memcfg_numa_flat #define get_zholes_size(n) (0) + +static inline void numa_kva_reserve(void) +{ +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 2756d4b04c2..201c86a6711 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -21,8 +21,9 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) -#define pte_same(a, b) ((a).pte_low == (b).pte_low) + #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index dccb1b3337a..0d899173232 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -77,7 +77,7 @@ static inline void pud_clear (pud_t * pud) { } #define pud_page(pud) \ ((struct page *) __va(pud_val(pud) & 
PAGE_MASK)) -#define pud_page_kernel(pud) \ +#define pud_page_vaddr(pud) \ ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) @@ -105,6 +105,7 @@ static inline void pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t res; @@ -117,6 +118,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return res; } +#define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte_low == b.pte_low && a.pte_high == b.pte_high; diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 09697fec3d2..0dc051a8078 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -246,6 +246,23 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include <asm/pgtable-2level.h> #endif +/* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. 
+ */ +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +do { \ + if (dirty) { \ + (ptep)->pte_low = (entry).pte_low; \ + flush_tlb_page(vma, address); \ + } \ +} while (0) + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { if (!pte_dirty(*ptep)) @@ -253,6 +270,7 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); } +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { if (!pte_young(*ptep)) @@ -260,6 +278,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); } +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; @@ -272,6 +291,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long return pte; } +#define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); @@ -364,11 +384,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* @@ -411,23 +431,8 @@ extern void noexec_setup(const char *str); /* * The i386 doesn't have any external MMU info: the kernel 
page * tables contain all the necessary information. - * - * Also, we only update the dirty/accessed state if we set - * the dirty bit by hand in the kernel, since the hardware - * will do the accessed bit for us, and we don't want to - * race with other CPU's that might be updating the dirty - * bit at the same time. */ #define update_mmu_cache(vma,address,pte) do { } while (0) -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ - do { \ - if (__dirty) { \ - (__ptep)->pte_low = (__entry).pte_low; \ - flush_tlb_page(__vma, __address); \ - } \ - } while (0) - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM @@ -441,12 +446,6 @@ extern void noexec_setup(const char *str); #define GET_IOSPACE(pfn) 0 #define GET_PFN(pfn) (pfn) -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTE_SAME #include <asm-generic/pgtable.h> #endif /* _I386_PGTABLE_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index b32346d62e1..2277127696d 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -143,6 +143,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. 
*/ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx @@ -150,24 +162,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} */ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c"(0)); + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); } /* Some CPUID calls want 'count' to be placed in ecx */ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) + int *edx) { - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c" (count)); + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); } /* @@ -175,42 +181,30 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, */ static inline unsigned int cpuid_eax(unsigned int op) { - unsigned int eax; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); + cpuid(op, &eax, &ebx, &ecx, &edx); return eax; } static inline unsigned int cpuid_ebx(unsigned int op) { - unsigned int eax, ebx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); + cpuid(op, &eax, &ebx, &ecx, &edx); return ebx; } static inline unsigned int cpuid_ecx(unsigned int op) { - unsigned int eax, ecx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); + cpuid(op, &eax, &ebx, &ecx, &edx); return ecx; } static inline unsigned int cpuid_edx(unsigned int op) { - unsigned int eax, edx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); + cpuid(op, &eax, &ebx, &ecx, &edx); return edx; } diff --git 
a/include/asm-i386/ptrace-abi.h b/include/asm-i386/ptrace-abi.h new file mode 100644 index 00000000000..a44901817a2 --- /dev/null +++ b/include/asm-i386/ptrace-abi.h @@ -0,0 +1,39 @@ +#ifndef I386_PTRACE_ABI_H +#define I386_PTRACE_ABI_H + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_OLDSETOPTIONS 21 + +#define PTRACE_GET_THREAD_AREA 25 +#define PTRACE_SET_THREAD_AREA 26 + +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + +#endif diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index f324c53b6f9..1910880fcd4 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -1,24 +1,7 @@ #ifndef _I386_PTRACE_H #define _I386_PTRACE_H -#define EBX 0 -#define ECX 1 -#define EDX 2 -#define ESI 3 -#define EDI 4 -#define EBP 5 -#define EAX 6 -#define DS 7 -#define ES 8 -#define FS 9 -#define GS 10 -#define ORIG_EAX 11 -#define EIP 12 -#define CS 13 -#define EFL 14 -#define UESP 15 -#define SS 16 -#define FRAME_SIZE 17 +#include <asm/ptrace-abi.h> /* this struct defines the way the registers are stored on the stack during a system call. */ @@ -41,22 +24,6 @@ struct pt_regs { int xss; }; -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -#define PTRACE_OLDSETOPTIONS 21 - -#define PTRACE_GET_THREAD_AREA 25 -#define PTRACE_SET_THREAD_AREA 26 - -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 - #ifdef __KERNEL__ #include <asm/vm86.h> diff --git a/include/asm-i386/sync_bitops.h b/include/asm-i386/sync_bitops.h new file mode 100644 index 00000000000..c94d51c993e --- /dev/null +++ b/include/asm-i386/sync_bitops.h @@ -0,0 +1,156 @@ +#ifndef _I386_SYNC_BITOPS_H +#define _I386_SYNC_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#define ADDR (*(volatile long *) addr) + +/** + * sync_set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * sync_clear_bit() is atomic and may not be reordered.
However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void sync_clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * sync_change_bit() is atomic and may not be reordered. It may be + * reordered on other architectures than x86. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_change_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It may be reordered on other architectures than x86. + * It also implies a memory barrier. + */ +static inline int sync_test_and_set_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It can be reordered on architectures other than x86. + * It also implies a memory barrier.
+ */ +static inline int sync_test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_change_bit(int nr, volatile unsigned long* addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __always_inline int sync_const_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *)addr)[nr >> 5])) != 0; +} + +static inline int sync_var_test_bit(int nr, const volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define sync_test_bit(nr,addr) \ + (__builtin_constant_p(nr) ? 
\ + sync_constant_test_bit((nr),(addr)) : \ + sync_var_test_bit((nr),(addr))) + +#undef ADDR + +#endif /* _I386_SYNC_BITOPS_H */ diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index 098bcee94e3..a6dabbcd6e6 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define sync_cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } +/* + * Always use locked operations when touching memory shared with a + * hypervisor, since the system may be SMP even if the guest kernel + * isn't. + */ +static inline unsigned long __sync_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. 
It may be necessary to diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h index e5a8260593a..e0a1d173e42 100644 --- a/include/asm-ia64/numa.h +++ b/include/asm-ia64/numa.h @@ -64,6 +64,10 @@ extern int paddr_to_nid(unsigned long paddr); #define local_nodeid (cpu_to_node_map[smp_processor_id()]) +extern void map_cpu_to_node(int cpu, int nid); +extern void unmap_cpu_from_node(int cpu, int nid); + + #else /* !CONFIG_NUMA */ #define paddr_to_nid(addr) 0 diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 228981cadf8..55318274772 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -275,21 +275,23 @@ ia64_phys_addr_valid (unsigned long addr) #define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) #define pmd_present(pmd) (pmd_val(pmd) != 0UL) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) #define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) #define pud_present(pud) (pud_val(pud) != 0UL) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) #ifdef CONFIG_PGTABLE_4 #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET)) #endif /* @@ -360,19 +362,19 @@ pgd_offset (struct mm_struct *mm, unsigned 
long address) #ifdef CONFIG_PGTABLE_4 /* Find an entry in the second-level page table.. */ #define pud_offset(dir,addr) \ - ((pud_t *) pgd_page(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #endif /* Find an entry in the third-level page table.. */ #define pmd_offset(dir,addr) \ - ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) /* * Find an entry in the third-level page table. This looks more complicated than it * should be because some platforms place page tables in high memory. */ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) +#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) #define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 719ff309ce0..74bde1c2bb1 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void); extern void __init init_smp_config (void); extern void smp_do_timer (struct pt_regs *regs); -extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, - int retry, int wait); extern void smp_send_reschedule (int cpu); extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); diff --git a/include/asm-m32r/pgtable-2level.h b/include/asm-m32r/pgtable-2level.h index be0f167e344..6a674e3d37a 100644 --- a/include/asm-m32r/pgtable-2level.h +++ b/include/asm-m32r/pgtable-2level.h @@ -52,9 +52,13 @@ static inline int pgd_present(pgd_t pgd) { return 1; } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define 
set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#ifndef CONFIG_DISCONTIGMEM +#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) >> PAGE_SHIFT) - PFN_BASE)) +#endif /* !CONFIG_DISCONTIGMEM */ + static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 1983b7f4527..1c15ba7ce31 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -336,7 +336,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) pmd_val(*pmdp) = (((unsigned long) ptep) & PAGE_MASK); } -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM @@ -358,7 +358,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *)pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h index 1ccc7338a54..61e4406ed96 100644 --- a/include/asm-m68k/motorola_pgtable.h +++ b/include/asm-m68k/motorola_pgtable.h @@ -150,6 +150,7 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) #define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE) #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE) #define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; }) +#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT)) #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git 
a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h index d5b38a247e5..eeb0c3115b6 100644 --- a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h +++ b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h @@ -316,7 +316,7 @@ typedef struct dbdma_chan_config { au1x_ddma_desc_t *chan_desc_base; au1x_ddma_desc_t *get_ptr, *put_ptr, *cur_ptr; void *chan_callparam; - void (*chan_callback)(int, void *, struct pt_regs *); + void (*chan_callback)(int, void *); } chan_tab_t; #define DEV_FLAGS_INUSE (1 << 0) @@ -334,8 +334,8 @@ typedef struct dbdma_chan_config { * meaningful name. The 'callback' is called during dma completion * interrupt. */ -u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, - void (*callback)(int, void *, struct pt_regs *), void *callparam); +extern u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, + void (*callback)(int, void *), void *callparam); #define DBDMA_MEM_CHAN DSCR_CMD0_ALWAYS diff --git a/include/asm-mips/pgtable-32.h b/include/asm-mips/pgtable-32.h index 4b26d852813..d20f2e9b28b 100644 --- a/include/asm-mips/pgtable-32.h +++ b/include/asm-mips/pgtable-32.h @@ -156,9 +156,9 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) \ - ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) + ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h index e3db93212ea..c59a1e21f5b 100644 --- a/include/asm-mips/pgtable-64.h +++ b/include/asm-mips/pgtable-64.h @@ -178,24 +178,26 @@ static inline void pud_clear(pud_t *pudp) /* to find an entry in a page-table-directory */ #define 
pgd_offset(mm,addr) ((mm)->pgd + pgd_index(addr)) -static inline unsigned long pud_page(pud_t pud) +static inline unsigned long pud_page_vaddr(pud_t pud) { return pud_val(pud); } +#define pud_phys(pud) (pud_val(pud) - PAGE_OFFSET) +#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) /* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address) { - return (pmd_t *) pud_page(*pud) + pmd_index(address); + return (pmd_t *) pud_page_vaddr(*pud) + pmd_index(address); } /* Find an entry in the third-level page table.. */ #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) \ - ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) + ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) #define pte_offset_map_nested(dir, address) \ diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index a36ca1be17f..1ca4d1e185c 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -87,7 +87,7 @@ extern void paging_init(void); */ #define pmd_phys(pmd) (pmd_val(pmd) - PAGE_OFFSET) #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) pmd_val(pmd) +#define pmd_page_vaddr(pmd) pmd_val(pmd) #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index 5066c54dae0..c0b61e0d149 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -303,7 +303,8 @@ static inline void pmd_clear(pmd_t *pmd) { #if PT_NLEVELS == 3 -#define pgd_page(pgd) ((unsigned long) __va(pgd_address(pgd))) +#define pgd_page_vaddr(pgd) 
((unsigned long) __va(pgd_address(pgd))) +#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) /* For 64 bit we have three level tables */ @@ -382,7 +383,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_address(pmd))) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_address(pmd))) #define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) @@ -400,7 +401,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #if PT_NLEVELS == 3 #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif @@ -408,7 +409,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* Find an entry in the third-level page table.. 
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ - ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address)) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h index e7036155672..345d9b07b3e 100644 --- a/include/asm-powerpc/pgtable-4k.h +++ b/include/asm-powerpc/pgtable-4k.h @@ -88,10 +88,11 @@ #define pgd_bad(pgd) (pgd_val(pgd) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) -#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) #define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page(*(pgdp))) + \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index 8dbf5ad8150..10f52743f4f 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h @@ -196,8 +196,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) || (pmd_val(pmd) & PMD_BAD_BITS)) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) -#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) #define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) #define pud_none(pud) (!pud_val(pud)) @@ -205,7 +205,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) || (pud_val(pud) & PUD_BAD_BITS)) #define 
pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) -#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) @@ -219,10 +220,10 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) #define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index 51fa7c66291..b1fdbf40dba 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -526,7 +526,7 @@ static inline int pgd_bad(pgd_t pgd) { return 0; } static inline int pgd_present(pgd_t pgd) { return 1; } #define pgd_clear(xp) do { } while (0) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) /* @@ -720,12 +720,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, * of the pte page. 
-- paulus */ #ifndef CONFIG_BOOKE -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ (mem_map + (pmd_val(pmd) >> PAGE_SHIFT)) #else -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) (pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ (mem_map + (__pa(pmd_val(pmd)) >> PAGE_SHIFT)) @@ -748,7 +748,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, addr) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir, addr) \ ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr)) #define pte_offset_map_nested(dir, addr) \ diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 28b3517e787..495ad99c763 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -15,18 +15,20 @@ */ #if defined(__s390x__) && defined(MODULE) -#define __reloc_hide(var,offset) \ - (*({ unsigned long *__ptr; \ - asm ( "larl %0,per_cpu__"#var"@GOTENT" \ - : "=a" (__ptr) : "X" (per_cpu__##var) ); \ - (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) +#define __reloc_hide(var,offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long *__ptr; \ + asm ( "larl %0,per_cpu__"#var"@GOTENT" \ + : "=a" (__ptr) : "X" (per_cpu__##var) ); \ + (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) #else -#define __reloc_hide(var, offset) \ - (*({ unsigned long __ptr; \ - asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ - (typeof(&per_cpu__##var)) (__ptr + (offset)); })) +#define __reloc_hide(var, offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long __ptr; \ + asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ + (typeof(&per_cpu__##var)) (__ptr + (offset)); })) #endif diff --git 
a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 1a07028d575..e965309feda 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -664,11 +664,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_page_kernel(pmd) (pmd_val(pmd) & PAGE_MASK) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) #define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT)) -#define pgd_page_kernel(pgd) (pgd_val(pgd) & PAGE_MASK) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & PAGE_MASK) + +#define pgd_page(pgd) (mem_map+(pgd_val(pgd) >> PAGE_SHIFT)) /* to find an entry in a page-table-directory */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -690,14 +692,14 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) /* Find an entry in the second-level page table.. */ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page_kernel(*(dir)) + pmd_index(addr)) + ((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(addr)) #endif /* __s390x__ */ /* Find an entry in the third-level page table.. 
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ - ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address)) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index a3a4e5fd30d..578c2209fa7 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -337,6 +337,8 @@ struct notifier_block; int register_idle_notifier(struct notifier_block *nb); int unregister_idle_notifier(struct notifier_block *nb); +#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL + #endif /* diff --git a/include/asm-sh/pgtable-2level.h b/include/asm-sh/pgtable-2level.h index b0528aa3cb1..b525db6f61c 100644 --- a/include/asm-sh/pgtable-2level.h +++ b/include/asm-sh/pgtable-2level.h @@ -50,9 +50,12 @@ static inline void pgd_clear (pgd_t * pgdp) { } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) \ + (phys_to_page(pgd_val(pgd))) + static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index dcd23a03683..40d41a78041 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h @@ -225,7 +225,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; } -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ @@ -242,7 +242,7 @@ static inline 
pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) #define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h index 54c7821893f..6b97c4cb1d6 100644 --- a/include/asm-sh64/pgtable.h +++ b/include/asm-sh64/pgtable.h @@ -190,7 +190,9 @@ static inline int pgd_bad(pgd_t pgd) { return 0; } #endif -#define pgd_page(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK)) +#define pgd_page_vaddr(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK)) +#define pgd_page(pgd) (virt_to_page(pgd_val(pgd))) + /* * PMD defines. Middle level. @@ -219,7 +221,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) #define pmd_none(pmd_entry) (pmd_val((pmd_entry)) == _PMD_EMPTY) #define pmd_bad(pmd_entry) ((pmd_val(pmd_entry) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) -#define pmd_page_kernel(pmd_entry) \ +#define pmd_page_vaddr(pmd_entry) \ ((unsigned long) __va(pmd_val(pmd_entry) & PAGE_MASK)) #define pmd_page(pmd) \ diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 226c6475c9a..4f0a5ba0d6a 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -143,10 +143,10 @@ extern unsigned long empty_zero_page; /* */ BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t) -BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page, pgd_t) +BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t) #define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd) -#define pgd_page(pgd) BTFIXUP_CALL(pgd_page)(pgd) +#define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd) BTFIXUPDEF_SETHI(none_mask) 
BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t) diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index ebfe395cfb8..b12be7a869f 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -630,8 +630,9 @@ static inline unsigned long pte_present(pte_t pte) #define __pmd_page(pmd) \ ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) -#define pud_page(pud) \ +#define pud_page_vaddr(pud) \ ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) +#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (0) #define pmd_present(pmd) (pmd_val(pmd) != 0U) @@ -653,7 +654,7 @@ static inline unsigned long pte_present(pte_t pte) /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ - ((pmd_t *) pud_page(*(pudp)) + \ + ((pmd_t *) pud_page_vaddr(*(pudp)) + \ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. 
*/ diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h index ffe017f6b64..6050e0eb257 100644 --- a/include/asm-um/pgtable-2level.h +++ b/include/asm-um/pgtable-2level.h @@ -41,7 +41,7 @@ static inline void pgd_mkuptodate(pgd_t pgd) { } #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h index 786c2572728..ca0c2a92a11 100644 --- a/include/asm-um/pgtable-3level.h +++ b/include/asm-um/pgtable-3level.h @@ -74,11 +74,12 @@ extern inline void pud_clear (pud_t *pud) set_pud(pud, __pud(0)); } -#define pud_page(pud) \ +#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) +#define pud_page_vaddr(pud) \ ((struct page *) __va(pud_val(pud) & PAGE_MASK)) /* Find an entry in the second-level page table.. 
*/ -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ +#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \ pmd_index(address)) static inline unsigned long pte_pfn(pte_t pte) diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index ac64eb95586..4862daf8b90 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -349,7 +349,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return pte; } -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] @@ -389,7 +389,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index 824c2889638..afa4fe1ca9f 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -138,9 +138,7 @@ extern struct cpuinfo_um cpu_data[]; #ifdef CONFIG_MODE_SKAS #define KSTK_REG(tsk, reg) \ - ({ union uml_pt_regs regs; \ - get_thread_regs(&regs, tsk->thread.mode.skas.switch_buf); \ - UPT_REG(&regs, reg); }) + get_thread_reg(reg, tsk->thread.mode.skas.switch_buf) #else #define KSTK_REG(tsk, reg) (0xbadbabe) #endif diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h index a36f5371b36..99c87c5ce99 100644 --- a/include/asm-um/ptrace-generic.h +++ b/include/asm-um/ptrace-generic.h @@ -8,19 +8,7 @@ #ifndef __ASSEMBLY__ - -#define pt_regs pt_regs_subarch 
-#define show_regs show_regs_subarch -#define send_sigtrap send_sigtrap_subarch - -#include "asm/arch/ptrace.h" - -#undef pt_regs -#undef show_regs -#undef send_sigtrap -#undef user_mode -#undef instruction_pointer - +#include "asm/arch/ptrace-abi.h" #include "sysdep/ptrace.h" struct pt_regs { diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h index c894e68b1f9..2074483e6ca 100644 --- a/include/asm-um/ptrace-x86_64.h +++ b/include/asm-um/ptrace-x86_64.h @@ -11,15 +11,11 @@ #include "asm/errno.h" #include "asm/host_ldt.h" -#define signal_fault signal_fault_x86_64 #define __FRAME_OFFSETS /* Needed to get the R* macros */ #include "asm/ptrace-generic.h" -#undef signal_fault #define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 -void signal_fault(struct pt_regs_subarch *regs, void *frame, char *where); - #define FS_BASE (21 * sizeof(unsigned long)) #define GS_BASE (22 * sizeof(unsigned long)) #define DS (23 * sizeof(unsigned long)) diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 40f2f13fe17..1ee9b07f3fe 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -11,6 +11,7 @@ header-y += debugreg.h header-y += ldt.h header-y += msr.h header-y += prctl.h +header-y += ptrace-abi.h header-y += setup.h header-y += sigcontext32.h header-y += ucontext.h diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index 670a3388e70..f6567483231 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -46,6 +46,7 @@ extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); +extern void e820_mark_nosave_regions(void); extern void e820_print_map(char *who); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 
08dd9f9dda8..bffb2f886a5 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -21,9 +21,15 @@ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) +#define __get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) +#define __raw_get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) /* A macro to avoid #include hell... */ #define percpu_modcopy(pcpudst, src, size) \ diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index a31ab4e68a9..51eba239517 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -101,9 +101,6 @@ static inline void pgd_clear (pgd_t * pgd) set_pgd(pgd, __pgd(0)); } -#define pud_page(pud) \ -((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) - #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) struct mm_struct; @@ -326,7 +323,8 @@ static inline int pmd_large(pmd_t pte) { /* * Level 4 access. 
*/ -#define pgd_page(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) +#define pgd_page_vaddr(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) @@ -335,16 +333,18 @@ static inline int pmd_large(pmd_t pte) { /* PUD - Level3 access */ /* to find an entry in a page-table-directory. */ +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) /* PMD - Level 2 access */ -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) @@ -382,7 +382,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ pte_index(address)) /* x86-64 always has all page tables mapped. 
*/ diff --git a/include/asm-x86_64/ptrace-abi.h b/include/asm-x86_64/ptrace-abi.h new file mode 100644 index 00000000000..19184b0806b --- /dev/null +++ b/include/asm-x86_64/ptrace-abi.h @@ -0,0 +1,51 @@ +#ifndef _X86_64_PTRACE_ABI_H +#define _X86_64_PTRACE_ABI_H + +#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* = ERROR */ +/* end of arguments */ +/* cpu exception frame or undefined in case of fast syscall. */ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 +#define ARGOFFSET R11 +#endif /* __ASSEMBLY__ */ + +/* top of stack page */ +#define FRAME_SIZE 168 + +#define PTRACE_OLDSETOPTIONS 21 + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +/* only useful for access 32bit programs */ +#define PTRACE_GET_THREAD_AREA 25 +#define PTRACE_SET_THREAD_AREA 26 + +#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */ + +#endif diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h index ca6f15ff61d..ab827dc381d 100644 --- a/include/asm-x86_64/ptrace.h +++ b/include/asm-x86_64/ptrace.h @@ -1,40 +1,9 @@ #ifndef _X86_64_PTRACE_H #define _X86_64_PTRACE_H -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 32 -#define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* = ERROR */ -/* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. 
*/ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 -#define ARGOFFSET R11 -#endif /* __ASSEMBLY__ */ +#include <asm/ptrace-abi.h> -/* top of stack page */ -#define FRAME_SIZE 168 - -#define PTRACE_OLDSETOPTIONS 21 - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLY__ struct pt_regs { unsigned long r15; @@ -45,7 +14,7 @@ struct pt_regs { unsigned long rbx; /* arguments: non interrupts/non tracing syscalls only save upto here*/ unsigned long r11; - unsigned long r10; + unsigned long r10; unsigned long r9; unsigned long r8; unsigned long rax; @@ -54,32 +23,18 @@ struct pt_regs { unsigned long rsi; unsigned long rdi; unsigned long orig_rax; -/* end of arguments */ +/* end of arguments */ /* cpu exception frame or undefined */ unsigned long rip; unsigned long cs; - unsigned long eflags; - unsigned long rsp; + unsigned long eflags; + unsigned long rsp; unsigned long ss; -/* top of stack page */ +/* top of stack page */ }; #endif -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -/* only useful for access 32bit programs */ -#define PTRACE_GET_THREAD_AREA 25 -#define PTRACE_SET_THREAD_AREA 26 - -#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */ - #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #define user_mode(regs) (!!((regs)->cs & 3)) #define user_mode_vm(regs) user_mode(regs) diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb30..ce97f65e1d1 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); void smp_stop_cpu(void); -extern int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait); extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index 7b15afb70c5..a47cc734c20 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -218,7 +218,7 @@ extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)]; /* * The pmd contains the kernel virtual address of the pte page. */ -#define pmd_page_kernel(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) virt_to_page(pmd_val(pmd)) /* @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Find an entry in the third-level page table.. 
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir,addr) \ - ((pte_t*) pmd_page_kernel(*(dir)) + pte_index(addr)) + ((pte_t*) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir),(addr)) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index e319c649e4f..31e9abb6d97 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -4,11 +4,8 @@ #ifndef _LINUX_BOOTMEM_H #define _LINUX_BOOTMEM_H -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <linux/cache.h> -#include <linux/init.h> #include <linux/mmzone.h> +#include <asm/dma.h> /* * simple boot-time physical memory area allocator. @@ -41,45 +38,64 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; -extern unsigned long __init bootmem_bootmap_pages (unsigned long); -extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); -extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit); +extern unsigned long bootmem_bootmap_pages(unsigned long); +extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); +extern void free_bootmem(unsigned long addr, unsigned long size); +extern void *__alloc_bootmem(unsigned long size, + unsigned long align, 
+ unsigned long goal); +extern void *__alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void reserve_bootmem(unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ - __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ - __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low((x), PAGE_SIZE, 0) + __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -extern unsigned long __init free_all_bootmem (void); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); -extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); -extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); + +extern unsigned long free_all_bootmem(void); +extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); +extern void 
*__alloc_bootmem_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern unsigned long init_bootmem_node(pg_data_t *pgdat, + unsigned long freepfn, + unsigned long startpfn, + unsigned long endpfn); +extern void reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size); +extern void free_bootmem_node(pg_data_t *pgdat, + unsigned long addr, + unsigned long size); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) + __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size) { return NULL; } -#endif +#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ extern unsigned long __meminitdata nr_kernel_pages; extern unsigned long nr_all_pages; -extern void *__init alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long limit); +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long limit); #define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ #define HASH_EARLY 0x00000002 /* Allocating during early boot? 
*/ diff --git a/include/linux/console.h b/include/linux/console.h index 3bdf2155e56..76a1807726e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -120,9 +120,14 @@ extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); +#else +static inline void suspend_console(void) {} +static inline void resume_console(void) {} +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /* Some debug stub to catch some of the obvious races in the VT code */ #if 1 diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8fb344a9abd..3fef7d67aed 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu); static inline int cpu_is_offline(int cpu) { return 0; } #endif +#ifdef CONFIG_SUSPEND_SMP +extern int disable_nonboot_cpus(void); +extern void enable_nonboot_cpus(void); +#else +static inline int disable_nonboot_cpus(void) { return 0; } +static inline void enable_nonboot_cpus(void) {} +#endif + #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 2d7671c92c0..d6f4ec467a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -169,6 +169,12 @@ enum { DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* DCCP CCIDS */ +enum { + DCCPC_CCID2 = 2, + DCCPC_CCID3 = 3, +}; + /* DCCP features */ enum { DCCPF_RESERVED = 0, @@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 #define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -404,6 +410,7 @@ struct 
dccp_service_list { }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) +#define DCCP_SERVICE_CODE_IS_ABSENT 0 static inline int dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) @@ -484,11 +491,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk) return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; } -static inline int dccp_service_not_initialized(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 6a5796c81c9..666e0a5f00f 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h @@ -31,6 +31,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ +#define EM_AVR32 0x18ad /* Atmel AVR32 */ /* * This is an interim value that we will use until the committee comes diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h new file mode 100644 index 00000000000..67396db141e --- /dev/null +++ b/include/linux/elfnote.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_ELFNOTE_H +#define _LINUX_ELFNOTE_H +/* + * Helper macros to generate ELF Note structures, which are put into a + * PT_NOTE segment of the final vmlinux image. These are useful for + * including name-value pairs of metadata into the kernel binary (or + * modules?) for use by external programs. + * + * Each note has three parts: a name, a type and a desc. The name is + * intended to distinguish the note's originator, so it would be a + * company, project, subsystem, etc; it must be in a suitable form for + * use in a section name. The type is an integer which is used to tag + * the data, and is considered to be within the "name" namespace (so + * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The + * "desc" field is the actual data. 
There are no constraints on the + * desc field's contents, though typically they're fairly small. + * + * All notes from a given NAME are put into a section named + * .note.NAME. When the kernel image is finally linked, all the notes + * are packed into a single .notes section, which is mapped into the + * PT_NOTE segment. Because notes for a given name are grouped into + * the same section, they'll all be adjacent the output file. + * + * This file defines macros for both C and assembler use. Their + * syntax is slightly different, but they're semantically similar. + * + * See the ELF specification for more detail about ELF notes. + */ + +#ifdef __ASSEMBLER__ +/* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and + * desc data with appropriate padding. The 'desctype' argument is the + * assembler pseudo op defining the type of the data e.g. .asciz while + * 'descdata' is the data itself e.g. "hello, world". + * + * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") + * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection ; +#else /* !__ASSEMBLER__ */ +#include <linux/elf.h> +/* + * Use an anonymous structure which matches the shape of + * Elf{32,64}_Nhdr, but includes the name and desc data. The size and + * type of name and desc depend on the macro arguments. "name" must + * be a literal string, and "desc" must be passed by value. You may + * only define one note per line, since __LINE__ is used to generate + * unique symbols. 
+ */ +#define _ELFNOTE_PASTE(a,b) a##b +#define _ELFNOTE(size, name, unique, type, desc) \ + static const struct { \ + struct elf##size##_note _nhdr; \ + unsigned char _name[sizeof(name)] \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + typeof(desc) _desc \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + } _ELFNOTE_PASTE(_note_, unique) \ + __attribute_used__ \ + __attribute__((section(".note." name), \ + aligned(sizeof(Elf##size##_Word)), \ + unused)) = { \ + { \ + sizeof(name), \ + sizeof(desc), \ + type, \ + }, \ + name, \ + desc \ + } +#define ELFNOTE(size, name, type, desc) \ + _ELFNOTE(size, name, __LINE__, type, desc) + +#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) +#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) +#endif /* __ASSEMBLER__ */ + +#endif /* _LINUX_ELFNOTE_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cc9e6084448..8b34aabfe4c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,17 +9,16 @@ struct vm_area_struct; /* * GFP bitmasks.. + * + * Zone modifiers (see linux/mmzone.h - low three bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use the consistently. The definitions here may + * be used in bit comparisons. 
*/ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#ifdef CONFIG_DMA_IS_DMA32 -#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ -#elif BITS_PER_LONG < 64 -#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ -#else -#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ -#endif +#define __GFP_DMA32 ((__force gfp_t)0x04u) /* * Action modifiers - doesn't change the zoning @@ -46,6 +45,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) @@ -54,7 +54,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) @@ -67,6 +67,8 @@ struct vm_area_struct; #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) + /* Flag - indicates that the buffer will be suitable for DMA. 
Ignored on some platforms, used as appropriate on others */ @@ -76,11 +78,19 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 -static inline int gfp_zone(gfp_t gfp) +static inline enum zone_type gfp_zone(gfp_t flags) { - int zone = GFP_ZONEMASK & (__force int) gfp; - BUG_ON(zone >= GFP_ZONETYPES); - return zone; + if (flags & __GFP_DMA) + return ZONE_DMA; +#ifdef CONFIG_ZONE_DMA32 + if (flags & __GFP_DMA32) + return ZONE_DMA32; +#endif +#ifdef CONFIG_HIGHMEM + if (flags & __GFP_HIGHMEM) + return ZONE_HIGHMEM; +#endif + return ZONE_NORMAL; } /* diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 85ce7ef9a51..fd7d12daa94 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); +extern unsigned long totalhigh_pages; #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } +#define totalhigh_pages 0 + +#ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { might_sleep(); @@ -41,6 +45,7 @@ static inline void *kmap(struct page *page) #define kunmap_atomic(addr, idx) do { } while (0) #define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) +#endif #endif /* CONFIG_HIGHMEM */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fbf6d901e9c..48d3cb3b6a4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *, * Monolithic do_IRQ implementation. 
* (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); +#endif /* * Architectures call this to let the generic IRQ layer @@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs) { struct irq_desc *desc = irq_desc + irq; +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc, regs); +#else if (likely(desc->handle_irq)) desc->handle_irq(irq, desc, regs); else __do_IRQ(irq, regs); +#endif } /* Handling of unhandled and spurious interrupts: */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b2ae4fdce8..e44a37e2c71 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ extern const char linux_banner[]; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) #define KERN_EMERG "<0>" /* system is unusable */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 72440f0a443..09f0f575ddf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); extern unsigned slab_node(struct mempolicy *policy); -extern int policy_zone; +extern enum zone_type policy_zone; -static inline void check_highest_zone(int k) +static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone) policy_zone = k; diff --git a/include/linux/mm.h b/include/linux/mm.h index 224178a000d..856f0ee7e84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/mutex.h> #include <linux/debug_locks.h> +#include <linux/backing-dev.h> struct mempolicy; struct anon_vma; @@ 
-218,7 +219,8 @@ struct inode; * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using - * a page. + * a page, though if it is a pagecache page, rmap structures can tell us + * who is mapping it. */ struct page { unsigned long flags; /* Atomic flags, some possibly @@ -278,6 +280,12 @@ struct page { */ #include <linux/page-flags.h> +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(condition) do { } while(0) +#endif + /* * Methods to modify the page usage count. * @@ -292,12 +300,11 @@ struct page { */ /* - * Drop a ref, return true if the logical refcount fell to zero (the page has - * no users) + * Drop a ref, return true if the refcount fell to zero (the page has no users) */ static inline int put_page_testzero(struct page *page) { - BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON(atomic_read(&page->_count) == 0); return atomic_dec_and_test(&page->_count); } @@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { + VM_BUG_ON(PageCompound(page)); return atomic_inc_not_zero(&page->_count); } -extern void FASTCALL(__page_cache_release(struct page *)); - static inline int page_count(struct page *page) { if (unlikely(PageCompound(page))) @@ -323,6 +329,7 @@ static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + VM_BUG_ON(atomic_read(&page->_count) == 0); atomic_inc(&page->_count); } @@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order); * For the non-reserved pages, page_count(page) denotes a reference count. * page_count() == 0 means the page is free. page->lru is then used for * freelist management in the buddy allocator. - * page_count() == 1 means the page is used for exactly one purpose - * (e.g. 
a private data page of one process). + * page_count() > 0 means the page has been allocated. + * + * Pages are allocated by the slab allocator in order to provide memory + * to kmalloc and kmem_cache_alloc. In this case, the management of the + * page, and the fields in 'struct page' are the responsibility of mm/slab.c + * unless a particular usage is carefully commented. (the responsibility of + * freeing the kmalloc memory is the caller's, of course). * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page_count() is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. + * A page may be used by anyone else who does a __get_free_page(). + * In this case, page_count still tracks the references, and should only + * be used through the normal accessor functions. The top bits of page->flags + * and page->virtual store page management information, but all other fields + * are unused and could be used privately, carefully. The management of this + * page is the responsibility of the one who allocated it, and those who have + * subsequently been given references to it. * - * The other pages (we may call them "process pages") are completely + * The other pages (we may call them "pagecache pages") are completely * managed by the Linux memory manager: I/O, buffers, swapping etc. * The following discussion applies only to them. * - * A page may belong to an inode's memory mapping. In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. + * A pagecache page contains an opaque `private' member, which belongs to the + * page's address_space. Usually, this is the address of a circular list of + * the page's disk buffers. PG_private must be set to tell the VM to call + * into the filesystem to release these pages. 
* - * A page contains an opaque `private' member, which belongs to the - * page's address_space. Usually, this is the address of a circular - * list of the page's disk buffers. + * A page may belong to an inode's memory mapping. In this case, page->mapping + * is the pointer to the inode, and page->index is the file offset of the page, + * in units of PAGE_CACHE_SIZE. * - * For pages belonging to inodes, the page_count() is the number of - * attaches, plus 1 if `private' contains something, plus one for - * the page cache itself. + * If pagecache pages are not associated with an inode, they are said to be + * anonymous pages. These may become associated with the swapcache, and in that + * case PG_swapcache is set, and page->private is an offset into the swapcache. * - * Instead of keeping dirty/clean pages in per address-space lists, we instead - * now tag pages as dirty/under writeback in the radix tree. + * In either case (swapcache or inode backed), the pagecache itself holds one + * reference to the page. Setting PG_private should also increment the + * refcount. The each user mapping also has a reference to the page. * - * There is also a per-mapping radix tree mapping index to the page - * in memory if present. The tree is rooted at mapping->root. + * The pagecache pages are stored in a per-mapping radix tree, which is + * rooted at mapping->page_tree, and indexed by offset. + * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space + * lists, we instead now tag pages as dirty/writeback in the radix tree. * - * All process pages can do I/O: + * All pagecache pages may be subject to I/O: * - inode pages may need to be read from disk, * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. 
+ * to be written back to the inode on disk, + * - anonymous pages (including MAP_PRIVATE file mappings) which have been + * modified may need to be swapped out to swap space and (later) to be read + * back into memory. */ /* @@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order); #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) -static inline unsigned long page_zonenum(struct page *page) +static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page) return zone_table[page_zone_id(page)]; } +static inline unsigned long zone_to_nid(struct zone *zone) +{ + return zone->zone_pgdat->node_id; +} + static inline unsigned long page_to_nid(struct page *page) { if (FLAGS_HAS_NODE) return (page->flags >> NODES_PGSHIFT) & NODES_MASK; else - return page_zone(page)->zone_pgdat->node_id; + return zone_to_nid(page_zone(page)); } static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -static inline void set_page_zone(struct page *page, unsigned long zone) +static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } + static inline void set_page_node(struct page *page, unsigned long node) { page->flags &= ~(NODES_MASK << NODES_PGSHIFT); @@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline void set_page_links(struct page *page, unsigned long zone, +static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); @@ -802,6 +827,39 @@ struct shrinker; extern struct shrinker *set_shrinker(int, 
shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); +/* + * Some shared mappigns will want the pages marked read-only + * to track write events. If so, we'll downgrade vm_page_prot + * to the private version (using protection_map[] without the + * VM_SHARED bit). + */ +static inline int vma_wants_writenotify(struct vm_area_struct *vma) +{ + unsigned int vm_flags = vma->vm_flags; + + /* If it was private or non-writable, the write bit is already clear */ + if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) + return 0; + + /* The backer wishes to know when pages are first written to? */ + if (vma->vm_ops && vma->vm_ops->page_mkwrite) + return 1; + + /* The open routine did something to the protections already? */ + if (pgprot_val(vma->vm_page_prot) != + pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)])) + return 0; + + /* Specialty mapping? */ + if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) + return 0; + + /* Can the mapping track the dirty pages? */ + return vma->vm_file && vma->vm_file->f_mapping && + mapping_cap_account_dirty(vma->vm_file->f_mapping); +} + extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f45163c528e..3693f1a5278 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -51,7 +51,8 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 
only modified from process context */ NR_FILE_PAGES, - NR_SLAB, /* Pages used by slab allocator */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_FILE_DIRTY, NR_WRITEBACK, @@ -88,53 +89,68 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif -#define ZONE_DMA 0 -#define ZONE_DMA32 1 -#define ZONE_NORMAL 2 -#define ZONE_HIGHMEM 3 - -#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ - +enum zone_type { + /* + * ZONE_DMA is used when there are devices that are not able + * to do DMA to all of addressable memory (ZONE_NORMAL). Then we + * carve out the portion of memory that is needed for these devices. + * The range is arch specific. + * + * Some examples + * + * Architecture Limit + * --------------------------- + * parisc, ia64, sparc <4G + * s390 <2G + * arm26 <48M + * arm Various + * alpha Unlimited or 0-16MB. + * + * i386, x86_64 and multiple other arches + * <16M. + */ + ZONE_DMA, +#ifdef CONFIG_ZONE_DMA32 + /* + * x86_64 needs two ZONE_DMAs because it supports devices that are + * only able to do DMA to the lower 16M but also 32 bit devices that + * can only do DMA areas below 4G. + */ + ZONE_DMA32, +#endif + /* + * Normal addressable memory is in ZONE_NORMAL. DMA operations can be + * performed on pages in ZONE_NORMAL if the DMA devices support + * transfers to all addressable memory. + */ + ZONE_NORMAL, +#ifdef CONFIG_HIGHMEM + /* + * A memory area that is only addressable by the kernel through + * mapping portions into its own address space. This is for example + * used by i386 to allow the kernel to address the memory beyond + * 900MB. The kernel will set up special mappings (page + * table entries on i386) for each page that the kernel needs to + * access. 
+ */ + ZONE_HIGHMEM, +#endif + MAX_NR_ZONES +}; /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - * - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form for GFP_ZONETYPES when the left most bit is not - * a "loner", otherwise use the second. - * - * NOTE! Make sure this matches the zones in <linux/gfp.h> + * match the requested limits. See gfp_zone() in include/linux/gfp.h */ -#define GFP_ZONEMASK 0x07 -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. 
On a 32bit PC we have 4 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_DMA32 0 MB Empty - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ +#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM) +#define ZONES_SHIFT 1 +#else +#define ZONES_SHIFT 2 +#endif struct zone { /* Fields commonly accessed by the page allocator */ @@ -154,7 +170,8 @@ struct zone { /* * zone reclaim becomes active if more unmapped pages exist. */ - unsigned long min_unmapped_ratio; + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -266,7 +283,6 @@ struct zone { char *name; } ____cacheline_internodealigned_in_smp; - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -304,7 +320,7 @@ struct zonelist { struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; + struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; @@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } -static inline int is_highmem_idx(int idx) +static inline int is_highmem_idx(enum zone_type idx) { +#ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM); +#else + return 0; +#endif } -static inline int is_normal_idx(int idx) +static inline int is_normal_idx(enum zone_type idx) { return (idx == ZONE_NORMAL); } @@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx) */ static inline int is_highmem(struct zone *zone) { +#ifdef CONFIG_HIGHMEM return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; +#else + return 0; +#endif } static inline int is_normal(struct zone *zone) @@ -401,7 +425,11 @@ static inline int is_normal(struct 
zone *zone) static inline int is_dma32(struct zone *zone) { +#ifdef CONFIG_ZONE_DMA32 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +#else + return 0; +#endif } static inline int is_dma(struct zone *zone) @@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. */ diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9a285cecf24..312bd2ffee3 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,8 @@ header-y += xt_connmark.h header-y += xt_CONNMARK.h header-y += xt_conntrack.h header-y += xt_dccp.h +header-y += xt_dscp.h +header-y += xt_DSCP.h header-y += xt_esp.h header-y += xt_helper.h header-y += xt_length.h diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642e9f3..9d7921dd50f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -13,24 +13,25 @@ * PG_reserved is set for special pages, which can never be swapped out. Some * of them might not even exist (eg empty_bad_page)... * - * The PG_private bitflag is set if page->private contains a valid value. + * The PG_private bitflag is set on pagecache pages if they contain filesystem + * specific data (which is normally at page->private). It can be used by + * private allocations for its own usage. * - * During disk I/O, PG_locked is used. This bit is set before I/O and - * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks - * waiting for the I/O on this page to complete. + * During initiation of disk I/O, PG_locked is set. 
This bit is set before I/O + * and cleared when writeback _starts_ or when read _completes_. PG_writeback + * is set before writeback starts and cleared when it finishes. + * + * PG_locked also pins a page in pagecache, and blocks truncation of the file + * while it is held. + * + * page_waitqueue(page) is a wait queue of all tasks waiting for the page + * to become unlocked. * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * - * For choosing which pages to swap out, inode pages carry a PG_referenced bit, - * which is set any time the system accesses that page through the (mapping, - * index) hash table. This referenced bit, together with the referenced bit - * in the page tables, is used to manipulate page->age and move the page across - * the active, inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the active, - * inactive_dirty and inactive_clean lists are protected by the - * zone->lru_lock, and *NOT* by the usual PG_locked bit! + * PG_referenced, PG_reclaim are used for page reclaim for anonymous and + * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * @@ -42,6 +43,10 @@ * space, they need to be kmapped separately for doing IO on the pages. The * struct page (these bits with information) are always mapped into kernel * address space... + * + * PG_buddy is set to indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + * */ /* @@ -74,7 +79,7 @@ #define PG_checked 8 /* kill me in 2.5.<early>. 
*/ #define PG_arch_1 9 #define PG_reserved 10 -#define PG_private 11 /* Has something at ->private */ +#define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ #define PG_nosave 13 /* Used for system suspend/resume */ @@ -83,7 +88,7 @@ #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ #define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_nosave_free 18 /* Used for system suspend/resume */ #define PG_buddy 19 /* Page is free, on buddy lists */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d27f60..64f95092515 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } extern void FASTCALL(__lock_page(struct page *page)); +extern void FASTCALL(__lock_page_nosync(struct page *page)); extern void FASTCALL(unlock_page(struct page *page)); +/* + * lock_page may only be called if we have the page's inode pinned. + */ static inline void lock_page(struct page *page) { might_sleep(); if (TestSetPageLocked(page)) __lock_page(page); } + +/* + * lock_page_nosync should only be used if we can't pin the page's inode. + * Doesn't play quite so well with block device plugging. + */ +static inline void lock_page_nosync(struct page *page) +{ + might_sleep(); + if (TestSetPageLocked(page)) + __lock_page_nosync(page); +} /* * This is exported only for wait_on_page_locked/wait_on_page_writeback. 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cb9039a21f2..3835a9642f1 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,9 +1,12 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H + #include <linux/spinlock.h> /* For preempt_disable() */ #include <linux/slab.h> /* For kmalloc() */ #include <linux/smp.h> #include <linux/string.h> /* For memset() */ +#include <linux/cpumask.h> + #include <asm/percpu.h> /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ @@ -11,8 +14,14 @@ #define PERCPU_ENOUGH_ROOM 32768 #endif -/* Must be an lvalue. */ -#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + preempt_disable(); \ + &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() #ifdef CONFIG_SMP @@ -21,39 +30,77 @@ struct percpu_data { void *ptrs[NR_CPUS]; }; +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) /* - * Use this to get to a cpu's version of the per-cpu object allocated using - * alloc_percpu. Non-atomic access to the current CPU's version should + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should * probably be combined with get_cpu()/put_cpu(). 
*/ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ +#define percpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size); -extern void free_percpu(const void *); +extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu); +extern void percpu_depopulate(void *__pdata, int cpu); +extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask); +extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask); +extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +static inline void percpu_depopulate(void *__pdata, int cpu) +{ +} + +static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ +} -static inline void *__alloc_percpu(size_t size) +static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, + int cpu) { - void *ret = kmalloc(size, GFP_KERNEL); - if (ret) - memset(ret, 0, size); - return ret; + return percpu_ptr(__pdata, cpu); } -static inline void free_percpu(const void *ptr) -{ - kfree(ptr); + +static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + return 0; +} + +static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + return kzalloc(size, gfp); +} + +static inline void percpu_free(void *__pdata) +{ + kfree(__pdata); } #endif /* CONFIG_SMP */ -/* Simple wrapper for the common case: zeros memory. 
*/ -#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) +#define percpu_populate_mask(__pdata, size, gfp, mask) \ + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) +#define percpu_depopulate_mask(__pdata, mask) \ + __percpu_depopulate_mask((__pdata), &(mask)) +#define percpu_alloc_mask(size, gfp, mask) \ + __percpu_alloc_mask((size), (gfp), &(mask)) + +#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) + +/* (legacy) interface for use without CPU hotplug handling */ + +#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ + cpu_possible_map) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define free_percpu(ptr) percpu_free((ptr)) +#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index a376bd4ade3..81e9299ca14 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,21 +3,25 @@ #ifdef CONFIG_PM_TRACE +extern int pm_trace_enabled; + struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) set_trace_device(dev) -#define TRACE_RESUME(user) do { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ +#define TRACE_RESUME(user) do { \ + if (pm_trace_enabled) { \ + void *tracedata; \ + asm volatile("movl $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n" \ + "\t.long %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ } while (0) #else diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf97b090001..db2c1df4fef 100644 --- a/include/linux/rmap.h +++ 
b/include/linux/rmap.h @@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *, */ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +/* + * Cleans the PTEs of shared mappings. + * (and since clean PTEs should also be readonly, write protects them too) + * + * returns the number of cleaned PTEs. + */ +int page_mkclean(struct page *); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); #define page_referenced(page,l) TestClearPageReferenced(page) #define try_to_unmap(page, refs) SWAP_FAIL +static inline int page_mkclean(struct page *page) +{ + return 0; +} + + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/selinux.h b/include/linux/selinux.h index aad4e390d6a..d1b7ca6c1c5 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); /** * selinux_audit_rule_match - determine if a context ID matches a rule. - * @ctxid: the context ID to check + * @sid: the context ID to check * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against @@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx); @@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, void selinux_audit_set_callback(int (*callback)(void)); /** - * selinux_task_ctxid - determine a context ID for a process. - * @tsk: the task object - * @ctxid: ID value returned via this - * - * On return, ctxid will contain an ID for the context. This value - * should only be used opaquely. 
- */ -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); - -/** - * selinux_ctxid_to_string - map a security context ID to a string - * @ctxid: security context ID to be converted. + * selinux_sid_to_string - map a security context ID to a string + * @sid: security context ID to be converted. * @ctx: address of context string to be returned * @ctxlen: length of returned context string. * @@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); * string will be allocated internally, and the caller must call * kfree() on it after use. */ -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen); /** * selinux_get_inode_sid - get the inode's security context ID @@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) return; } -static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void)) return; } -static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) -{ - *ctxid = 0; -} - -static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { *ctx = NULL; *ctxlen = 0; diff --git a/include/linux/slab.h b/include/linux/slab.h index 45ad55b70d1..66d6eb78d1c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. 
*/ struct cache_sizes { @@ -203,7 +202,30 @@ extern int slab_is_available(void); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); -extern void *kmalloc_node(size_t size, gfp_t flags, int node); +extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kmalloc_that_much(void); + __you_cannot_kmalloc_that_much(); + } +found: + return kmem_cache_alloc_node((flags & GFP_DMA) ? + malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags, node); + } + return __kmalloc_node(size, flags, node); +} #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { @@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); /* SLOB allocator routines */ void kmem_cache_init(void); -struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, unsigned long, void (*)(void *, struct kmem_cache *, unsigned long), @@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; -extern atomic_t slab_reclaim_pages; - #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 837e8bce134..51649987f69 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus); */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + /* * Call a function on all processors */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h 
index 96e31aa64cc..b1237f16ecd 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -10,29 +10,11 @@ #include <linux/pm.h> /* page backup entry */ -typedef struct pbe { +struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ struct pbe *next; -} suspend_pagedir_t; - -#define for_each_pbe(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = pbe->next) - -#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe)) -#define PB_PAGE_SKIP (PBES_PER_PAGE-1) - -#define for_each_pb_page(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) - - -#define SWAP_FILENAME_MAXLENGTH 32 - - -extern dev_t swsusp_resume_device; - -/* mm/vmscan.c */ -extern int shrink_mem(void); +}; /* mm/page_alloc.c */ extern void drain_local_pages(void); @@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {} static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); - return -EPERM; + return -ENOSYS; } #endif /* CONFIG_PM */ -#ifdef CONFIG_SUSPEND_SMP -extern void disable_nonboot_cpus(void); -extern void enable_nonboot_cpus(void); -#else -static inline void disable_nonboot_cpus(void) {} -static inline void enable_nonboot_cpus(void) {} -#endif - void save_processor_state(void); void restore_processor_state(void); struct saved_context; diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e59184c909..e7c36ba2a2d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,10 @@ #include <asm/atomic.h> #include <asm/page.h> +struct notifier_block; + +struct bio; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -156,13 +160,14 @@ struct swap_list_t { /* linux/mm/oom_kill.c */ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern int register_oom_notifier(struct notifier_block *nb); +extern int unregister_oom_notifier(struct 
notifier_block *nb); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -extern unsigned long totalhigh_pages; extern unsigned long totalreserve_pages; extern long nr_swap_pages; extern unsigned int nr_free_pages(void); @@ -190,6 +195,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; extern int sysctl_min_unmapped_ratio; +extern int sysctl_min_slab_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 @@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int, swp_entry_t, struct page *); +extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain); +extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ extern struct address_space swapper_space; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 736ed917a4f..eca555781d0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -191,6 +191,7 @@ enum VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ + VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 71b6363caaa..dee88c6b6fa 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, - pgprot_t prot, int node); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2d9b1b60798..176c7f79733 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -18,7 +18,19 @@ * generated will simply be the increment of a global address. */ -#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH +#ifdef CONFIG_ZONE_DMA32 +#define DMA32_ZONE(xx) xx##_DMA32, +#else +#define DMA32_ZONE(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_ZONE(xx) , xx##_HIGH +#else +#define HIGHMEM_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), @@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node, struct zone *zones = NODE_DATA(node)->node_zones; return -#ifndef CONFIG_DMA_IS_NORMAL -#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64 +#ifdef CONFIG_ZONE_DMA32 zone_page_state(&zones[ZONE_DMA32], item) + #endif zone_page_state(&zones[ZONE_NORMAL], item) + -#endif #ifdef CONFIG_HIGHMEM zone_page_state(&zones[ZONE_HIGHMEM], item) + #endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0422036af4e..56a23a0e7f2 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space 
*mapping, loff_t pos, loff_t count); int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); +void set_page_dirty_balance(struct page *page); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 59406e0dc5b..2d72496c202 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -130,8 +130,9 @@ extern int cipso_v4_rbm_strictvalid; int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg); int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, const char *domain); @@ -152,14 +153,11 @@ static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) return NULL; } -static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +static inline int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - return NULL; -} - -static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - return NULL; + return 0; } static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, @@ -205,6 +203,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); int cipso_v4_socket_setattr(const struct socket *sock, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int 
cipso_v4_skbuff_getattr(const struct sk_buff *skb, @@ -225,6 +224,12 @@ static inline int cipso_v4_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int cipso_v4_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index dd5780b3691..6692430063f 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -57,9 +57,8 @@ * The payload is dependent on the subsystem specified in the * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c - * file. All of the fields in the NetLabel payload are NETLINK attributes, the - * length of each field is the length of the NETLINK attribute payload, see - * include/net/netlink.h for more information on NETLINK attributes. + * file. All of the fields in the NetLabel payload are NETLINK attributes, see + * the include/net/netlink.h file for more information on NETLINK attributes. * */ @@ -82,50 +81,6 @@ #define NETLBL_NLTYPE_UNLABELED 5 #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" -/* NetLabel return codes */ -#define NETLBL_E_OK 0 - -/* - * Helper functions - */ - -#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) -#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) -#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) - -/** - * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer - * @head: the amount of headroom in bytes - * @body: the desired size (minus headroom) in bytes - * @gfp_flags: the alloc flags to pass to alloc_skb() - * - * Description: - * Allocate a NETLINK message buffer based on the sizes given in @head and - * @body. If @head is greater than zero skb_reserve() is called to reserve - * @head bytes at the start of the buffer. 
Returns a valid sk_buff pointer on - * success, NULL on failure. - * - */ -static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, - size_t body, - gfp_t gfp_flags) -{ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); - if (skb == NULL) - return NULL; - if (head > 0) { - skb_reserve(skb, head); - if (skb_tailroom(skb) < body) { - kfree_skb(skb); - return NULL; - } - } - - return skb; -} - /* * NetLabel - Kernel API for accessing the network packet label mappings. * @@ -238,6 +193,8 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, #ifdef CONFIG_NETLABEL int netlbl_socket_setattr(const struct socket *sock, const struct netlbl_lsm_secattr *secattr); +int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr); int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, @@ -250,6 +207,12 @@ static inline int netlbl_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 11dc2e7f679..4ab68a7a636 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -146,11 +146,13 @@ * nla_ok(nla, remaining) does nla fit into remaining bytes? 
* nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes + * nla_validate_nested() validate a stream of nested attributes * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes + * nla_for_each_nested() loop over the nested attributes *========================================================================= */ @@ -950,6 +952,24 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** + * nla_validate_nested - Validate a stream of nested attributes + * @start: container attribute + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the nested attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documentation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. 
+ */ +static inline int nla_validate_nested(struct nlattr *start, int maxtype, + struct nla_policy *policy) +{ + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); +} + +/** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @head: head of attribute stream diff --git a/kernel/audit.c b/kernel/audit.c index 963fd15c962..f9889ee7782 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; if (sid) { - if ((err = selinux_ctxid_to_string( + if ((err = selinux_sid_to_string( sid, &ctx, &len))) return err; else @@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh) "user pid=%d uid=%u auid=%u", pid, uid, loginuid); if (sid) { - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( sid, &ctx, &len)) { audit_log_format(ab, " ssid=%u", sid); @@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) loginuid, sid); break; case AUDIT_SIGNAL_INFO: - err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); + err = selinux_sid_to_string(audit_sig_sid, &ctx, &len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a44879b0c72..1a58a81fb09 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string(sid, &ctx, &len)) + if (selinux_sid_to_string(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); else audit_log_format(ab, " subj=%s", ctx); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1bd8827a010..fb83c5cb8c3 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk, logged upon error */ if (f->se_rule) { if (need_sid) { - selinux_task_ctxid(tsk, &sid); + selinux_get_task_sid(tsk, &sid); need_sid = 0; } result = selinux_audit_rule_match(sid, f->type, @@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); @@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; diff --git a/kernel/cpu.c b/kernel/cpu.c 
index f230f9ae01c..32c96628463 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); +/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. + * Should always be manipulated under cpu_add_remove_lock + */ +static int cpu_hotplug_disabled; + #ifdef CONFIG_HOTPLUG_CPU /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ @@ -108,30 +113,25 @@ static int take_cpu_down(void *unused) return 0; } -int cpu_down(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int _cpu_down(unsigned int cpu) { int err; struct task_struct *p; cpumask_t old_allowed, tmp; - mutex_lock(&cpu_add_remove_lock); - if (num_online_cpus() == 1) { - err = -EBUSY; - goto out; - } + if (num_online_cpus() == 1) + return -EBUSY; - if (!cpu_online(cpu)) { - err = -EINVAL; - goto out; - } + if (!cpu_online(cpu)) + return -EINVAL; err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); - err = -EINVAL; - goto out; + return -EINVAL; } /* Ensure that we are not runnable on dying cpu */ @@ -179,22 +179,32 @@ out_thread: err = kthread_stop(p); out_allowed: set_cpus_allowed(current, old_allowed); -out: + return err; +} + +int cpu_down(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_down(cpu); + mutex_unlock(&cpu_add_remove_lock); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ -int __devinit cpu_up(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int __devinit _cpu_up(unsigned int cpu) { int ret; void *hcpu = (void *)(long)cpu; - mutex_lock(&cpu_add_remove_lock); - if (cpu_online(cpu) || !cpu_present(cpu)) { - ret = -EINVAL; - goto out; - } + if (cpu_online(cpu) || !cpu_present(cpu)) + return -EINVAL; ret = 
blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { @@ -219,7 +229,95 @@ out_notify: if (ret != 0) blocking_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); + + return ret; +} + +int __devinit cpu_up(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_up(cpu); + + mutex_unlock(&cpu_add_remove_lock); + return err; +} + +#ifdef CONFIG_SUSPEND_SMP +static cpumask_t frozen_cpus; + +int disable_nonboot_cpus(void) +{ + int cpu, first_cpu, error; + + mutex_lock(&cpu_add_remove_lock); + first_cpu = first_cpu(cpu_present_map); + if (!cpu_online(first_cpu)) { + error = _cpu_up(first_cpu); + if (error) { + printk(KERN_ERR "Could not bring CPU%d up.\n", + first_cpu); + goto out; + } + } + error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); + if (error) { + printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); + goto out; + } + /* We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpus_clear(frozen_cpus); + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) + continue; + error = _cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + } else { + printk(KERN_ERR "Error taking CPU%d down: %d\n", + cpu, error); + break; + } + } + if (!error) { + BUG_ON(num_online_cpus() > 1); + /* Make sure the CPUs won't be enabled by someone else */ + cpu_hotplug_disabled = 1; + } else { + printk(KERN_ERR "Non-boot CPUs are not disabled"); + } out: mutex_unlock(&cpu_add_remove_lock); - return ret; + return error; +} + +void enable_nonboot_cpus(void) +{ + int cpu, error; + + /* Allow everyone to use the CPU hotplug again */ + mutex_lock(&cpu_add_remove_lock); + cpu_hotplug_disabled = 0; + mutex_unlock(&cpu_add_remove_lock); + + printk("Enabling non-boot CPUs ...\n"); + 
for_each_cpu_mask(cpu, frozen_cpus) { + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk(KERN_WARNING "Error taking CPU%d up: %d\n", + cpu, error); + } + cpus_clear(frozen_cpus); } +#endif diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4ea6f0dc2fc..cff41511269 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) int i; for (i = 0; zl->zones[i]; i++) { - int nid = zl->zones[i]->zone_pgdat->node_id; + int nid = zone_to_nid(zl->zones[i]); if (node_isset(nid, current->mems_allowed)) return 1; @@ -2316,9 +2316,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; - node = z->zone_pgdat->node_id; + node = zone_to_nid(z); might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 48a53f68af9..4c6cdbaed66 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, return retval; } +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ /** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number @@ -253,6 +254,7 @@ out: return 1; } +#endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/module.c b/kernel/module.c index 2a19cd47c04..b7fe6e84096 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod, { int err; + if (!module_subsys.kset.subsys) { + printk(KERN_ERR "%s: module_subsys not initialized\n", + mod->name); + err = -EINVAL; + goto out; + } memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); if (err) diff --git 
a/kernel/power/Kconfig b/kernel/power/Kconfig index 619ecabf7c5..4b6e2f18e05 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -36,6 +36,17 @@ config PM_DEBUG code. This is helpful when debugging and reporting various PM bugs, like suspend support. +config DISABLE_CONSOLE_SUSPEND + bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" + depends on PM && PM_DEBUG + default n + ---help--- + This option turns off the console suspend mechanism that prevents + debug messages from reaching the console during the suspend/resume + operations. This may be helpful when debugging device drivers' + suspend/resume routines, but may itself lead to problems, for example + if netconsole is used. + config PM_TRACE bool "Suspend/resume event tracing" depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 8d0af3d37a4..38725f526af 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,6 +7,4 @@ obj-y := main.o process.o console.o obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o -obj-$(CONFIG_SUSPEND_SMP) += smp.o - obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e13e7406784..7c7b9b65e36 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include <linux/cpu.h> #include "power.h" @@ -72,7 +73,10 @@ static int prepare_processes(void) int error; pm_prepare_console(); - disable_nonboot_cpus(); + + error = disable_nonboot_cpus(); + if (error) + goto enable_cpus; if (freeze_processes()) { error = -EBUSY; @@ -84,6 +88,7 @@ static int prepare_processes(void) return 0; thaw: thaw_processes(); +enable_cpus: enable_nonboot_cpus(); pm_restore_console(); return error; diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c77679..873228c71da 100644 --- a/kernel/power/main.c +++ 
b/kernel/power/main.c @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/console.h> +#include <linux/cpu.h> +#include <linux/resume-trace.h> #include "power.h" @@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops) static int suspend_prepare(suspend_state_t state) { - int error = 0; + int error; unsigned int free_pages; if (!pm_ops || !pm_ops->enter) @@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); - disable_nonboot_cpus(); - - if (num_online_cpus() != 1) { - error = -EPERM; + error = disable_nonboot_cpus(); + if (error) goto Enable_cpu; - } if (freeze_processes()) { error = -EAGAIN; @@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n power_attr(state); +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static struct attribute * g[] = { + &state_attr.attr, + &pm_trace_attr.attr, + NULL, +}; +#else static struct attribute * g[] = { &state_attr.attr, NULL, }; +#endif /* CONFIG_PM_TRACE */ static struct attribute_group attr_group = { .attrs = g, diff --git a/kernel/power/power.h b/kernel/power/power.h index 57a792982fb..bfe999f7b27 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,8 +38,6 @@ extern struct subsystem power_subsys; /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; -extern struct pbe *pagedir_nosave; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; @@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void); extern unsigned int count_data_pages(void); +/** + * 
Auxiliary structure used for reading the snapshot image data and + * metadata from and writing them to the list of page backup entries + * (PBEs) which is the main data structure of swsusp. + * + * Using struct snapshot_handle we can transfer the image, including its + * metadata, as a continuous sequence of bytes with the help of + * snapshot_read_next() and snapshot_write_next(). + * + * The code that writes the image to a storage or transfers it to + * the user land is required to use snapshot_read_next() for this + * purpose and it should not make any assumptions regarding the internal + * structure of the image. Similarly, the code that reads the image from + * a storage or transfers it from the user land is required to use + * snapshot_write_next(). + * + * This may allow us to change the internal structure of the image + * in the future with considerably less effort. + */ + struct snapshot_handle { - loff_t offset; - unsigned int page; - unsigned int page_offset; - unsigned int prev; - struct pbe *pbe, *last_pbe; - void *buffer; - unsigned int buf_offset; + loff_t offset; /* number of the last byte ready for reading + * or writing in the sequence + */ + unsigned int cur; /* number of the block of PAGE_SIZE bytes the + * next operation will refer to (ie. 
current) + */ + unsigned int cur_offset; /* offset with respect to the current + * block (for the next operation) + */ + unsigned int prev; /* number of the block of PAGE_SIZE bytes that + * was the current one previously + */ + void *buffer; /* address of the block to read from + * or write to + */ + unsigned int buf_offset; /* location to read from or write to, + * given as a displacement from 'buffer' + */ + int sync_read; /* Set to one to notify the caller of + * snapshot_write_next() that it may + * need to call wait_on_bio_chain() + */ }; +/* This macro returns the address from/to which the caller of + * snapshot_read_next()/snapshot_write_next() is allowed to + * read/write data after the function returns + */ #define data_of(handle) ((handle).buffer + (handle).buf_offset) +extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); -int snapshot_image_loaded(struct snapshot_handle *handle); +extern int snapshot_image_loaded(struct snapshot_handle *handle); +extern void snapshot_free_unused_memory(struct snapshot_handle *handle); #define SNAPSHOT_IOC_MAGIC '3' #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) diff --git a/kernel/power/smp.c b/kernel/power/smp.c deleted file mode 100644 index 5957312b2d6..00000000000 --- a/kernel/power/smp.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * drivers/power/smp.c - Functions for stopping other CPUs. - * - * Copyright 2004 Pavel Machek <pavel@suse.cz> - * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> - * - * This file is released under the GPLv2. 
- */ - -#undef DEBUG - -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/suspend.h> -#include <linux/module.h> -#include <linux/cpu.h> -#include <asm/atomic.h> -#include <asm/tlbflush.h> - -/* This is protected by pm_sem semaphore */ -static cpumask_t frozen_cpus; - -void disable_nonboot_cpus(void) -{ - int cpu, error; - - error = 0; - cpus_clear(frozen_cpus); - printk("Freezing cpus ...\n"); - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - error = cpu_down(cpu); - if (!error) { - cpu_set(cpu, frozen_cpus); - printk("CPU%d is down\n", cpu); - continue; - } - printk("Error taking cpu %d down: %d\n", cpu, error); - } - BUG_ON(raw_smp_processor_id() != 0); - if (error) - panic("cpus not sleeping"); -} - -void enable_nonboot_cpus(void) -{ - int cpu, error; - - printk("Thawing cpus ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); - if (!error) { - printk("CPU%d is up\n", cpu); - continue; - } - printk("Error taking cpu %d up: %d\n", cpu, error); - panic("Not enough cpus"); - } - cpus_clear(frozen_cpus); -} - diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 75d4886e648..1b84313cbab 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -34,10 +34,12 @@ #include "power.h" -struct pbe *pagedir_nosave; +/* List of PBEs used for creating and restoring the suspend image */ +struct pbe *restore_pblist; + static unsigned int nr_copy_pages; static unsigned int nr_meta_pages; -static unsigned long *buffer; +static void *buffer; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void) @@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;} static inline int restore_highmem(void) {return 0;} #endif -static int pfn_is_nosave(unsigned long pfn) +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * used before suspend. 
+ * + * The unsafe pages are marked with the PG_nosave_free flag + * and we count them using unsafe_pages + */ + +#define PG_ANY 0 +#define PG_SAFE 1 +#define PG_UNSAFE_CLEAR 1 +#define PG_UNSAFE_KEEP 0 + +static unsigned int allocated_unsafe_pages; + +static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) { - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); + void *res; + + res = (void *)get_zeroed_page(gfp_mask); + if (safe_needed) + while (res && PageNosaveFree(virt_to_page(res))) { + /* The page is unsafe, mark it for swsusp_free() */ + SetPageNosave(virt_to_page(res)); + allocated_unsafe_pages++; + res = (void *)get_zeroed_page(gfp_mask); + } + if (res) { + SetPageNosave(virt_to_page(res)); + SetPageNosaveFree(virt_to_page(res)); + } + return res; +} + +unsigned long get_safe_page(gfp_t gfp_mask) +{ + return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); } /** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page - * - * We save a page if it's Reserved, and not in the range of pages - * statically defined as 'unsaveable', or if it isn't reserved, and - * isn't part of a free chunk of pages. 
+ * free_image_page - free page represented by @addr, allocated with + * alloc_image_page (page flags set by it must be cleared) */ -static int saveable(struct zone *zone, unsigned long *zone_pfn) +static inline void free_image_page(void *addr, int clear_nosave_free) { - unsigned long pfn = *zone_pfn + zone->zone_start_pfn; - struct page *page; + ClearPageNosave(virt_to_page(addr)); + if (clear_nosave_free) + ClearPageNosaveFree(virt_to_page(addr)); + free_page((unsigned long)addr); +} - if (!pfn_valid(pfn)) - return 0; +/* struct linked_page is used to build chains of pages */ - page = pfn_to_page(pfn); - BUG_ON(PageReserved(page) && PageNosave(page)); - if (PageNosave(page)) - return 0; - if (PageReserved(page) && pfn_is_nosave(pfn)) - return 0; - if (PageNosaveFree(page)) - return 0; +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) - return 1; -} +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __attribute__((packed)); -unsigned int count_data_pages(void) +static inline void +free_list_of_pages(struct linked_page *list, int clear_page_nosave) { - struct zone *zone; - unsigned long zone_pfn; - unsigned int n = 0; + while (list) { + struct linked_page *lp = list->next; - for_each_zone (zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - n += saveable(zone, &zone_pfn); + free_image_page(list, clear_page_nosave); + list = lp; } - return n; } -static void copy_data_pages(struct pbe *pblist) +/** + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. 
+ * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ + +struct chain_allocator { + struct linked_page *chain; /* the chain */ + unsigned int used_space; /* total size of objects allocated out + * of the current page + */ + gfp_t gfp_mask; /* mask for allocating pages */ + int safe_needed; /* if set, only "safe" pages are allocated */ +}; + +static void +chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) { - struct zone *zone; - unsigned long zone_pfn; - struct pbe *pbe, *p; + ca->chain = NULL; + ca->used_space = LINKED_PAGE_DATA_SIZE; + ca->gfp_mask = gfp_mask; + ca->safe_needed = safe_needed; +} - pbe = pblist; - for_each_zone (zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - /* This is necessary for swsusp_free() */ - for_each_pb_page (p, pblist) - SetPageNosaveFree(virt_to_page(p)); - for_each_pbe (p, pblist) - SetPageNosaveFree(virt_to_page(p->address)); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - if (saveable(zone, &zone_pfn)) { - struct page *page; - long *src, *dst; - int n; - - page = pfn_to_page(zone_pfn + zone->zone_start_pfn); - BUG_ON(!pbe); - pbe->orig_address = (unsigned long)page_address(page); - /* copy_page and memcpy are not usable for copying task structs. 
*/ - dst = (long *)pbe->address; - src = (long *)pbe->orig_address; - for (n = PAGE_SIZE / sizeof(long); n; n--) - *dst++ = *src++; - pbe = pbe->next; - } - } +static void *chain_alloc(struct chain_allocator *ca, unsigned int size) +{ + void *ret; + + if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { + struct linked_page *lp; + + lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); + if (!lp) + return NULL; + + lp->next = ca->chain; + ca->chain = lp; + ca->used_space = 0; } - BUG_ON(pbe); + ret = ca->chain->data + ca->used_space; + ca->used_space += size; + return ret; } +static void chain_free(struct chain_allocator *ca, int clear_page_nosave) +{ + free_list_of_pages(ca->chain, clear_page_nosave); + memset(ca, 0, sizeof(struct chain_allocator)); +} /** - * free_pagedir - free pages allocated with alloc_pagedir() + * Data types related to memory bitmaps. + * + * Memory bitmap is a structure consiting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresonds to one zone. For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent each blocks of bit chunks in which information is + * stored. + * + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. + * + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). + * + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. 
Additionally, it contains the + * pfns that correspond to the start and end of the represented zone. + * + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bit chunks + * of type unsigned long each). It also contains the pfns that + * correspond to the start and end of the represented memory area and + * the number of bit chunks in the block. + * + * NOTE: Memory bitmaps are used for two types of operations only: + * "set a bit" and "find the next bit set". Moreover, the searching + * is always carried out after all of the "set a bit" operations + * on given bitmap. */ -static void free_pagedir(struct pbe *pblist, int clear_nosave_free) +#define BM_END_OF_MAP (~0UL) + +#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long)) +#define BM_BITS_PER_CHUNK (sizeof(long) << 3) +#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) + +struct bm_block { + struct bm_block *next; /* next element of the list */ + unsigned long start_pfn; /* pfn represented by the first bit */ + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ + unsigned int size; /* number of bit chunks */ + unsigned long *data; /* chunks of bits representing pages */ +}; + +struct zone_bitmap { + struct zone_bitmap *next; /* next element of the list */ + unsigned long start_pfn; /* minimal pfn in this zone */ + unsigned long end_pfn; /* maximal pfn in this zone plus 1 */ + struct bm_block *bm_blocks; /* list of bitmap blocks */ + struct bm_block *cur_block; /* recently used bitmap block */ +}; + +/* strcut bm_position is used for browsing memory bitmaps */ + +struct bm_position { + struct zone_bitmap *zone_bm; + struct bm_block *block; + int chunk; + int bit; +}; + +struct memory_bitmap { + struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ + struct linked_page *p_list; /* list of pages used to store zone + * bitmap objects and bitmap block + * objects + */ + struct bm_position cur; /* most recently used bit position */ +}; 
+ +/* Functions that operate on memory bitmaps */ + +static inline void memory_bm_reset_chunk(struct memory_bitmap *bm) { - struct pbe *pbe; + bm->cur.chunk = 0; + bm->cur.bit = -1; +} - while (pblist) { - pbe = (pblist + PB_PAGE_SKIP)->next; - ClearPageNosave(virt_to_page(pblist)); - if (clear_nosave_free) - ClearPageNosaveFree(virt_to_page(pblist)); - free_page((unsigned long)pblist); - pblist = pbe; - } +static void memory_bm_position_reset(struct memory_bitmap *bm) +{ + struct zone_bitmap *zone_bm; + + zone_bm = bm->zone_bm_list; + bm->cur.zone_bm = zone_bm; + bm->cur.block = zone_bm->bm_blocks; + memory_bm_reset_chunk(bm); } +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); + /** - * fill_pb_page - Create a list of PBEs on a given memory page + * create_bm_block_list - create a list of block bitmap objects */ -static inline void fill_pb_page(struct pbe *pbpage) +static inline struct bm_block * +create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) { - struct pbe *p; + struct bm_block *bblist = NULL; + + while (nr_blocks-- > 0) { + struct bm_block *bb; - p = pbpage; - pbpage += PB_PAGE_SKIP; - do - p->next = p + 1; - while (++p < pbpage); + bb = chain_alloc(ca, sizeof(struct bm_block)); + if (!bb) + return NULL; + + bb->next = bblist; + bblist = bb; + } + return bblist; } /** - * create_pbe_list - Create a list of PBEs on top of a given chain - * of memory pages allocated with alloc_pagedir() + * create_zone_bm_list - create a list of zone bitmap objects */ -static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) +static inline struct zone_bitmap * +create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) { - struct pbe *pbpage, *p; - unsigned int num = PBES_PER_PAGE; + struct zone_bitmap *zbmlist = NULL; - for_each_pb_page (pbpage, pblist) { - if (num >= nr_pages) - break; + while (nr_zones-- > 0) { + struct zone_bitmap *zbm; + + zbm = chain_alloc(ca, sizeof(struct 
zone_bitmap)); + if (!zbm) + return NULL; + + zbm->next = zbmlist; + zbmlist = zbm; + } + return zbmlist; +} + +/** + * memory_bm_create - allocate memory for a memory bitmap + */ + +static int +memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) +{ + struct chain_allocator ca; + struct zone *zone; + struct zone_bitmap *zone_bm; + struct bm_block *bb; + unsigned int nr; + + chain_init(&ca, gfp_mask, safe_needed); - fill_pb_page(pbpage); - num += PBES_PER_PAGE; + /* Compute the number of zones */ + nr = 0; + for_each_zone (zone) + if (populated_zone(zone) && !is_highmem(zone)) + nr++; + + /* Allocate the list of zones bitmap objects */ + zone_bm = create_zone_bm_list(nr, &ca); + bm->zone_bm_list = zone_bm; + if (!zone_bm) { + chain_free(&ca, PG_UNSAFE_CLEAR); + return -ENOMEM; } - if (pbpage) { - for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) - p->next = p + 1; - p->next = NULL; + + /* Initialize the zone bitmap objects */ + for_each_zone (zone) { + unsigned long pfn; + + if (!populated_zone(zone) || is_highmem(zone)) + continue; + + zone_bm->start_pfn = zone->zone_start_pfn; + zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages; + /* Allocate the list of bitmap block objects */ + nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + bb = create_bm_block_list(nr, &ca); + zone_bm->bm_blocks = bb; + zone_bm->cur_block = bb; + if (!bb) + goto Free; + + nr = zone->spanned_pages; + pfn = zone->zone_start_pfn; + /* Initialize the bitmap block objects */ + while (bb) { + unsigned long *ptr; + + ptr = alloc_image_page(gfp_mask, safe_needed); + bb->data = ptr; + if (!ptr) + goto Free; + + bb->start_pfn = pfn; + if (nr >= BM_BITS_PER_BLOCK) { + pfn += BM_BITS_PER_BLOCK; + bb->size = BM_CHUNKS_PER_BLOCK; + nr -= BM_BITS_PER_BLOCK; + } else { + /* This is executed only once in the loop */ + pfn += nr; + bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK); + } + bb->end_pfn = pfn; + bb = bb->next; + } + zone_bm = 
zone_bm->next; } + bm->p_list = ca.chain; + memory_bm_position_reset(bm); + return 0; + +Free: + bm->p_list = ca.chain; + memory_bm_free(bm, PG_UNSAFE_CLEAR); + return -ENOMEM; } -static unsigned int unsafe_pages; +/** + * memory_bm_free - free memory occupied by the memory bitmap @bm + */ + +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) +{ + struct zone_bitmap *zone_bm; + + /* Free the list of bit blocks for each zone_bitmap object */ + zone_bm = bm->zone_bm_list; + while (zone_bm) { + struct bm_block *bb; + + bb = zone_bm->bm_blocks; + while (bb) { + if (bb->data) + free_image_page(bb->data, clear_nosave_free); + bb = bb->next; + } + zone_bm = zone_bm->next; + } + free_list_of_pages(bm->p_list, clear_nosave_free); + bm->zone_bm_list = NULL; +} /** - * @safe_needed - on resume, for storing the PBE list and the image, - * we can only use memory pages that do not conflict with the pages - * used before suspend. + * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds + * to given pfn. The cur_zone_bm member of @bm and the cur_block member + * of @bm->cur_zone_bm are updated. * - * The unsafe pages are marked with the PG_nosave_free flag - * and we count them using unsafe_pages + * If the bit cannot be set, the function returns -EINVAL . 
*/ -static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) +static int +memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { - void *res; - - res = (void *)get_zeroed_page(gfp_mask); - if (safe_needed) - while (res && PageNosaveFree(virt_to_page(res))) { - /* The page is unsafe, mark it for swsusp_free() */ - SetPageNosave(virt_to_page(res)); - unsafe_pages++; - res = (void *)get_zeroed_page(gfp_mask); + struct zone_bitmap *zone_bm; + struct bm_block *bb; + + /* Check if the pfn is from the current zone */ + zone_bm = bm->cur.zone_bm; + if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { + zone_bm = bm->zone_bm_list; + /* We don't assume that the zones are sorted by pfns */ + while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { + zone_bm = zone_bm->next; + if (unlikely(!zone_bm)) + return -EINVAL; } - if (res) { - SetPageNosave(virt_to_page(res)); - SetPageNosaveFree(virt_to_page(res)); + bm->cur.zone_bm = zone_bm; } - return res; + /* Check if the pfn corresponds to the current bitmap block */ + bb = zone_bm->cur_block; + if (pfn < bb->start_pfn) + bb = zone_bm->bm_blocks; + + while (pfn >= bb->end_pfn) { + bb = bb->next; + if (unlikely(!bb)) + return -EINVAL; + } + zone_bm->cur_block = bb; + pfn -= bb->start_pfn; + set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); + return 0; } -unsigned long get_safe_page(gfp_t gfp_mask) +/* Two auxiliary functions for memory_bm_next_pfn */ + +/* Find the first set bit in the given chunk, if there is one */ + +static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p) { - return (unsigned long)alloc_image_page(gfp_mask, 1); + bit++; + while (bit < BM_BITS_PER_CHUNK) { + if (test_bit(bit, chunk_p)) + return bit; + + bit++; + } + return -1; +} + +/* Find a chunk containing some bits set in given block of bits */ + +static inline int next_chunk_in_block(int n, struct bm_block *bb) +{ + n++; + while (n < bb->size) { + if (bb->data[n]) + return n; + + n++; + } + 
return -1; } /** - * alloc_pagedir - Allocate the page directory. - * - * First, determine exactly how many pages we need and - * allocate them. + * memory_bm_next_pfn - find the pfn that corresponds to the next set bit + * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is + * returned. * - * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE - * struct pbe elements (pbes) and the last element in the page points - * to the next page. + * It is required to run memory_bm_position_reset() before the first call to + * this function. + */ + +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) +{ + struct zone_bitmap *zone_bm; + struct bm_block *bb; + int chunk; + int bit; + + do { + bb = bm->cur.block; + do { + chunk = bm->cur.chunk; + bit = bm->cur.bit; + do { + bit = next_bit_in_chunk(bit, bb->data + chunk); + if (bit >= 0) + goto Return_pfn; + + chunk = next_chunk_in_block(chunk, bb); + bit = -1; + } while (chunk >= 0); + bb = bb->next; + bm->cur.block = bb; + memory_bm_reset_chunk(bm); + } while (bb); + zone_bm = bm->cur.zone_bm->next; + if (zone_bm) { + bm->cur.zone_bm = zone_bm; + bm->cur.block = zone_bm->bm_blocks; + memory_bm_reset_chunk(bm); + } + } while (zone_bm); + memory_bm_position_reset(bm); + return BM_END_OF_MAP; + +Return_pfn: + bm->cur.chunk = chunk; + bm->cur.bit = bit; + return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; +} + +/** + * snapshot_additional_pages - estimate the number of additional pages + * be needed for setting up the suspend image data structures for given + * zone (usually the returned value is greater than the exact number) + */ + +unsigned int snapshot_additional_pages(struct zone *zone) +{ + unsigned int res; + + res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); + return res; +} + +/** + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +static inline int pfn_is_nosave(unsigned 
long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +/** + * saveable - Determine whether a page should be cloned or not. + * @pfn: The page * - * On each page we set up a list of struct_pbe elements. + * We save a page if it isn't Nosave, and is not in the range of pages + * statically defined as 'unsaveable', and it + * isn't a part of a free chunk of pages. */ -static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, - int safe_needed) +static struct page *saveable_page(unsigned long pfn) { - unsigned int num; - struct pbe *pblist, *pbe; + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; - if (!nr_pages) + page = pfn_to_page(pfn); + + if (PageNosave(page)) + return NULL; + if (PageReserved(page) && pfn_is_nosave(pfn)) return NULL; + if (PageNosaveFree(page)) + return NULL; + + return page; +} + +unsigned int count_data_pages(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + unsigned int n = 0; - pblist = alloc_image_page(gfp_mask, safe_needed); - /* FIXME: rewrite this ugly loop */ - for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; - pbe = pbe->next, num += PBES_PER_PAGE) { - pbe += PB_PAGE_SKIP; - pbe->next = alloc_image_page(gfp_mask, safe_needed); + for_each_zone (zone) { + if (is_highmem(zone)) + continue; + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + n += !!saveable_page(pfn); } - if (!pbe) { /* get_zeroed_page() failed */ - free_pagedir(pblist, 1); - pblist = NULL; - } else - create_pbe_list(pblist, nr_pages); - return pblist; + return n; +} + +static inline void copy_data_page(long *dst, long *src) +{ + int n; + + /* copy_page and memcpy are not usable for copying task structs. 
*/ + for (n = PAGE_SIZE / sizeof(long); n; n--) + *dst++ = *src++; +} + +static void +copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +{ + struct zone *zone; + unsigned long pfn; + + for_each_zone (zone) { + unsigned long max_zone_pfn; + + if (is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_page(pfn)) + memory_bm_set_bit(orig_bm, pfn); + } + memory_bm_position_reset(orig_bm); + memory_bm_position_reset(copy_bm); + do { + pfn = memory_bm_next_pfn(orig_bm); + if (likely(pfn != BM_END_OF_MAP)) { + struct page *page; + void *src; + + page = pfn_to_page(pfn); + src = page_address(page); + page = pfn_to_page(memory_bm_next_pfn(copy_bm)); + copy_data_page(page_address(page), src); + } + } while (pfn != BM_END_OF_MAP); } /** - * Free pages we allocated for suspend. Suspend pages are alocated - * before atomic copy, so we need to free them after resume. + * swsusp_free - free pages allocated for the suspend. + * + * Suspend pages are alocated before the atomic copy is made, so we + * need to release them after the resume. 
*/ void swsusp_free(void) { struct zone *zone; - unsigned long zone_pfn; + unsigned long pfn, max_zone_pfn; for_each_zone(zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page *page; - page = pfn_to_page(zone_pfn + zone->zone_start_pfn); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); ClearPageNosaveFree(page); @@ -399,7 +798,7 @@ void swsusp_free(void) } nr_copy_pages = 0; nr_meta_pages = 0; - pagedir_nosave = NULL; + restore_pblist = NULL; buffer = NULL; } @@ -414,46 +813,57 @@ void swsusp_free(void) static int enough_free_mem(unsigned int nr_pages) { struct zone *zone; - unsigned int n = 0; + unsigned int free = 0, meta = 0; for_each_zone (zone) - if (!is_highmem(zone)) - n += zone->free_pages; - pr_debug("swsusp: available memory: %u pages\n", n); - return n > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); -} + if (!is_highmem(zone)) { + free += zone->free_pages; + meta += snapshot_additional_pages(zone); + } -static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) -{ - struct pbe *p; + pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, meta, free); - for_each_pbe (p, pblist) { - p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); - if (!p->address) - return -ENOMEM; - } - return 0; + return free > nr_pages + PAGES_FOR_IO + meta; } -static struct pbe *swsusp_alloc(unsigned int nr_pages) +static int +swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, + unsigned int nr_pages) { - struct pbe *pblist; + int error; - if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { - printk(KERN_ERR "suspend: Allocating pagedir 
failed.\n"); - return NULL; - } + error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free; - if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { - printk(KERN_ERR "suspend: Allocating image pages failed.\n"); - swsusp_free(); - return NULL; + error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free; + + while (nr_pages-- > 0) { + struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); + if (!page) + goto Free; + + SetPageNosave(page); + SetPageNosaveFree(page); + memory_bm_set_bit(copy_bm, page_to_pfn(page)); } + return 0; - return pblist; +Free: + swsusp_free(); + return -ENOMEM; } +/* Memory bitmap used for marking saveable pages */ +static struct memory_bitmap orig_bm; +/* Memory bitmap used for marking allocated pages that will contain the copies + * of saveable pages + */ +static struct memory_bitmap copy_bm; + asmlinkage int swsusp_save(void) { unsigned int nr_pages; @@ -464,25 +874,19 @@ asmlinkage int swsusp_save(void) nr_pages = count_data_pages(); printk("swsusp: Need to copy %u pages\n", nr_pages); - pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n", - nr_pages, - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, - PAGES_FOR_IO, nr_free_pages()); - if (!enough_free_mem(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; } - pagedir_nosave = swsusp_alloc(nr_pages); - if (!pagedir_nosave) + if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) return -ENOMEM; /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(); - copy_data_pages(pagedir_nosave); + copy_data_pages(©_bm, &orig_bm); /* * End of critical section. 
From now on, we can write to memory, @@ -511,22 +915,20 @@ static void init_header(struct swsusp_info *info) } /** - * pack_orig_addresses - the .orig_address fields of the PBEs from the - * list starting at @pbe are stored in the array @buf[] (1 page) + * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm + * are stored in the array @buf[] (1 page at a time) */ -static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe) +static inline void +pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; - for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { - buf[j] = pbe->orig_address; - pbe = pbe->next; + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + buf[j] = memory_bm_next_pfn(bm); + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; } - if (!pbe) - for (; j < PAGE_SIZE / sizeof(long); j++) - buf[j] = 0; - return pbe; } /** @@ -553,37 +955,39 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb int snapshot_read_next(struct snapshot_handle *handle, size_t count) { - if (handle->page > nr_meta_pages + nr_copy_pages) + if (handle->cur > nr_meta_pages + nr_copy_pages) return 0; + if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, 0); + buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } if (!handle->offset) { init_header((struct swsusp_info *)buffer); handle->buffer = buffer; - handle->pbe = pagedir_nosave; + memory_bm_position_reset(&orig_bm); + memory_bm_position_reset(©_bm); } - if (handle->prev < handle->page) { - if (handle->page <= nr_meta_pages) { - handle->pbe = pack_orig_addresses(buffer, handle->pbe); - if (!handle->pbe) - handle->pbe = pagedir_nosave; + if (handle->prev < handle->cur) { + if (handle->cur <= nr_meta_pages) { + memset(buffer, 0, PAGE_SIZE); + pack_pfns(buffer, &orig_bm); } else { - handle->buffer = (void *)handle->pbe->address; - handle->pbe = handle->pbe->next; + unsigned 
long pfn = memory_bm_next_pfn(©_bm); + + handle->buffer = page_address(pfn_to_page(pfn)); } - handle->prev = handle->page; + handle->prev = handle->cur; } - handle->buf_offset = handle->page_offset; - if (handle->page_offset + count >= PAGE_SIZE) { - count = PAGE_SIZE - handle->page_offset; - handle->page_offset = 0; - handle->page++; + handle->buf_offset = handle->cur_offset; + if (handle->cur_offset + count >= PAGE_SIZE) { + count = PAGE_SIZE - handle->cur_offset; + handle->cur_offset = 0; + handle->cur++; } else { - handle->page_offset += count; + handle->cur_offset += count; } handle->offset += count; return count; @@ -595,47 +999,50 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) * had been used before suspend */ -static int mark_unsafe_pages(struct pbe *pblist) +static int mark_unsafe_pages(struct memory_bitmap *bm) { struct zone *zone; - unsigned long zone_pfn; - struct pbe *p; - - if (!pblist) /* a sanity check */ - return -EINVAL; + unsigned long pfn, max_zone_pfn; /* Clear page flags */ for_each_zone (zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - if (pfn_valid(zone_pfn + zone->zone_start_pfn)) - ClearPageNosaveFree(pfn_to_page(zone_pfn + - zone->zone_start_pfn)); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) + ClearPageNosaveFree(pfn_to_page(pfn)); } - /* Mark orig addresses */ - for_each_pbe (p, pblist) { - if (virt_addr_valid(p->orig_address)) - SetPageNosaveFree(virt_to_page(p->orig_address)); - else - return -EFAULT; - } + /* Mark pages that correspond to the "original" pfns as "unsafe" */ + memory_bm_position_reset(bm); + do { + pfn = memory_bm_next_pfn(bm); + if (likely(pfn != BM_END_OF_MAP)) { + if (likely(pfn_valid(pfn))) + SetPageNosaveFree(pfn_to_page(pfn)); + else + return -EFAULT; + } + } while (pfn != BM_END_OF_MAP); - unsafe_pages = 0; + allocated_unsafe_pages = 0; return 0; } -static void 
copy_page_backup_list(struct pbe *dst, struct pbe *src) +static void +duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) { - /* We assume both lists contain the same number of elements */ - while (src) { - dst->orig_address = src->orig_address; - dst = dst->next; - src = src->next; + unsigned long pfn; + + memory_bm_position_reset(src); + pfn = memory_bm_next_pfn(src); + while (pfn != BM_END_OF_MAP) { + memory_bm_set_bit(dst, pfn); + pfn = memory_bm_next_pfn(src); } } -static int check_header(struct swsusp_info *info) +static inline int check_header(struct swsusp_info *info) { char *reason = NULL; @@ -662,19 +1069,14 @@ static int check_header(struct swsusp_info *info) * load header - check the image header and copy data from it */ -static int load_header(struct snapshot_handle *handle, - struct swsusp_info *info) +static int +load_header(struct swsusp_info *info) { int error; - struct pbe *pblist; + restore_pblist = NULL; error = check_header(info); if (!error) { - pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0); - if (!pblist) - return -ENOMEM; - pagedir_nosave = pblist; - handle->pbe = pblist; nr_copy_pages = info->image_pages; nr_meta_pages = info->pages - info->image_pages - 1; } @@ -682,113 +1084,137 @@ static int load_header(struct snapshot_handle *handle, } /** - * unpack_orig_addresses - copy the elements of @buf[] (1 page) to - * the PBEs in the list starting at @pbe + * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set + * the corresponding bit in the memory bitmap @bm */ -static inline struct pbe *unpack_orig_addresses(unsigned long *buf, - struct pbe *pbe) +static inline void +unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; - for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { - pbe->orig_address = buf[j]; - pbe = pbe->next; + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + + memory_bm_set_bit(bm, buf[j]); } - return 
pbe; } /** - * prepare_image - use metadata contained in the PBE list - * pointed to by pagedir_nosave to mark the pages that will - * be overwritten in the process of restoring the system - * memory state from the image ("unsafe" pages) and allocate - * memory for the image + * prepare_image - use the memory bitmap @bm to mark the pages that will + * be overwritten in the process of restoring the system memory state + * from the suspend image ("unsafe" pages) and allocate memory for the + * image. * - * The idea is to allocate the PBE list first and then - * allocate as many pages as it's needed for the image data, - * but not to assign these pages to the PBEs initially. - * Instead, we just mark them as allocated and create a list - * of "safe" which will be used later + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for the image data, but not to assign these + * pages to specific tasks initially. Instead, we just mark them as + * allocated and create a list of "safe" pages that will be used later. 
*/ -struct safe_page { - struct safe_page *next; - char padding[PAGE_SIZE - sizeof(void *)]; -}; +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) -static struct safe_page *safe_pages; +static struct linked_page *safe_pages_list; -static int prepare_image(struct snapshot_handle *handle) +static int +prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { - int error = 0; - unsigned int nr_pages = nr_copy_pages; - struct pbe *p, *pblist = NULL; + unsigned int nr_pages; + struct linked_page *sp_list, *lp; + int error; - p = pagedir_nosave; - error = mark_unsafe_pages(p); - if (!error) { - pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); - if (pblist) - copy_page_backup_list(pblist, p); - free_pagedir(p, 0); - if (!pblist) + error = mark_unsafe_pages(bm); + if (error) + goto Free; + + error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(new_bm, bm); + memory_bm_free(bm, PG_UNSAFE_KEEP); + /* Reserve some safe pages for potential later use. + * + * NOTE: This way we make sure there will be enough safe pages for the + * chain_alloc() in get_buffer(). It is a bit wasteful, but + * nr_copy_pages cannot be greater than 50% of the memory anyway. 
+ */ + sp_list = NULL; + /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ + nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); + while (nr_pages > 0) { + lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); + if (!lp) { error = -ENOMEM; + goto Free; + } + lp->next = sp_list; + sp_list = lp; + nr_pages--; } - safe_pages = NULL; - if (!error && nr_pages > unsafe_pages) { - nr_pages -= unsafe_pages; - while (nr_pages--) { - struct safe_page *ptr; - - ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); - if (!ptr) { - error = -ENOMEM; - break; - } - if (!PageNosaveFree(virt_to_page(ptr))) { - /* The page is "safe", add it to the list */ - ptr->next = safe_pages; - safe_pages = ptr; - } - /* Mark the page as allocated */ - SetPageNosave(virt_to_page(ptr)); - SetPageNosaveFree(virt_to_page(ptr)); + /* Preallocate memory for the image */ + safe_pages_list = NULL; + nr_pages = nr_copy_pages - allocated_unsafe_pages; + while (nr_pages > 0) { + lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); + if (!lp) { + error = -ENOMEM; + goto Free; + } + if (!PageNosaveFree(virt_to_page(lp))) { + /* The page is "safe", add it to the list */ + lp->next = safe_pages_list; + safe_pages_list = lp; } + /* Mark the page as allocated */ + SetPageNosave(virt_to_page(lp)); + SetPageNosaveFree(virt_to_page(lp)); + nr_pages--; } - if (!error) { - pagedir_nosave = pblist; - } else { - handle->pbe = NULL; - swsusp_free(); + /* Free the reserved safe pages so that chain_alloc() can use them */ + while (sp_list) { + lp = sp_list->next; + free_image_page(sp_list, PG_UNSAFE_CLEAR); + sp_list = lp; } + return 0; + +Free: + swsusp_free(); return error; } -static void *get_buffer(struct snapshot_handle *handle) +/** + * get_buffer - compute the address that snapshot_write_next() should + * set for its caller to write to. 
+ */ + +static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { - struct pbe *pbe = handle->pbe, *last = handle->last_pbe; - struct page *page = virt_to_page(pbe->orig_address); + struct pbe *pbe; + struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); - if (PageNosave(page) && PageNosaveFree(page)) { - /* - * We have allocated the "original" page frame and we can - * use it directly to store the read page + if (PageNosave(page) && PageNosaveFree(page)) + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. */ - pbe->address = 0; - if (last && last->next) - last->next = NULL; - return (void *)pbe->orig_address; - } - /* - * The "original" page frame has not been allocated and we have to - * use a "safe" page frame to store the read page + return page_address(page); + + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. */ - pbe->address = (unsigned long)safe_pages; - safe_pages = safe_pages->next; - if (last) - last->next = pbe; - handle->last_pbe = pbe; + pbe = chain_alloc(ca, sizeof(struct pbe)); + if (!pbe) { + swsusp_free(); + return NULL; + } + pbe->orig_address = (unsigned long)page_address(page); + pbe->address = (unsigned long)safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->next = restore_pblist; + restore_pblist = pbe; return (void *)pbe->address; } @@ -816,46 +1242,60 @@ static void *get_buffer(struct snapshot_handle *handle) int snapshot_write_next(struct snapshot_handle *handle, size_t count) { + static struct chain_allocator ca; int error = 0; - if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages) + /* Check if we have already loaded the entire image */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) return 0; + if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, 0); + buffer = 
alloc_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } if (!handle->offset) handle->buffer = buffer; - if (handle->prev < handle->page) { - if (!handle->prev) { - error = load_header(handle, (struct swsusp_info *)buffer); + handle->sync_read = 1; + if (handle->prev < handle->cur) { + if (handle->prev == 0) { + error = load_header(buffer); if (error) return error; + + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); + if (error) + return error; + } else if (handle->prev <= nr_meta_pages) { - handle->pbe = unpack_orig_addresses(buffer, handle->pbe); - if (!handle->pbe) { - error = prepare_image(handle); + unpack_orig_pfns(buffer, ©_bm); + if (handle->prev == nr_meta_pages) { + error = prepare_image(&orig_bm, ©_bm); if (error) return error; - handle->pbe = pagedir_nosave; - handle->last_pbe = NULL; - handle->buffer = get_buffer(handle); + + chain_init(&ca, GFP_ATOMIC, PG_SAFE); + memory_bm_position_reset(&orig_bm); + restore_pblist = NULL; + handle->buffer = get_buffer(&orig_bm, &ca); + handle->sync_read = 0; + if (!handle->buffer) + return -ENOMEM; } } else { - handle->pbe = handle->pbe->next; - handle->buffer = get_buffer(handle); + handle->buffer = get_buffer(&orig_bm, &ca); + handle->sync_read = 0; } - handle->prev = handle->page; + handle->prev = handle->cur; } - handle->buf_offset = handle->page_offset; - if (handle->page_offset + count >= PAGE_SIZE) { - count = PAGE_SIZE - handle->page_offset; - handle->page_offset = 0; - handle->page++; + handle->buf_offset = handle->cur_offset; + if (handle->cur_offset + count >= PAGE_SIZE) { + count = PAGE_SIZE - handle->cur_offset; + handle->cur_offset = 0; + handle->cur++; } else { - handle->page_offset += count; + handle->cur_offset += count; } handle->offset += count; return count; @@ -863,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) int snapshot_image_loaded(struct snapshot_handle *handle) { - return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || - 
handle->page <= nr_meta_pages + nr_copy_pages); + return !(!nr_copy_pages || + handle->cur <= nr_meta_pages + nr_copy_pages); +} + +void snapshot_free_unused_memory(struct snapshot_handle *handle) +{ + /* Free only if we have loaded the image entirely */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f1dd146bd64..9b2ee5344de 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -22,6 +22,7 @@ #include <linux/device.h> #include <linux/buffer_head.h> #include <linux/bio.h> +#include <linux/blkdev.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pm.h> @@ -49,18 +50,16 @@ static int mark_swapfiles(swp_entry_t start) { int error; - rw_swap_page_sync(READ, - swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header)); + rw_swap_page_sync(READ, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), NULL); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.image = start; - error = rw_swap_page_sync(WRITE, - swp_entry(root_swap, 0), - virt_to_page((unsigned long) - &swsusp_header)); + error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), + NULL); } else { pr_debug("swsusp: Partition is not swap space.\n"); error = -ENODEV; @@ -88,16 +87,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */ * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. 
+ * @bio_chain: Link the next write BIO here */ -static int write_page(void *buf, unsigned long offset) +static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) { swp_entry_t entry; int error = -ENOSPC; if (offset) { + struct page *page = virt_to_page(buf); + + if (bio_chain) { + /* + * Whether or not we successfully allocated a copy page, + * we take a ref on the page here. It gets undone in + * wait_on_bio_chain(). + */ + struct page *page_copy; + page_copy = alloc_page(GFP_ATOMIC); + if (page_copy == NULL) { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + get_page(page); + } else { + memcpy(page_address(page_copy), + page_address(page), PAGE_SIZE); + page = page_copy; + } + } entry = swp_entry(root_swap, offset); - error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf)); + error = rw_swap_page_sync(WRITE, entry, page, bio_chain); } return error; } @@ -146,6 +166,26 @@ static void release_swap_writer(struct swap_map_handle *handle) handle->bitmap = NULL; } +static void show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) +{ + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; + + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); +} + static int get_swap_writer(struct swap_map_handle *handle) { handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); @@ -165,37 +205,70 @@ static int get_swap_writer(struct swap_map_handle *handle) return 0; } -static int swap_write_page(struct swap_map_handle *handle, void *buf) +static int wait_on_bio_chain(struct bio **bio_chain) { - int error; + struct bio *bio; + 
struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + if (bio == NULL) + return 0; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + bio = next_bio; + } + *bio_chain = NULL; + return ret; +} + +static int swap_write_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + int error = 0; unsigned long offset; if (!handle->cur) return -EINVAL; offset = alloc_swap_page(root_swap, handle->bitmap); - error = write_page(buf, offset); + error = write_page(buf, offset, bio_chain); if (error) return error; handle->cur->entries[handle->k++] = offset; if (handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); + if (error) + goto out; offset = alloc_swap_page(root_swap, handle->bitmap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap); + error = write_page(handle->cur, handle->cur_swap, NULL); if (error) - return error; + goto out; memset(handle->cur, 0, PAGE_SIZE); handle->cur_swap = offset; handle->k = 0; } - return 0; +out: + return error; } static int flush_swap_writer(struct swap_map_handle *handle) { if (handle->cur && handle->cur_swap) - return write_page(handle->cur, handle->cur_swap); + return write_page(handle->cur, handle->cur_swap, NULL); else return -EINVAL; } @@ -206,21 +279,29 @@ static int flush_swap_writer(struct swap_map_handle *handle) static int save_image(struct swap_map_handle *handle, struct snapshot_handle *snapshot, - unsigned int nr_pages) + unsigned int nr_to_write) { unsigned int m; int ret; int error = 0; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; - printk("Saving image data pages (%u pages) ... 
", nr_pages); - m = nr_pages / 100; + printk("Saving image data pages (%u pages) ... ", nr_to_write); + m = nr_to_write / 100; if (!m) m = 1; nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); do { ret = snapshot_read_next(snapshot, PAGE_SIZE); if (ret > 0) { - error = swap_write_page(handle, data_of(*snapshot)); + error = swap_write_page(handle, data_of(*snapshot), + &bio); if (error) break; if (!(nr_pages % m)) @@ -228,8 +309,13 @@ static int save_image(struct swap_map_handle *handle, nr_pages++; } } while (ret > 0); + err2 = wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) printk("\b\b\b\bdone\n"); + show_speed(&start, &stop, nr_to_write, "Wrote"); return error; } @@ -245,8 +331,7 @@ static int enough_swap(unsigned int nr_pages) unsigned int free_swap = count_swap_pages(root_swap, 1); pr_debug("swsusp: free swap pages: %u\n", free_swap); - return free_swap > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); + return free_swap > nr_pages + PAGES_FOR_IO; } /** @@ -266,7 +351,8 @@ int swsusp_write(void) int error; if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); + printk(KERN_ERR "swsusp: Cannot find swap device, try " + "swapon -a.\n"); return error; } memset(&snapshot, 0, sizeof(struct snapshot_handle)); @@ -281,7 +367,7 @@ int swsusp_write(void) error = get_swap_writer(&handle); if (!error) { unsigned long start = handle.cur_swap; - error = swap_write_page(&handle, header); + error = swap_write_page(&handle, header, NULL); if (!error) error = save_image(&handle, &snapshot, header->pages - 1); @@ -298,27 +384,6 @@ int swsusp_write(void) return error; } -/* - * Using bio to read from swap. - * This code requires a bit more work than just using buffer heads - * but, it is the recommended way for 2.5/2.6. - * The following are to signal the beginning and end of I/O. 
Bios - * finish asynchronously, while we want them to happen synchronously. - * A simple atomic_t, and a wait loop take care of this problem. - */ - -static atomic_t io_done = ATOMIC_INIT(0); - -static int end_io(struct bio *bio, unsigned int num, int err) -{ - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk(KERN_ERR "I/O error reading swsusp image.\n"); - return -EIO; - } - atomic_set(&io_done, 0); - return 0; -} - static struct block_device *resume_bdev; /** @@ -326,15 +391,15 @@ static struct block_device *resume_bdev; * @rw: READ or WRITE. * @off physical offset of page. * @page: page we're reading or writing. + * @bio_chain: list of pending biod (for async reading) * * Straight from the textbook - allocate and initialize the bio. - * If we're writing, make sure the page is marked as dirty. - * Then submit it and wait. + * If we're reading, make sure the page is marked as dirty. + * Then submit it and, if @bio_chain == NULL, wait. */ - -static int submit(int rw, pgoff_t page_off, void *page) +static int submit(int rw, pgoff_t page_off, struct page *page, + struct bio **bio_chain) { - int error = 0; struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); @@ -342,33 +407,40 @@ static int submit(int rw, pgoff_t page_off, void *page) return -ENOMEM; bio->bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = resume_bdev; - bio->bi_end_io = end_io; + bio->bi_end_io = end_swap_bio_read; - if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { - printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); - error = -EFAULT; - goto Done; + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); + bio_put(bio); + return -EFAULT; } - atomic_set(&io_done, 1); - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - while (atomic_read(&io_done)) - yield(); - if (rw == READ) - bio_set_pages_dirty(bio); - Done: - bio_put(bio); - return error; + lock_page(page); + bio_get(bio); + + if (bio_chain == 
NULL) { + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + wait_on_page_locked(page); + if (rw == READ) + bio_set_pages_dirty(bio); + bio_put(bio); + } else { + get_page(page); + bio->bi_private = *bio_chain; + *bio_chain = bio; + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + } + return 0; } -static int bio_read_page(pgoff_t page_off, void *page) +static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) { - return submit(READ, page_off, page); + return submit(READ, page_off, virt_to_page(addr), bio_chain); } -static int bio_write_page(pgoff_t page_off, void *page) +static int bio_write_page(pgoff_t page_off, void *addr) { - return submit(WRITE, page_off, page); + return submit(WRITE, page_off, virt_to_page(addr), NULL); } /** @@ -393,7 +465,7 @@ static int get_swap_reader(struct swap_map_handle *handle, handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); if (!handle->cur) return -ENOMEM; - error = bio_read_page(swp_offset(start), handle->cur); + error = bio_read_page(swp_offset(start), handle->cur, NULL); if (error) { release_swap_reader(handle); return error; @@ -402,7 +474,8 @@ static int get_swap_reader(struct swap_map_handle *handle, return 0; } -static int swap_read_page(struct swap_map_handle *handle, void *buf) +static int swap_read_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) { unsigned long offset; int error; @@ -412,16 +485,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = bio_read_page(offset, buf); + error = bio_read_page(offset, buf, bio_chain); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); handle->k = 0; offset = handle->cur->next_swap; if (!offset) release_swap_reader(handle); - else - error = bio_read_page(offset, handle->cur); + else if (!error) + error = bio_read_page(offset, handle->cur, NULL); } return error; } @@ -434,33 
+508,49 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) static int load_image(struct swap_map_handle *handle, struct snapshot_handle *snapshot, - unsigned int nr_pages) + unsigned int nr_to_read) { unsigned int m; - int ret; int error = 0; + struct timeval start; + struct timeval stop; + struct bio *bio; + int err2; + unsigned nr_pages; - printk("Loading image data pages (%u pages) ... ", nr_pages); - m = nr_pages / 100; + printk("Loading image data pages (%u pages) ... ", nr_to_read); + m = nr_to_read / 100; if (!m) m = 1; nr_pages = 0; - do { - ret = snapshot_write_next(snapshot, PAGE_SIZE); - if (ret > 0) { - error = swap_read_page(handle, data_of(*snapshot)); - if (error) - break; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; - } - } while (ret > 0); + bio = NULL; + do_gettimeofday(&start); + for ( ; ; ) { + error = snapshot_write_next(snapshot, PAGE_SIZE); + if (error <= 0) + break; + error = swap_read_page(handle, data_of(*snapshot), &bio); + if (error) + break; + if (snapshot->sync_read) + error = wait_on_bio_chain(&bio); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) { printk("\b\b\b\bdone\n"); + snapshot_free_unused_memory(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } + show_speed(&start, &stop, nr_to_read, "Read"); return error; } @@ -483,7 +573,7 @@ int swsusp_read(void) header = (struct swsusp_info *)data_of(snapshot); error = get_swap_reader(&handle, swsusp_header.image); if (!error) - error = swap_read_page(&handle, header); + error = swap_read_page(&handle, header, NULL); if (!error) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); @@ -509,7 +599,7 @@ int swsusp_check(void) if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); memset(&swsusp_header, 0, 
sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header))) + if ((error = bio_read_page(0, &swsusp_header, NULL))) return error; if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 17f669c8301..8ef677ea0ce 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -193,14 +193,13 @@ int swsusp_shrink_memory(void) printk("Shrinking memory... "); do { size = 2 * count_highmem_pages(); - size += size / 50 + count_data_pages(); - size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + - PAGES_FOR_IO; + size += size / 50 + count_data_pages() + PAGES_FOR_IO; tmp = size; for_each_zone (zone) if (!is_highmem(zone) && populated_zone(zone)) { tmp -= zone->free_pages; tmp += zone->lowmem_reserve[ZONE_NORMAL]; + tmp += snapshot_additional_pages(zone); } if (tmp > 0) { tmp = __shrink_memory(tmp); diff --git a/kernel/power/user.c b/kernel/power/user.c index 3f1539fbe48..2e4499f3e4d 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -19,6 +19,7 @@ #include <linux/swapops.h> #include <linux/pm.h> #include <linux/fs.h> +#include <linux/cpu.h> #include <asm/uaccess.h> @@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; down(&pm_sem); - disable_nonboot_cpus(); - if (freeze_processes()) { - thaw_processes(); - enable_nonboot_cpus(); - error = -EBUSY; + error = disable_nonboot_cpus(); + if (!error) { + error = freeze_processes(); + if (error) { + thaw_processes(); + error = -EBUSY; + } } + enable_nonboot_cpus(); up(&pm_sem); if (!error) data->frozen = 1; @@ -189,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } + snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); error = device_suspend(PMSG_FREEZE); diff --git a/kernel/printk.c b/kernel/printk.c index 1149365e989..771f5e861bc 100644 --- 
a/kernel/printk.c +++ b/kernel/printk.c @@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options) return 0; } +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /** * suspend_console - suspend the console subsystem * @@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options) */ void suspend_console(void) { + printk("Suspending console(s)\n"); acquire_console_sem(); console_suspended = 1; } @@ -737,6 +739,7 @@ void resume_console(void) console_suspended = 0; release_console_sem(); } +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /** * acquire_console_sem - lock the console system for exclusive use. diff --git a/kernel/profile.c b/kernel/profile.c index d5bd75e7501..fb660c7d35b 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -309,13 +309,17 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, node = cpu_to_node(cpu); per_cpu(cpu_profile_flip, cpu) = 0; if (!per_cpu(cpu_profile_hits, cpu)[1]) { - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) return NOTIFY_BAD; per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); } if (!per_cpu(cpu_profile_hits, cpu)[0]) { - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto out_free; per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); @@ -491,12 +495,16 @@ static int __init create_hash_tables(void) int node = cpu_to_node(cpu); struct page *page; - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[1] = (struct profile_hit *)page_address(page); - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto 
out_cleanup; per_cpu(cpu_profile_hits, cpu)[0] diff --git a/kernel/sched.c b/kernel/sched.c index a234fbee123..5c848fd4e46 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -238,6 +238,7 @@ struct rq { /* For active balancing */ int active_balance; int push_cpu; + int cpu; /* cpu of this runqueue */ struct task_struct *migration_thread; struct list_head migration_queue; @@ -267,6 +268,15 @@ struct rq { static DEFINE_PER_CPU(struct rq, runqueues); +static inline int cpu_of(struct rq *rq) +{ +#ifdef CONFIG_SMP + return rq->cpu; +#else + return 0; +#endif +} + /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -2211,7 +2221,8 @@ out: */ static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, - unsigned long *imbalance, enum idle_type idle, int *sd_idle) + unsigned long *imbalance, enum idle_type idle, int *sd_idle, + cpumask_t *cpus) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, sum_weighted_load = sum_nr_running = avg_load = 0; for_each_cpu_mask(i, group->cpumask) { - struct rq *rq = cpu_rq(i); + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && !idle_cpu(i)) *sd_idle = 0; @@ -2466,13 +2482,17 @@ ret: */ static struct rq * find_busiest_queue(struct sched_group *group, enum idle_type idle, - unsigned long imbalance) + unsigned long imbalance, cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; for_each_cpu_mask(i, group->cpumask) { + + if (!cpu_isset(i, *cpus)) + continue; + rq = cpu_rq(i); if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) @@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; unsigned long imbalance; struct rq 
*busiest; + cpumask_t cpus = CPU_MASK_ALL; if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) @@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_inc(sd, lb_cnt[idle]); - group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); +redo: + group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, + &cpus); if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); goto out_balanced; } - busiest = find_busiest_queue(group, idle, imbalance); + busiest = find_busiest_queue(group, idle, imbalance, &cpus); if (!busiest) { schedstat_inc(sd, lb_nobusyq[idle]); goto out_balanced; @@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq, double_rq_unlock(this_rq, busiest); /* All tasks on this runqueue were pinned by CPU affinity */ - if (unlikely(all_pinned)) + if (unlikely(all_pinned)) { + cpu_clear(cpu_of(busiest), cpus); + if (!cpus_empty(cpus)) + goto redo; goto out_balanced; + } } if (!nr_moved) { @@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) unsigned long imbalance; int nr_moved = 0; int sd_idle = 0; + cpumask_t cpus = CPU_MASK_ALL; if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) sd_idle = 1; schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); - group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); +redo: + group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, + &sd_idle, &cpus); if (!group) { schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); goto out_balanced; } - busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance); + busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance, + &cpus); if (!busiest) { schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); goto out_balanced; @@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) minus_1_or_zero(busiest->nr_running), imbalance, sd, NEWLY_IDLE, NULL); spin_unlock(&busiest->lock); + + if 
(!nr_moved) { + cpu_clear(cpu_of(busiest), cpus); + if (!cpus_empty(cpus)) + goto redo; + } } if (!nr_moved) { @@ -6747,6 +6784,7 @@ void __init sched_init(void) rq->cpu_load[j] = 0; rq->active_balance = 0; rq->push_cpu = 0; + rq->cpu = i; rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc3713..fd43c3e6786 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -943,6 +943,17 @@ static ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 554ee688a9f..3f21cc79a13 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -277,7 +277,7 @@ config DEBUG_HIGHMEM config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED depends on BUG - depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV + depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV default !EMBEDDED help Say Y here to make BUG() panics output the file name and line number @@ -315,7 +315,7 @@ config DEBUG_VM config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390) + depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32) default y if DEBUG_INFO && UML help If you say Y here the resulting kernel image will be slightly larger diff --git a/mm/Makefile b/mm/Makefile index 9dd824c11ee..60c56c0b5e1 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_MEMORY_HOTPLUG) += 
memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o - +obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c new file mode 100644 index 00000000000..eaa9abeea53 --- /dev/null +++ b/mm/allocpercpu.c @@ -0,0 +1,129 @@ +/* + * linux/mm/allocpercpu.c + * + * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com> + */ +#include <linux/mm.h> +#include <linux/module.h> + +/** + * percpu_depopulate - depopulate per-cpu data for given cpu + * @__pdata: per-cpu data to depopulate + * @cpu: depopulate per-cpu data for this cpu + * + * Depopulating per-cpu data for a cpu going offline would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. + */ +void percpu_depopulate(void *__pdata, int cpu) +{ + struct percpu_data *pdata = __percpu_disguise(__pdata); + if (pdata->ptrs[cpu]) { + kfree(pdata->ptrs[cpu]); + pdata->ptrs[cpu] = NULL; + } +} +EXPORT_SYMBOL_GPL(percpu_depopulate); + +/** + * percpu_depopulate_mask - depopulate per-cpu data for some cpu's + * @__pdata: per-cpu data to depopulate + * @mask: depopulate per-cpu data for cpu's selected through mask bits + */ +void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ + int cpu; + for_each_cpu_mask(cpu, *mask) + percpu_depopulate(__pdata, cpu); +} +EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); + +/** + * percpu_populate - populate per-cpu data for given cpu + * @__pdata: per-cpu data to populate further + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @cpu: populate per-data for this cpu + * + * Populating per-cpu data for a cpu coming online would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. + * Per-cpu object is populated with zeroed buffer. 
+ */ +void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) +{ + struct percpu_data *pdata = __percpu_disguise(__pdata); + int node = cpu_to_node(cpu); + + BUG_ON(pdata->ptrs[cpu]); + if (node_online(node)) { + /* FIXME: kzalloc_node(size, gfp, node) */ + pdata->ptrs[cpu] = kmalloc_node(size, gfp, node); + if (pdata->ptrs[cpu]) + memset(pdata->ptrs[cpu], 0, size); + } else + pdata->ptrs[cpu] = kzalloc(size, gfp); + return pdata->ptrs[cpu]; +} +EXPORT_SYMBOL_GPL(percpu_populate); + +/** + * percpu_populate_mask - populate per-cpu data for more cpu's + * @__pdata: per-cpu data to populate further + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @mask: populate per-cpu data for cpu's selected through mask bits + * + * Per-cpu objects are populated with zeroed buffers. + */ +int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + cpumask_t populated = CPU_MASK_NONE; + int cpu; + + for_each_cpu_mask(cpu, *mask) + if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { + __percpu_depopulate_mask(__pdata, &populated); + return -ENOMEM; + } else + cpu_set(cpu, populated); + return 0; +} +EXPORT_SYMBOL_GPL(__percpu_populate_mask); + +/** + * percpu_alloc_mask - initial setup of per-cpu data + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @mask: populate per-data for cpu's selected through mask bits + * + * Populating per-cpu data for all online cpu's would be a typical use case, + * which is simplified by the percpu_alloc() wrapper. + * Per-cpu objects are populated with zeroed buffers. 
+ */ +void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + void *pdata = kzalloc(sizeof(struct percpu_data), gfp); + void *__pdata = __percpu_disguise(pdata); + + if (unlikely(!pdata)) + return NULL; + if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) + return __pdata; + kfree(pdata); + return NULL; +} +EXPORT_SYMBOL_GPL(__percpu_alloc_mask); + +/** + * percpu_free - final cleanup of per-cpu data + * @__pdata: object to clean up + * + * We simply clean up any per-cpu object left. No need for the client to + * track and specify through a bis mask which per-cpu objects are to free. + */ +void percpu_free(void *__pdata) +{ + __percpu_depopulate_mask(__pdata, &cpu_possible_map); + kfree(__percpu_disguise(__pdata)); +} +EXPORT_SYMBOL_GPL(percpu_free); diff --git a/mm/bootmem.c b/mm/bootmem.c index 50353e0dac1..d53112fcb40 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -8,17 +8,15 @@ * free memory collector. It's used to deal with reserved * system memory and memory holes as well. 
*/ - -#include <linux/mm.h> -#include <linux/kernel_stat.h> -#include <linux/swap.h> -#include <linux/interrupt.h> #include <linux/init.h> +#include <linux/pfn.h> #include <linux/bootmem.h> -#include <linux/mmzone.h> #include <linux/module.h> -#include <asm/dma.h> + +#include <asm/bug.h> #include <asm/io.h> +#include <asm/processor.h> + #include "internal.h" /* @@ -41,7 +39,7 @@ unsigned long saved_max_pfn; #endif /* return the number of _pages_ that will be allocated for the boot bitmap */ -unsigned long __init bootmem_bootmap_pages (unsigned long pages) +unsigned long __init bootmem_bootmap_pages(unsigned long pages) { unsigned long mapsize; @@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) return mapsize; } + /* * link bdata in order */ -static void link_bootmem(bootmem_data_t *bdata) +static void __init link_bootmem(bootmem_data_t *bdata) { bootmem_data_t *ent; + if (list_empty(&bdata_list)) { list_add(&bdata->list, &bdata_list); return; @@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata) } } list_add_tail(&bdata->list, &bdata_list); - return; } +/* + * Given an initialised bdata, it returns the size of the boot bitmap + */ +static unsigned long __init get_mapsize(bootmem_data_t *bdata) +{ + unsigned long mapsize; + unsigned long start = PFN_DOWN(bdata->node_boot_start); + unsigned long end = bdata->node_low_pfn; + + mapsize = ((end - start) + 7) / 8; + return ALIGN(mapsize, sizeof(long)); +} /* * Called once to set up the allocator itself. 
*/ -static unsigned long __init init_bootmem_core (pg_data_t *pgdat, +static unsigned long __init init_bootmem_core(pg_data_t *pgdat, unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; - unsigned long mapsize = ((end - start)+7)/8; + unsigned long mapsize; - mapsize = ALIGN(mapsize, sizeof(long)); - bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); - bdata->node_boot_start = (start << PAGE_SHIFT); + bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); + bdata->node_boot_start = PFN_PHYS(start); bdata->node_low_pfn = end; link_bootmem(bdata); @@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * Initially all pages are reserved - setup_arch() has to * register free RAM areas explicitly. */ + mapsize = get_mapsize(bdata); memset(bdata->node_bootmem_map, 0xff, mapsize); return mapsize; @@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; + /* * round up, partially reserved pages are considered * fully reserved. 
*/ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; - unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(sidx >= eidx); - BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); + BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); + + sidx = PFN_DOWN(addr - bdata->node_boot_start); + eidx = PFN_UP(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) if (test_and_set_bit(i, bdata->node_bootmem_map)) { @@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; - unsigned long start; + /* * round down end of usable mem, partially free pages are * considered reserved. */ - unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn); if (addr < bdata->last_success) bdata->last_success = addr; @@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, /* * Round up the beginning of the address. 
*/ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); + eidx = PFN_DOWN(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) { if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) @@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; + unsigned long i, start = 0, incr, eidx, end_pfn; void *ret; - if(!size) { + if (!size) { printk("__alloc_bootmem_core(): zero-sized request\n"); BUG(); } @@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, if (limit && bdata->node_boot_start >= limit) return NULL; - limit >>=PAGE_SHIFT; + end_pfn = bdata->node_low_pfn; + limit = PFN_DOWN(limit); if (limit && end_pfn > limit) end_pfn = limit; - eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); offset = 0; - if (align && - (bdata->node_boot_start & (align - 1UL)) != 0) - offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; + if (align && (bdata->node_boot_start & (align - 1UL)) != 0) + offset = align - (bdata->node_boot_start & (align - 1UL)); + offset = PFN_DOWN(offset); /* * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. 
*/ - if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < end_pfn)) { + if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) @@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } else preferred = 0; - preferred = ALIGN(preferred, align) >> PAGE_SHIFT; - preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; + preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; + areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; incr = align >> PAGE_SHIFT ? : 1; restart_scan: @@ -229,7 +236,7 @@ restart_scan: for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; - if (test_bit (j, bdata->node_bootmem_map)) + if (test_bit(j, bdata->node_bootmem_map)) goto fail_block; } start = i; @@ -245,7 +252,7 @@ restart_scan: return NULL; found: - bdata->last_success = start << PAGE_SHIFT; + bdata->last_success = PFN_PHYS(start); BUG_ON(start >= eidx); /* @@ -257,19 +264,21 @@ found: bdata->last_offset && bdata->last_pos+1 == start) { offset = ALIGN(bdata->last_offset, align); BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE-offset; + remaining_size = PAGE_SIZE - offset; if (size < remaining_size) { areasize = 0; /* last_pos unchanged */ - bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); + bdata->last_offset = offset + size; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - bdata->last_pos = start+areasize-1; + areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); + bdata->last_pos = start + areasize - 1; bdata->last_offset 
= remaining_size; } bdata->last_offset &= ~PAGE_MASK; @@ -282,7 +291,7 @@ found: /* * Reserve the area now: */ - for (i = start; i < start+areasize; i++) + for (i = start; i < start + areasize; i++) if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); memset(ret, 0, size); @@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) count = 0; /* first extant page of the node */ - pfn = bdata->node_boot_start >> PAGE_SHIFT; - idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + pfn = PFN_DOWN(bdata->node_boot_start); + idx = bdata->node_low_pfn - pfn; map = bdata->node_bootmem_map; /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ if (bdata->node_boot_start == 0 || @@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } } } else { - i+=BITS_PER_LONG; + i += BITS_PER_LONG; } pfn += BITS_PER_LONG; } @@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) */ page = virt_to_page(bdata->node_bootmem_map); count = 0; - for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { - count++; + idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; + for (i = 0; i < idx; i++, page++) { __free_pages_bootmem(page, 0); + count++; } total += count; bdata->node_bootmem_map = NULL; @@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) return total; } -unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) +unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, + unsigned long startpfn, unsigned long endpfn) { - return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); + return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init 
reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { reserve_bootmem_core(pgdat->bdata, physaddr, size); } -void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } -unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) +unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { - return(free_all_bootmem_core(pgdat)); + return free_all_bootmem_core(pgdat); } -unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; min_low_pfn = start; - return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); + return init_bootmem_core(NODE_DATA(0), start, 0, pages); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -void __init reserve_bootmem (unsigned long addr, unsigned long size) +void __init reserve_bootmem(unsigned long addr, unsigned long size) { reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -void __init free_bootmem (unsigned long addr, unsigned long size) +void __init free_bootmem(unsigned long addr, unsigned long size) { free_bootmem_core(NODE_DATA(0)->bdata, addr, size); } -unsigned long __init free_all_bootmem (void) +unsigned long __init free_all_bootmem(void) { - return(free_all_bootmem_core(NODE_DATA(0))); + return free_all_bootmem_core(NODE_DATA(0)); } -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = 
__alloc_bootmem_core(bdata, size, align, goal, 0); + if (ptr) + return ptr; + } return NULL; } -void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem(unsigned long size, unsigned long align, + unsigned long goal) { void *mem = __alloc_bootmem_nopanic(size,align,goal); + if (mem) return mem; /* @@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned } -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal) +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) { void *ptr; ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) - return (ptr); + return ptr; return __alloc_bootmem(size, align, goal); } -#define LOW32LIMIT 0xffffffff +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, - align, goal, LOW32LIMIT))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); + if (ptr) + return ptr; + } /* * Whoops, we cannot satisfy the allocation request. 
@@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); + return __alloc_bootmem_core(pgdat->bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/filemap.c b/mm/filemap.c index b9a60c43b61..afcdc72b5e9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x) EXPORT_SYMBOL(page_cache_alloc_cold); #endif +static int __sleep_on_page_lock(void *word) +{ + io_schedule(); + return 0; +} + /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page) } EXPORT_SYMBOL(__lock_page); +/* + * Variant of lock_page that does not require the caller to hold a reference + * on the page's mapping. + */ +void fastcall __lock_page_nosync(struct page *page) +{ + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, + TASK_UNINTERRUPTIBLE); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * - * A rather lightweight function, finding and getting a reference to a - * hashed page atomically. + * Is there a pagecache struct page at the given (mapping, offset) tuple? + * If yes, increment its refcount and return it; if no, return NULL. */ struct page * find_get_page(struct address_space *mapping, unsigned long offset) { @@ -970,7 +987,7 @@ page_not_up_to_date: /* Get exclusive access to the page ... */ lock_page(page); - /* Did it get unhashed before we got the lock? */ + /* Did it get truncated before we got the lock? 
*/ if (!page->mapping) { unlock_page(page); page_cache_release(page); @@ -1610,7 +1627,7 @@ no_cached_page: page_not_uptodate: lock_page(page); - /* Did it get unhashed while we waited for it? */ + /* Did it get truncated while we waited for it? */ if (!page->mapping) { unlock_page(page); goto err; diff --git a/mm/fremap.c b/mm/fremap.c index 21b7d0cbc98..aa30618ec6b 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter(mm, file_rss); flush_icache_page(vma, page); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); + pte_val = mk_pte(page, prot); + set_pte_at(mm, addr, pte, pte_val); page_add_file_rmap(page); - pte_val = *pte; update_mmu_cache(vma, addr, pte_val); lazy_mmu_prot_update(pte_val); err = 0; diff --git a/mm/highmem.c b/mm/highmem.c index 9b2a5403c44..ee5519b176e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) */ #ifdef CONFIG_HIGHMEM +unsigned long totalhigh_pages __read_mostly; + +unsigned int nr_free_highpages (void) +{ + pg_data_t *pgdat; + unsigned int pages = 0; + + for_each_online_pgdat(pgdat) + pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; + + return pages; +} + static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df499973255..7c7d03dbf73 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, struct zone **z; for (z = zonelist->zones; *z; z++) { - nid = (*z)->zone_pgdat->node_id; + nid = zone_to_nid(*z); if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && !list_empty(&hugepage_freelists[nid])) break; @@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page) { int i; nr_huge_pages--; - nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; + 
nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | @@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page) #ifdef CONFIG_HIGHMEM static void try_to_free_low(unsigned long count) { - int i, nid; + int i; + for (i = 0; i < MAX_NUMNODES; ++i) { struct page *page, *next; list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { @@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count) continue; list_del(&page->lru); update_and_free_page(page); - nid = page_zone(page)->zone_pgdat->node_id; free_huge_pages--; - free_huge_pages_node[nid]--; + free_huge_pages_node[page_to_nid(page)]--; if (count >= nr_huge_pages) return; } diff --git a/mm/internal.h b/mm/internal.h index d20e3cc4aef..d527b80b292 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); - BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); + VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } diff --git a/mm/memory.c b/mm/memory.c index 109e9866237..92a3ebd8d79 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include <linux/module.h> #include <linux/delayacct.h> #include <linux/init.h> +#include <linux/writeback.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -1226,7 +1227,12 @@ out: return retval; } -/* +/** + * vm_insert_page - insert single page into user vma + * @vma: user vma to map to + * @addr: target user address of this page + * @page: source kernel page + * * This allows drivers to insert individual pages they've allocated * into a user vma. 
* @@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } -/* Note: this is only safe if the mm semaphore is held when called. */ +/** + * remap_pfn_range - remap kernel memory to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @prot: page protection flags for this mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { @@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse, ret = VM_FAULT_MINOR; + int reuse = 0, ret = VM_FAULT_MINOR; + struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) goto gotten; - if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) == - (VM_SHARED|VM_WRITE))) { + /* + * Take out anonymous pages first, anonymous shared vmas are + * not dirty accountable. + */ + if (PageAnon(old_page)) { + if (!TestSetPageLocked(old_page)) { + reuse = can_share_swap_page(old_page); + unlock_page(old_page); + } + } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED))) { + /* + * Only catch write-faults on shared writable pages, + * read-only shared pages can get COWed by + * get_user_pages(.write=1, .force=1). 
+ */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { /* * Notify the address space that the page is about to @@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) goto unlock; } - + dirty_page = old_page; + get_page(dirty_page); reuse = 1; - } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { - reuse = can_share_swap_page(old_page); - unlock_page(old_page); - } else { - reuse = 0; } if (reuse) { @@ -1566,6 +1592,10 @@ gotten: page_cache_release(old_page); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } return ret; oom: if (old_page) @@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/* - * Handle all mappings that got truncated by a "truncate()" - * system call. +/** + * vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating * * NOTE! We have to be ready to update the memory sharing * between the file and the memory map for a potential last @@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) } EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */ -/* +/** + * swapin_readahead - swap in pages in hope we need them soon + * @entry: swap entry of this memory + * @addr: address to start + * @vma: user vma this addresses belong to + * * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... + * the 'original' request together with the readahead ones... * * This has been extended to use the NUMA policies from the mm triggering * the readahead. 
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int sequence = 0; int ret = VM_FAULT_MINOR; int anon = 0; + struct page *dirty_page = NULL; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); @@ -2192,6 +2229,10 @@ retry: } else { inc_mm_counter(mm, file_rss); page_add_file_rmap(new_page); + if (write_access) { + dirty_page = new_page; + get_page(dirty_page); + } } } else { /* One of our sibling threads was faster, back out. */ @@ -2204,6 +2245,10 @@ retry: lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } return ret; oom: page_cache_release(new_page); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a9963ceddd6..38f89650bc8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. */ -int policy_zone = ZONE_DMA; +enum zone_type policy_zone = ZONE_DMA; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ @@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes) static struct zonelist *bind_zonelist(nodemask_t *nodes) { struct zonelist *zl; - int num, max, nd, k; + int num, max, nd; + enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); @@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. 
*/ - for (k = policy_zone; k >= 0; k--) { + k = policy_zone; + while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; if (z->present_pages > 0) zl->zones[num++] = z; } + if (k == 0) + break; + k--; } zl->zones[num] = NULL; return zl; @@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes) switch (p->policy) { case MPOL_BIND: for (i = 0; p->v.zonelist->zones[i]; i++) - node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, + node_set(zone_to_nid(p->v.zonelist->zones[i]), *nodes); break; case MPOL_DEFAULT: @@ -1140,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy) * Follow bind policy behavior and start allocation at the * first node. */ - return policy->v.zonelist->zones[0]->zone_pgdat->node_id; + return zone_to_nid(policy->v.zonelist->zones[0]); case MPOL_PREFERRED: if (policy->v.preferred_node >= 0) @@ -1285,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); @@ -1644,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) - node_set((*z)->zone_pgdat->node_id, nodes); + node_set(zone_to_nid(*z), nodes); nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; diff --git a/mm/migrate.c b/mm/migrate.c index 3f1e0c2c942..20a8c2687b1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -741,7 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, *result = &pm->status; - return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); + return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0); } /* diff --git a/mm/mmap.c b/mm/mmap.c index d799d896d74..eea8eefd51a 100644 --- 
a/mm/mmap.c +++ b/mm/mmap.c @@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root @@ -1105,12 +1105,6 @@ munmap_back: goto free_vma; } - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - vma->vm_page_prot = - protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; - /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) * that memory reservation must be checked; but that reservation @@ -1128,6 +1122,10 @@ munmap_back: pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; + if (vma_wants_writenotify(vma)) + vma->vm_page_prot = + protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + if (!file || !vma_merge(mm, prev, addr, vma->vm_end, vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { file = vma->vm_file; diff --git a/mm/mprotect.c b/mm/mprotect.c index 638edabaff7..955f9d0e38a 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -27,7 +27,8 @@ #include <asm/tlbflush.h> static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pte_t *pte, oldpte; spinlock_t *ptl; @@ -42,7 +43,14 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, * bits by wiping the pte and then setting the new pte * into place. */ - ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot); + ptent = ptep_get_and_clear(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + /* + * Avoid taking write faults for pages we know to be + * dirty. 
+ */ + if (dirty_accountable && pte_dirty(ptent)) + ptent = pte_mkwrite(ptent); set_pte_at(mm, addr, pte, ptent); lazy_mmu_prot_update(ptent); #ifdef CONFIG_MIGRATION @@ -66,7 +74,8 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, } static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pmd_t *pmd; unsigned long next; @@ -76,12 +85,13 @@ static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - change_pte_range(mm, pmd, addr, next, newprot); + change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); } while (pmd++, addr = next, addr != end); } static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pud_t *pud; unsigned long next; @@ -91,12 +101,13 @@ static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - change_pmd_range(mm, pud, addr, next, newprot); + change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable); } while (pud++, addr = next, addr != end); } static void change_protection(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; @@ -110,7 +121,7 @@ static void change_protection(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - change_pud_range(mm, pgd, addr, next, newprot); + change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable); } while (pgd++, addr = next, addr != end); flush_tlb_range(vma, start, end); 
} @@ -123,10 +134,9 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; unsigned long charged = 0; - unsigned int mask; - pgprot_t newprot; pgoff_t pgoff; int error; + int dirty_accountable = 0; if (newflags == oldflags) { *pprev = vma; @@ -176,24 +186,23 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, } success: - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED; - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - mask &= ~VM_SHARED; - - newprot = protection_map[newflags & mask]; - /* * vm_flags and vm_page_prot are protected by the mmap_sem * held in write mode. */ vma->vm_flags = newflags; - vma->vm_page_prot = newprot; + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + if (vma_wants_writenotify(vma)) { + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC)]; + dirty_accountable = 1; + } + if (is_vm_hugetlb_page(vma)) - hugetlb_change_protection(vma, start, end, newprot); + hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else - change_protection(vma, start, end, newprot); + change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/msync.c b/mm/msync.c index d083544df21..358d73cf7b7 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -7,149 +7,33 @@ /* * The msync() system call. 
*/ -#include <linux/slab.h> -#include <linux/pagemap.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/hugetlb.h> -#include <linux/writeback.h> #include <linux/file.h> #include <linux/syscalls.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> - -static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end) -{ - pte_t *pte; - spinlock_t *ptl; - int progress = 0; - unsigned long ret = 0; - -again: - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - do { - struct page *page; - - if (progress >= 64) { - progress = 0; - if (need_resched() || need_lockbreak(ptl)) - break; - } - progress++; - if (!pte_present(*pte)) - continue; - if (!pte_maybe_dirty(*pte)) - continue; - page = vm_normal_page(vma, addr, *pte); - if (!page) - continue; - if (ptep_clear_flush_dirty(vma, addr, pte) || - page_test_and_clear_dirty(page)) - ret += set_page_dirty(page); - progress += 3; - } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(pte - 1, ptl); - cond_resched(); - if (addr != end) - goto again; - return ret; -} - -static inline unsigned long msync_pmd_range(struct vm_area_struct *vma, - pud_t *pud, unsigned long addr, unsigned long end) -{ - pmd_t *pmd; - unsigned long next; - unsigned long ret = 0; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - ret += msync_pte_range(vma, pmd, addr, next); - } while (pmd++, addr = next, addr != end); - return ret; -} - -static inline unsigned long msync_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; - unsigned long ret = 0; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - ret += msync_pmd_range(vma, pud, addr, next); - } while (pud++, addr = next, addr != end); - return ret; -} - -static unsigned long 
msync_page_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end) -{ - pgd_t *pgd; - unsigned long next; - unsigned long ret = 0; - - /* For hugepages we can't go walking the page table normally, - * but that's ok, hugetlbfs is memory based, so we don't need - * to do anything more on an msync(). - */ - if (vma->vm_flags & VM_HUGETLB) - return 0; - - BUG_ON(addr >= end); - pgd = pgd_offset(vma->vm_mm, addr); - flush_cache_range(vma, addr, end); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - ret += msync_pud_range(vma, pgd, addr, next); - } while (pgd++, addr = next, addr != end); - return ret; -} - /* * MS_SYNC syncs the entire file - including mappings. * - * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just - * marks the relevant pages dirty. The application may now run fsync() to + * MS_ASYNC does not start I/O (it used to, up to 2.5.67). + * Nor does it marks the relevant pages dirty (it used to up to 2.6.17). + * Now it doesn't do anything, since dirty pages are properly tracked. + * + * The application may now run fsync() to * write out the dirty pages and wait on the writeout and check the result. * Or the application may run fadvise(FADV_DONTNEED) against the fd to start * async writeout immediately. * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to * applications. 
*/ -static int msync_interval(struct vm_area_struct *vma, unsigned long addr, - unsigned long end, int flags, - unsigned long *nr_pages_dirtied) -{ - struct file *file = vma->vm_file; - - if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED)) - return -EBUSY; - - if (file && (vma->vm_flags & VM_SHARED)) - *nr_pages_dirtied = msync_page_range(vma, addr, end); - return 0; -} - asmlinkage long sys_msync(unsigned long start, size_t len, int flags) { unsigned long end; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int unmapped_error = 0; int error = -EINVAL; - int done = 0; if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) goto out; @@ -169,64 +53,50 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) * If the interval [start,end) covers some unmapped address ranges, * just ignore them, but return -ENOMEM at the end. */ - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma) { - error = -ENOMEM; - goto out_unlock; - } - do { - unsigned long nr_pages_dirtied = 0; + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + for (;;) { struct file *file; + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + goto out_unlock; /* Here start < vma->vm_end. */ if (start < vma->vm_start) { - unmapped_error = -ENOMEM; start = vma->vm_start; + if (start >= end) + goto out_unlock; + unmapped_error = -ENOMEM; } /* Here vma->vm_start <= start < vma->vm_end. */ - if (end <= vma->vm_end) { - if (start < end) { - error = msync_interval(vma, start, end, flags, - &nr_pages_dirtied); - if (error) - goto out_unlock; - } - error = unmapped_error; - done = 1; - } else { - /* Here vma->vm_start <= start < vma->vm_end < end. 
*/ - error = msync_interval(vma, start, vma->vm_end, flags, - &nr_pages_dirtied); - if (error) - goto out_unlock; + if ((flags & MS_INVALIDATE) && + (vma->vm_flags & VM_LOCKED)) { + error = -EBUSY; + goto out_unlock; } file = vma->vm_file; start = vma->vm_end; - if ((flags & MS_ASYNC) && file && nr_pages_dirtied) { - get_file(file); - up_read(&current->mm->mmap_sem); - balance_dirty_pages_ratelimited_nr(file->f_mapping, - nr_pages_dirtied); - fput(file); - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, start); - } else if ((flags & MS_SYNC) && file && + if ((flags & MS_SYNC) && file && (vma->vm_flags & VM_SHARED)) { get_file(file); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); error = do_fsync(file, 0); fput(file); - down_read(&current->mm->mmap_sem); - if (error) - goto out_unlock; - vma = find_vma(current->mm, start); + if (error || start >= end) + goto out; + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); } else { + if (start >= end) { + error = 0; + goto out_unlock; + } vma = vma->vm_next; } - } while (vma && !done); + } out_unlock: - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); out: - return error; + return error ? : unmapped_error; } diff --git a/mm/nommu.c b/mm/nommu.c index c576df71e3b..d99dea31e44 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. 
The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b9af136e5cf..bada3d03119 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -21,6 +21,8 @@ #include <linux/timex.h> #include <linux/jiffies.h> #include <linux/cpuset.h> +#include <linux/module.h> +#include <linux/notifier.h> int sysctl_panic_on_oom; /* #define DEBUG */ @@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) } /* + * swapoff can easily use up all memory, so kill those first. + */ + if (p->flags & PF_SWAPOFF) + return ULONG_MAX; + + /* * The memory size of the process is the basis for the badness. */ points = mm->total_vm; @@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) points /= 4; /* + * If p's nodes don't overlap ours, it may still help to kill p + * because p may have allocated or otherwise mapped memory on + * this node before. However it will be less likely. + */ + if (!cpuset_excl_nodes_overlap(p)) + points /= 8; + + /* * Adjust the score by oomkilladj. */ if (p->oomkilladj) { @@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) for (z = zonelist->zones; *z; z++) if (cpuset_zone_allowed(*z, gfp_mask)) - node_clear((*z)->zone_pgdat->node_id, - nodes); + node_clear(zone_to_nid(*z), nodes); else return CONSTRAINT_CPUSET; @@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) unsigned long points; int releasing; + /* skip kernel threads */ + if (!p->mm) + continue; /* skip the init task with pid == 1 */ if (p->pid == 1) continue; - if (p->oomkilladj == OOM_DISABLE) - continue; - /* If p's nodes don't overlap ours, it won't help to kill p. 
*/ - if (!cpuset_excl_nodes_overlap(p)) - continue; /* * This is in the process of releasing memory so wait for it * to finish before killing some other task by mistake. + * + * However, if p is the current task, we allow the 'kill' to + * go ahead if it is exiting: this will simply set TIF_MEMDIE, + * which will allow it to gain access to memory reserves in + * the process of exiting and releasing its resources. + * Otherwise we could get an OOM deadlock. */ releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || p->flags & PF_EXITING; - if (releasing && !(p->flags & PF_DEAD)) + if (releasing) { + /* PF_DEAD tasks have already released their mm */ + if (p->flags & PF_DEAD) + continue; + if (p->flags & PF_EXITING && p == current) { + chosen = p; + *ppoints = ULONG_MAX; + break; + } return ERR_PTR(-1UL); - if (p->flags & PF_SWAPOFF) - return p; + } + if (p->oomkilladj == OOM_DISABLE) + continue; points = badness(p, uptime.tv_sec); if (points > *ppoints || !chosen) { @@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) } /** - * We must be careful though to never send SIGKILL a process with - * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that - * we select a process with CAP_SYS_RAW_IO set). + * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO + * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO + * set. 
*/ static void __oom_kill_task(struct task_struct *p, const char *message) { @@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message) return; } task_unlock(p); - printk(KERN_ERR "%s: Killed process %d (%s).\n", + + if (message) { + printk(KERN_ERR "%s: Killed process %d (%s).\n", message, p->pid, p->comm); + } /* * We give our sacrificial lamb high priority and access to @@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, struct task_struct *c; struct list_head *tsk; - printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " - "children.\n", p->pid, p->comm, points); + /* + * If the task is already exiting, don't alarm the sysadmin or kill + * its children or threads, just set TIF_MEMDIE so it can die quickly + */ + if (p->flags & PF_EXITING) { + __oom_kill_task(p, NULL); + return 0; + } + + printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" + " and children.\n", p->pid, p->comm, points); /* Try to kill a child first */ list_for_each(tsk, &p->children) { c = list_entry(tsk, struct task_struct, sibling); @@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, return oom_kill_task(p, message); } +static BLOCKING_NOTIFIER_HEAD(oom_notify_list); + +int register_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_oom_notifier); + +int unregister_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_oom_notifier); + /** * out_of_memory - kill the "best" process when we run out of memory * @@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { struct task_struct *p; unsigned long points = 0; + unsigned long freed = 0; + + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory 
back in the last second. */ + return; if (printk_ratelimit()) { - printk("oom-killer: gfp_mask=0x%x, order=%d\n", - gfp_mask, order); + printk(KERN_WARNING "%s invoked oom-killer: " + "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", + current->comm, gfp_mask, order, current->oomkilladj); dump_stack(); show_mem(); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 77a0bc4e261..555752907dc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -23,6 +23,7 @@ #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/mpage.h> +#include <linux/rmap.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/smp.h> @@ -243,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -550,7 +561,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -690,7 +701,7 @@ int set_page_dirty_lock(struct page *page) { int ret; - lock_page(page); + lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret; @@ -712,9 +723,15 @@ int test_clear_page_dirty(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - if (mapping_cap_account_dirty(mapping)) - __dec_zone_page_state(page, NR_FILE_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); + /* + * We can continue to use `mapping' here because the + * page is locked, which 
pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -744,8 +761,10 @@ int clear_page_dirty_for_io(struct page *page) if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3b5358a0561..9810f0a60db 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); unsigned long totalram_pages __read_mostly; -unsigned long totalhigh_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; long nr_swap_pages; int percpu_pagelist_fraction; @@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order); * TBD: should special case ZONE_DMA32 machines here - in those we normally * don't need any ZONE_NORMAL reservation */ -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { + 256, +#ifdef CONFIG_ZONE_DMA32 + 256, +#endif +#ifdef CONFIG_HIGHMEM + 32 +#endif +}; EXPORT_SYMBOL(totalram_pages); @@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages); struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; EXPORT_SYMBOL(zone_table); -static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; +static char *zone_names[MAX_NR_ZONES] = { + "DMA", +#ifdef CONFIG_ZONE_DMA32 + "DMA32", +#endif + "Normal", +#ifdef CONFIG_HIGHMEM + "HighMem" +#endif +}; + int min_free_kbytes = 1024; unsigned long __meminitdata nr_kernel_pages; @@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page) return 0; } - #else static inline int bad_range(struct zone 
*zone, struct page *page) { @@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; - BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); + VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. */ - BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); + VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); for (i = 0; i < (1 << order); i++) clear_highpage(page + i); } @@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page, page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - BUG_ON(page_idx & (order_size - 1)); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(page_idx & (order_size - 1)); + VM_BUG_ON(bad_range(zone, page)); zone->free_pages += order_size; while (order < MAX_ORDER-1) { @@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count, while (count--) { struct page *page; - BUG_ON(list_empty(list)); + VM_BUG_ON(list_empty(list)); page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); @@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, int order) { - LIST_HEAD(list); - list_add(&page->lru, &list); - free_pages_bulk(zone, 1, &list, order); + spin_lock(&zone->lock); + zone->all_unreclaimable = 0; + zone->pages_scanned = 0; + __free_one_page(page, zone ,order); + spin_unlock(&zone->lock); } static void __free_pages_ok(struct page *page, unsigned int order) @@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page, area--; high--; size >>= 1; - BUG_ON(bad_range(zone, &page[size])); + VM_BUG_ON(bad_range(zone, &page[size])); list_add(&page[size].lru, &area->free_list); area->nr_free++; set_page_order(&page[size], 
high); @@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, #ifdef CONFIG_NUMA /* * Called from the slab reaper to drain pagesets on a particular node that - * belong to the currently executing processor. + * belongs to the currently executing processor. * Note that this function must be called with the thread pinned to * a single processor. */ void drain_node_pages(int nodeid) { - int i, z; + int i; + enum zone_type z; unsigned long flags; for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = NODE_DATA(nodeid)->node_zones + z; struct per_cpu_pageset *pset; + if (!populated_zone(zone)) + continue; + pset = zone_pcp(zone, smp_processor_id()); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { struct per_cpu_pages *pcp; @@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu) void mark_free_pages(struct zone *zone) { - unsigned long zone_pfn, flags; + unsigned long pfn, max_zone_pfn; + unsigned long flags; int order; struct list_head *curr; @@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone) return; spin_lock_irqsave(&zone->lock, flags); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); + + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (!PageNosave(page)) + ClearPageNosaveFree(page); + } for (order = MAX_ORDER - 1; order >= 0; --order) list_for_each(curr, &zone->free_area[order].free_list) { - unsigned long start_pfn, i; + unsigned long i; - start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); + pfn = page_to_pfn(list_entry(curr, struct page, lru)); + for (i = 0; i < (1UL << order); i++) + SetPageNosaveFree(pfn_to_page(pfn + i)); + } - for (i=0; i < (1<<order); i++) - SetPageNosaveFree(pfn_to_page(start_pfn+i)); - } spin_unlock_irqrestore(&zone->lock, flags); } @@ -761,8 +791,8 @@ void 
split_page(struct page *page, unsigned int order) { int i; - BUG_ON(PageCompound(page)); - BUG_ON(!page_count(page)); + VM_BUG_ON(PageCompound(page)); + VM_BUG_ON(!page_count(page)); for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } @@ -809,7 +839,7 @@ again: local_irq_restore(flags); put_cpu(); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) goto again; return page; @@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, struct zone **z = zonelist->zones; struct page *page = NULL; int classzone_idx = zone_idx(*z); + struct zone *zone; /* * Go through the zonelist once, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ do { + zone = *z; + if (unlikely((gfp_mask & __GFP_THISNODE) && + zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) + break; if ((alloc_flags & ALLOC_CPUSET) && - !cpuset_zone_allowed(*z, gfp_mask)) + !cpuset_zone_allowed(zone, gfp_mask)) continue; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; if (alloc_flags & ALLOC_WMARK_MIN) - mark = (*z)->pages_min; + mark = zone->pages_min; else if (alloc_flags & ALLOC_WMARK_LOW) - mark = (*z)->pages_low; + mark = zone->pages_low; else - mark = (*z)->pages_high; - if (!zone_watermark_ok(*z, order, mark, + mark = zone->pages_high; + if (!zone_watermark_ok(zone , order, mark, classzone_idx, alloc_flags)) if (!zone_reclaim_mode || - !zone_reclaim(*z, gfp_mask, order)) + !zone_reclaim(zone, gfp_mask, order)) continue; } - page = buffered_rmqueue(zonelist, *z, order, gfp_mask); + page = buffered_rmqueue(zonelist, zone, order, gfp_mask); if (page) { break; } @@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) * get_zeroed_page() returns a 32-bit address, which cannot represent * a highmem page */ - BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); + VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); page = alloc_pages(gfp_mask | 
__GFP_ZERO, 0); if (page) @@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages); fastcall void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { - BUG_ON(!virt_addr_valid((void *)addr)); + VM_BUG_ON(!virt_addr_valid((void *)addr)); __free_pages(virt_to_page((void *)addr), order); } } @@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages); #ifdef CONFIG_NUMA unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) { - unsigned int i, sum = 0; + unsigned int sum = 0; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) sum += pgdat->node_zones[i].free_pages; @@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } - -#ifdef CONFIG_HIGHMEM -unsigned int nr_free_highpages (void) -{ - pg_data_t *pgdat; - unsigned int pages = 0; - - for_each_online_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; - - return pages; -} -#endif - #ifdef CONFIG_NUMA static void show_node(struct zone *zone) { - printk("Node %d ", zone->zone_pgdat->node_id); + printk("Node %ld ", zone_to_nid(zone)); } #else #define show_node(zone) do { } while (0) @@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val) val->sharedram = 0; val->freeram = nr_free_pages(); val->bufferram = nr_blockdev_pages(); -#ifdef CONFIG_HIGHMEM val->totalhigh = totalhigh_pages; val->freehigh = nr_free_highpages(); -#else - val->totalhigh = 0; - val->freehigh = 0; -#endif val->mem_unit = PAGE_SIZE; } @@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid) val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); +#ifdef CONFIG_HIGHMEM val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; +#else + val->totalhigh = 0; + val->freehigh = 0; +#endif val->mem_unit = PAGE_SIZE; } #endif @@ -1282,10 +1304,6 @@ void show_free_areas(void) get_zone_counts(&active, &inactive, &free); - printk("Free pages: %11ukB (%ukB 
HighMem)\n", - K(nr_free_pages()), - K(nr_free_highpages())); - printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", active, @@ -1294,7 +1312,8 @@ void show_free_areas(void) global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), nr_free_pages(), - global_page_state(NR_SLAB), + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), global_page_state(NR_PAGETABLE)); @@ -1360,39 +1379,25 @@ void show_free_areas(void) * Add all populated zones of a node to the zonelist. */ static int __meminit build_zonelists_node(pg_data_t *pgdat, - struct zonelist *zonelist, int nr_zones, int zone_type) + struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) { struct zone *zone; - BUG_ON(zone_type > ZONE_HIGHMEM); + BUG_ON(zone_type >= MAX_NR_ZONES); + zone_type++; do { + zone_type--; zone = pgdat->node_zones + zone_type; if (populated_zone(zone)) { -#ifndef CONFIG_HIGHMEM - BUG_ON(zone_type > ZONE_NORMAL); -#endif zonelist->zones[nr_zones++] = zone; check_highest_zone(zone_type); } - zone_type--; - } while (zone_type >= 0); + } while (zone_type); return nr_zones; } -static inline int highest_zone(int zone_bits) -{ - int res = ZONE_NORMAL; - if (zone_bits & (__force int)__GFP_HIGHMEM) - res = ZONE_HIGHMEM; - if (zone_bits & (__force int)__GFP_DMA32) - res = ZONE_DMA32; - if (zone_bits & (__force int)__GFP_DMA) - res = ZONE_DMA; - return res; -} - #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) static int __meminitdata node_load[MAX_NUMNODES]; @@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int j, node, local_node; + enum zone_type i; int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; /* initialize zonelists */ - for (i = 0; i < GFP_ZONETYPES; i++) { + 
for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; zonelist->zones[0] = NULL; } @@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat) node_load[node] += load; prev_node = node; load--; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); - k = highest_zone(i); - - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); zonelist->zones[j] = NULL; } } @@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int node, local_node; + enum zone_type i,j; local_node = pgdat->node_id; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { struct zonelist *zonelist; zonelist = pgdat->node_zonelists + i; - j = 0; - k = highest_zone(i); - j = build_zonelists_node(pgdat, zonelist, j, k); + j = build_zonelists_node(pgdat, zonelist, 0, i); /* * Now we build the zonelist so that it contains the zones * of all the other nodes. 
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat) for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } zonelist->zones[j] = NULL; @@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long realtotalpages, totalpages = 0; - int i; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; @@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, - unsigned long size) +void zonetable_add(struct zone *zone, int nid, enum zone_type zid, + unsigned long pfn, unsigned long size) { unsigned long snum = pfn_to_section_nr(pfn); unsigned long end = pfn_to_section_nr(pfn + size); @@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu) for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + /* Free per_cpu_pageset if it is slab allocated */ + if (pset != &boot_pageset[cpu]) + kfree(pset); zone_pcp(zone, cpu) = NULL; - kfree(pset); } } @@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, static void __meminit free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { - unsigned long j; + enum zone_type j; int nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; int ret; @@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data 
*pgdat, if (zholes_size) realsize -= zholes_size[j]; - if (j < ZONE_HIGHMEM) + if (!is_highmem_idx(j)) nr_kernel_pages += realsize; nr_all_pages += realsize; zone->spanned_pages = size; zone->present_pages = realsize; #ifdef CONFIG_NUMA - zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; + zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); @@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void) { struct pglist_data *pgdat; unsigned long reserve_pages = 0; - int i, j; + enum zone_type i, j; for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { @@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void) static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; - int j, idx; + enum zone_type j, idx; for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { @@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void) zone->lowmem_reserve[j] = 0; - for (idx = j-1; idx >= 0; idx--) { + idx = j; + while (idx) { struct zone *lower_zone; + idx--; + if (sysctl_lowmem_reserve_ratio[idx] < 1) sysctl_lowmem_reserve_ratio[idx] = 1; @@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, return rc; for_each_zone(zone) - zone->min_unmapped_ratio = (zone->present_pages * + zone->min_unmapped_pages = (zone->present_pages * sysctl_min_unmapped_ratio) / 100; return 0; } + +int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_slab_pages = (zone->present_pages * + sysctl_min_slab_ratio) / 100; + return 0; +} #endif /* diff --git a/mm/page_io.c b/mm/page_io.c 
index 88029948d00..d4840ecbf8f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -52,14 +52,29 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err) if (bio->bi_size) return 1; - if (!uptodate) + if (!uptodate) { SetPageError(page); + /* + * We failed to write the page out to swap-space. + * Re-dirty the page in order to avoid it being reclaimed. + * Also print a dire warning that things will go BAD (tm) + * very quickly. + * + * Also clear PG_reclaim to avoid rotate_reclaimable_page() + */ + set_page_dirty(page); + printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_sector); + ClearPageReclaim(page); + } end_page_writeback(page); bio_put(bio); return 0; } -static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) +int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; @@ -70,6 +85,10 @@ static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) if (!uptodate) { SetPageError(page); ClearPageUptodate(page); + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_sector); } else { SetPageUptodate(page); } @@ -137,10 +156,12 @@ out: * We use end_swap_bio_read() even for writes, because it happens to do what * we want. 
*/ -int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) +int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain) { struct bio *bio; int ret = 0; + int bio_rw; lock_page(page); @@ -151,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) goto out; } - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - wait_on_page_locked(page); - - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; + bio_rw = rw; + if (!bio_chain) + bio_rw |= (1 << BIO_RW_SYNC); + if (bio_chain) + bio_get(bio); + submit_bio(bio_rw, bio); + if (bio_chain == NULL) { + wait_on_page_locked(page); + + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + } + if (bio_chain) { + bio->bi_private = *bio_chain; + *bio_chain = bio; + } out: return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 40158b59729..e2155d791d9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked) return referenced; } +static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte, entry; + spinlock_t *ptl; + int ret = 0; + + address = vma_address(page, vma); + if (address == -EFAULT) + goto out; + + pte = page_check_address(page, mm, address, &ptl); + if (!pte) + goto out; + + if (!pte_dirty(*pte) && !pte_write(*pte)) + goto unlock; + + entry = ptep_get_and_clear(mm, address, pte); + entry = pte_mkclean(entry); + entry = pte_wrprotect(entry); + ptep_establish(vma, address, pte, entry); + lazy_mmu_prot_update(entry); + ret = 1; + +unlock: + pte_unmap_unlock(pte, ptl); +out: + return ret; +} + +static int page_mkclean_file(struct address_space *mapping, struct page *page) +{ + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int ret = 0; + + BUG_ON(PageAnon(page)); + + spin_lock(&mapping->i_mmap_lock); + 
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + if (vma->vm_flags & VM_SHARED) + ret += page_mkclean_one(page, vma); + } + spin_unlock(&mapping->i_mmap_lock); + return ret; +} + +int page_mkclean(struct page *page) +{ + int ret = 0; + + BUG_ON(!PageLocked(page)); + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); + if (mapping) + ret = page_mkclean_file(mapping, page); + } + + return ret; +} + /** * page_set_anon_rmap - setup new anonymous rmap * @page: the page to add the mapping to diff --git a/mm/shmem.c b/mm/shmem.c index db21c51531c..8631be45b40 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -45,6 +45,7 @@ #include <linux/namei.h> #include <linux/ctype.h> #include <linux/migrate.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/div64.h> diff --git a/mm/slab.c b/mm/slab.c index 21ba0603570..7a48eb1a60c 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static void enable_cpucache(struct kmem_cache *cachep); +static int enable_cpucache(struct kmem_cache *cachep); static void cache_reap(void *unused); /* @@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = { #endif }; +#define BAD_ALIEN_MAGIC 0x01020304ul + #ifdef CONFIG_LOCKDEP /* @@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = { * The locking for this is tricky in that it nests within the locks * of all other slabs in a few places; to deal with this special * locking we put on-slab caches into a separate lock-class. + * + * We set lock class for alien array caches which are up during init. 
+ * The lock annotation will be lost if all cpus of a node goes down and + * then comes back up during hotplug */ -static struct lock_class_key on_slab_key; +static struct lock_class_key on_slab_l3_key; +static struct lock_class_key on_slab_alc_key; + +static inline void init_lock_keys(void) -static inline void init_lock_keys(struct cache_sizes *s) { int q; - - for (q = 0; q < MAX_NUMNODES; q++) { - if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep)) - continue; - lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock, - &on_slab_key); + struct cache_sizes *s = malloc_sizes; + + while (s->cs_size != ULONG_MAX) { + for_each_node(q) { + struct array_cache **alc; + int r; + struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; + if (!l3 || OFF_SLAB(s->cs_cachep)) + continue; + lockdep_set_class(&l3->list_lock, &on_slab_l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. + * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache, + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + continue; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, + &on_slab_alc_key); + } + } + s++; } } - #else -static inline void init_lock_keys(struct cache_sizes *s) +static inline void init_lock_keys(void) { } #endif - - /* Guard access to the cache-chain. */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; /* - * vm_enough_memory() looks at this to determine how many slab-allocated pages - * are possibly freeable under pressure - * - * SLAB_RECLAIM_ACCOUNT turns this on per-slab - */ -atomic_t slab_reclaim_pages; - -/* * chicken and egg problem: delay the per-cpu array allocation * until the general caches are up. 
*/ @@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, return csizep->cs_cachep; } -struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) +static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } -EXPORT_SYMBOL(kmem_find_general_cachep); static size_t slab_mgmt_size(size_t nr_objs, size_t align) { @@ -1092,7 +1109,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) static inline struct array_cache **alloc_alien_cache(int node, int limit) { - return (struct array_cache **) 0x01020304ul; + return (struct array_cache **)BAD_ALIEN_MAGIC; } static inline void free_alien_cache(struct array_cache **ac_ptr) @@ -1422,7 +1439,6 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL, NULL); } - init_lock_keys(sizes); sizes->cs_dmacachep = kmem_cache_create(names->name_dma, sizes->cs_size, @@ -1491,10 +1507,15 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - enable_cpucache(cachep); + if (enable_cpucache(cachep)) + BUG(); mutex_unlock(&cache_chain_mutex); } + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + + /* Done! 
*/ g_cpucache_up = FULL; @@ -1551,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_add(nr_pages, &slab_reclaim_pages); - add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); + add_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_pages); + else + add_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_pages); for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); return page_address(page); @@ -1567,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; - sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + sub_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_freed); + else + sub_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); __ClearPageSlab(page); @@ -1576,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_pages((unsigned long)addr, cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); } static void kmem_rcu_free(struct rcu_head *head) @@ -1834,6 +1861,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) } } +static void __kmem_cache_destroy(struct kmem_cache *cachep) +{ + int i; + struct kmem_list3 *l3; + + for_each_online_cpu(i) + kfree(cachep->array[i]); + + /* NUMA: free the list3 structures */ + for_each_online_node(i) { + l3 = cachep->nodelists[i]; + if (l3) { + kfree(l3->shared); + free_alien_cache(l3->alien); + kfree(l3); + } + } + kmem_cache_free(&cache_cache, cachep); +} + + /** * calculate_slab_order - calculate size (page order) of slabs * @cachep: pointer to the cache that is being created @@ 
-1904,12 +1952,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static void setup_cpu_cache(struct kmem_cache *cachep) +static int setup_cpu_cache(struct kmem_cache *cachep) { - if (g_cpucache_up == FULL) { - enable_cpucache(cachep); - return; - } + if (g_cpucache_up == FULL) + return enable_cpucache(cachep); + if (g_cpucache_up == NONE) { /* * Note: the first kmem_cache_create must create the cache @@ -1956,6 +2003,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep) cpu_cache_get(cachep)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; + return 0; } /** @@ -2097,6 +2145,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, } else { ralign = BYTES_PER_WORD; } + + /* + * Redzoning and user store require word alignment. Note this will be + * overridden by architecture or caller mandated alignment if either + * is greater than BYTES_PER_WORD. + */ + if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) + ralign = BYTES_PER_WORD; + /* 2) arch mandated alignment: disables debug if necessary */ if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; @@ -2110,8 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } /* - * 4) Store it. Note that the debug code below can reduce - * the alignment to BYTES_PER_WORD. + * 4) Store it. */ align = ralign; @@ -2123,20 +2179,19 @@ kmem_cache_create (const char *name, size_t size, size_t align, #if DEBUG cachep->obj_size = size; + /* + * Both debugging options require word-alignment which is calculated + * into align above. 
+ */ if (flags & SLAB_RED_ZONE) { - /* redzoning only works with word aligned caches */ - align = BYTES_PER_WORD; - /* add space for red zone words */ cachep->obj_offset += BYTES_PER_WORD; size += 2 * BYTES_PER_WORD; } if (flags & SLAB_STORE_USER) { - /* user store requires word alignment and - * one word storage behind the end of the real - * object. + /* user store requires one word storage behind the end of + * the real object. */ - align = BYTES_PER_WORD; size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) @@ -2200,14 +2255,26 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->gfpflags |= GFP_DMA; cachep->buffer_size = size; - if (flags & CFLGS_OFF_SLAB) + if (flags & CFLGS_OFF_SLAB) { cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); + /* + * This is a possibility for one of the malloc_sizes caches. + * But since we go off slab only for object size greater than + * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, + * this should not happen at all. + * But leave a BUG_ON for some lucky dude. 
+ */ + BUG_ON(!cachep->slabp_cache); + } cachep->ctor = ctor; cachep->dtor = dtor; cachep->name = name; - - setup_cpu_cache(cachep); + if (setup_cpu_cache(cachep)) { + __kmem_cache_destroy(cachep); + cachep = NULL; + goto oops; + } /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); @@ -2389,9 +2456,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); */ int kmem_cache_destroy(struct kmem_cache *cachep) { - int i; - struct kmem_list3 *l3; - BUG_ON(!cachep || in_interrupt()); /* Don't let CPUs to come and go */ @@ -2417,25 +2481,23 @@ int kmem_cache_destroy(struct kmem_cache *cachep) if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) synchronize_rcu(); - for_each_online_cpu(i) - kfree(cachep->array[i]); - - /* NUMA: free the list3 structures */ - for_each_online_node(i) { - l3 = cachep->nodelists[i]; - if (l3) { - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); - } - } - kmem_cache_free(&cache_cache, cachep); + __kmem_cache_destroy(cachep); unlock_cpu_hotplug(); return 0; } EXPORT_SYMBOL(kmem_cache_destroy); -/* Get the memory for a slab management obj. */ +/* + * Get the memory for a slab management obj. + * For a slab cache when the slab descriptor is off-slab, slab descriptors + * always come from malloc_sizes caches. The slab descriptor cannot + * come from the same cache which is getting created because, + * when we are searching for an appropriate cache for these + * descriptors in kmem_cache_create, we search through the malloc_sizes array. + * If we are creating a malloc_sizes cache here it would not be visible to + * kmem_find_general_cachep till the initialization is complete. + * Hence we cannot have slabp_cache same as the original cache. 
+ */ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, int colour_off, gfp_t local_flags, int nodeid) @@ -3119,6 +3181,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, if (slabp->inuse == 0) { if (l3->free_objects > l3->free_limit) { l3->free_objects -= cachep->num; + /* No need to drop any previously held + * lock here, even if we have a off-slab slab + * descriptor it is guaranteed to come from + * a different cache, refer to comments before + * alloc_slabmgmt. + */ slab_destroy(cachep, slabp); } else { list_add(&slabp->list, &l3->slabs_free); @@ -3317,7 +3385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, gfp_t flags, int node) +void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *cachep; @@ -3326,7 +3394,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node) return NULL; return kmem_cache_alloc_node(cachep, flags, node); } -EXPORT_SYMBOL(kmalloc_node); +EXPORT_SYMBOL(__kmalloc_node); #endif /** @@ -3370,55 +3438,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) EXPORT_SYMBOL(__kmalloc_track_caller); #endif -#ifdef CONFIG_SMP -/** - * __alloc_percpu - allocate one copy of the object for every present - * cpu in the system, zeroing them. - * Objects should be dereferenced using the per_cpu_ptr macro only. - * - * @size: how many bytes of memory are required. - */ -void *__alloc_percpu(size_t size) -{ - int i; - struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); - - if (!pdata) - return NULL; - - /* - * Cannot use for_each_online_cpu since a cpu may come online - * and we have no way of figuring out how to fix the array - * that we have allocated then.... 
- */ - for_each_possible_cpu(i) { - int node = cpu_to_node(i); - - if (node_online(node)) - pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); - else - pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); - - if (!pdata->ptrs[i]) - goto unwind_oom; - memset(pdata->ptrs[i], 0, size); - } - - /* Catch derefs w/o wrappers */ - return (void *)(~(unsigned long)pdata); - -unwind_oom: - while (--i >= 0) { - if (!cpu_possible(i)) - continue; - kfree(pdata->ptrs[i]); - } - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL(__alloc_percpu); -#endif - /** * kmem_cache_free - Deallocate an object * @cachep: The cache the allocation was from. @@ -3464,29 +3483,6 @@ void kfree(const void *objp) } EXPORT_SYMBOL(kfree); -#ifdef CONFIG_SMP -/** - * free_percpu - free previously allocated percpu memory - * @objp: pointer returned by alloc_percpu. - * - * Don't free memory not originally allocated by alloc_percpu() - * The complemented objp is to check for that. - */ -void free_percpu(const void *objp) -{ - int i; - struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); - - /* - * We allocate for all cpus so we cannot use for online cpu here. 
- */ - for_each_possible_cpu(i) - kfree(p->ptrs[i]); - kfree(p); -} -EXPORT_SYMBOL(free_percpu); -#endif - unsigned int kmem_cache_size(struct kmem_cache *cachep) { return obj_size(cachep); @@ -3603,22 +3599,26 @@ static void do_ccupdate_local(void *info) static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared) { - struct ccupdate_struct new; - int i, err; + struct ccupdate_struct *new; + int i; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; - memset(&new.new, 0, sizeof(new.new)); for_each_online_cpu(i) { - new.new[i] = alloc_arraycache(cpu_to_node(i), limit, + new->new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); - if (!new.new[i]) { + if (!new->new[i]) { for (i--; i >= 0; i--) - kfree(new.new[i]); + kfree(new->new[i]); + kfree(new); return -ENOMEM; } } - new.cachep = cachep; + new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); + on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); check_irq_on(); cachep->batchcount = batchcount; @@ -3626,7 +3626,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, cachep->shared = shared; for_each_online_cpu(i) { - struct array_cache *ccold = new.new[i]; + struct array_cache *ccold = new->new[i]; if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); @@ -3634,18 +3634,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } - - err = alloc_kmemlist(cachep); - if (err) { - printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", - cachep->name, -err); - BUG(); - } - return 0; + kfree(new); + return alloc_kmemlist(cachep); } /* Called with cache_chain_mutex held always */ -static void enable_cpucache(struct kmem_cache *cachep) +static int enable_cpucache(struct kmem_cache *cachep) { int err; int limit, shared; @@ -3697,6 +3691,7 @@ static void enable_cpucache(struct kmem_cache *cachep) if 
(err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); + return err; } /* @@ -4157,6 +4152,7 @@ static int leaks_show(struct seq_file *m, void *p) show_symbol(m, n[2*i+2]); seq_putc(m, '\n'); } + return 0; } diff --git a/mm/slob.c b/mm/slob.c index 7b52b20b960..20188627347 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -339,52 +339,3 @@ void kmem_cache_init(void) mod_timer(&slob_timer, jiffies + HZ); } - -atomic_t slab_reclaim_pages = ATOMIC_INIT(0); -EXPORT_SYMBOL(slab_reclaim_pages); - -#ifdef CONFIG_SMP - -void *__alloc_percpu(size_t size) -{ - int i; - struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); - - if (!pdata) - return NULL; - - for_each_possible_cpu(i) { - pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); - if (!pdata->ptrs[i]) - goto unwind_oom; - memset(pdata->ptrs[i], 0, size); - } - - /* Catch derefs w/o wrappers */ - return (void *) (~(unsigned long) pdata); - -unwind_oom: - while (--i >= 0) { - if (!cpu_possible(i)) - continue; - kfree(pdata->ptrs[i]); - } - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL(__alloc_percpu); - -void -free_percpu(const void *objp) -{ - int i; - struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); - - for_each_possible_cpu(i) - kfree(p->ptrs[i]); - - kfree(p); -} -EXPORT_SYMBOL(free_percpu); - -#endif diff --git a/mm/swap.c b/mm/swap.c index 687686a61f7..2e0e871f542 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -34,6 +34,25 @@ /* How many pages do we try to swap or page in/out together? */ int page_cluster; +/* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. 
+ */ +static void fastcall __page_cache_release(struct page *page) +{ + if (PageLRU(page)) { + unsigned long flags; + struct zone *zone = page_zone(page); + + spin_lock_irqsave(&zone->lru_lock, flags); + VM_BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irqrestore(&zone->lru_lock, flags); + } + free_hot_page(page); +} + static void put_compound_page(struct page *page) { page = (struct page *)page_private(page); @@ -223,26 +242,6 @@ int lru_add_drain_all(void) #endif /* - * This path almost never happens for VM activity - pages are normally - * freed via pagevecs. But it gets used by networking. - */ -void fastcall __page_cache_release(struct page *page) -{ - if (PageLRU(page)) { - unsigned long flags; - struct zone *zone = page_zone(page); - - spin_lock_irqsave(&zone->lru_lock, flags); - BUG_ON(!PageLRU(page)); - __ClearPageLRU(page); - del_page_from_lru(zone, page); - spin_unlock_irqrestore(&zone->lru_lock, flags); - } - free_hot_page(page); -} -EXPORT_SYMBOL(__page_cache_release); - -/* * Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and * free it. 
@@ -284,7 +283,7 @@ void release_pages(struct page **pages, int nr, int cold) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru(zone, page); } @@ -337,7 +336,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); if (put_page_testzero(page)) pagevec_add(&pages_to_free, page); } @@ -364,7 +363,7 @@ void __pagevec_lru_add(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); add_page_to_inactive_list(zone, page); } @@ -391,9 +390,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); SetPageActive(page); add_page_to_active_list(zone, page); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 266162d2ba2..9aad8b0cc6e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,9 @@ DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node); + static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) { pte_t *pte; @@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. 
*/ -void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, - int node) +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node) { struct vm_struct *area; @@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, return __vmalloc_area_node(area, gfp_mask, prot, node); } -EXPORT_SYMBOL(__vmalloc_node); void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 5d4c4d02254..87779dda4ec 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -62,6 +62,8 @@ struct scan_control { int swap_cluster_max; int swappiness; + + int all_unreclaimable; }; /* @@ -377,8 +379,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) int remove_mapping(struct address_space *mapping, struct page *page) { - if (!mapping) - return 0; /* truncate got there first */ + BUG_ON(!PageLocked(page)); + BUG_ON(mapping != page_mapping(page)); write_lock_irq(&mapping->tree_lock); @@ -440,7 +442,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (TestSetPageLocked(page)) goto keep; - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); sc->nr_scanned++; @@ -547,7 +549,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto free_it; } - if (!remove_mapping(mapping, page)) + if (!mapping || !remove_mapping(mapping, page)) goto keep_locked; free_it: @@ -564,7 +566,7 @@ keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); } list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) @@ -603,7 +605,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); list_del(&page->lru); target = src; @@ -674,7 +676,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, */ while 
(!list_empty(&page_list)) { page = lru_to_page(&page_list); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); list_del(&page->lru); if (PageActive(page)) @@ -695,6 +697,11 @@ done: return nr_reclaimed; } +static inline int zone_is_near_oom(struct zone *zone) +{ + return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; +} + /* * This moves pages from the active list to the inactive list. * @@ -730,6 +737,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, long distress; long swap_tendency; + if (zone_is_near_oom(zone)) + goto force_reclaim_mapped; + /* * `distress' is a measure of how much trouble we're having * reclaiming pages. 0 -> no problems. 100 -> great trouble. @@ -765,6 +775,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * memory onto the inactive list. */ if (swap_tendency >= 100) +force_reclaim_mapped: reclaim_mapped = 1; } @@ -797,9 +808,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); @@ -827,9 +838,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_active)) { page = lru_to_page(&l_active); prefetchw_prev_lru_page(page, &l_active, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); list_move(&page->lru, &zone->active_list); pgmoved++; if (!pagevec_add(&pvec, page)) { @@ -925,6 +936,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, unsigned long nr_reclaimed = 0; int i; + sc->all_unreclaimable = 1; for (i = 0; zones[i] != NULL; i++) { struct zone *zone = 
zones[i]; @@ -941,6 +953,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones, if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ + sc->all_unreclaimable = 0; + nr_reclaimed += shrink_zone(priority, zone, sc); } return nr_reclaimed; @@ -1021,6 +1035,9 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (sc.nr_scanned && priority < DEF_PRIORITY - 2) blk_congestion_wait(WRITE, HZ/10); } + /* top priority shrink_caches still had more to do? don't OOM, then */ + if (!sc.all_unreclaimable) + ret = 1; out: for (i = 0; zones[i] != 0; i++) { struct zone *zone = zones[i]; @@ -1153,7 +1170,7 @@ scan: if (zone->all_unreclaimable) continue; if (nr_slab == 0 && zone->pages_scanned >= - (zone->nr_active + zone->nr_inactive) * 4) + (zone->nr_active + zone->nr_inactive) * 6) zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and @@ -1361,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for_each_zone(zone) lru_pages += zone->nr_active + zone->nr_inactive; - nr_slab = global_page_state(NR_SLAB); + nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); /* If slab caches are huge, it's better to hit them first */ while (nr_slab >= lru_pages) { reclaim_state.reclaimed_slab = 0; @@ -1510,7 +1527,6 @@ int zone_reclaim_mode __read_mostly; #define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ -#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */ /* * Priority for ZONE_RECLAIM. This determines the fraction of pages @@ -1526,6 +1542,12 @@ int zone_reclaim_mode __read_mostly; int sysctl_min_unmapped_ratio = 1; /* + * If the number of slab pages in a zone grows beyond this percentage then + * slab reclaim needs to occur. 
+ */ +int sysctl_min_slab_ratio = 5; + +/* * Try to free up some pages from this zone through reclaim. */ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -1544,6 +1566,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .gfp_mask = gfp_mask, .swappiness = vm_swappiness, }; + unsigned long slab_reclaimable; disable_swap_token(); cond_resched(); @@ -1556,29 +1579,43 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - /* - * Free memory by calling shrink zone with increasing priorities - * until we have enough memory freed. - */ - priority = ZONE_RECLAIM_PRIORITY; - do { - nr_reclaimed += shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && nr_reclaimed < nr_pages); + if (zone_page_state(zone, NR_FILE_PAGES) - + zone_page_state(zone, NR_FILE_MAPPED) > + zone->min_unmapped_pages) { + /* + * Free memory by calling shrink zone with increasing + * priorities until we have enough memory freed. + */ + priority = ZONE_RECLAIM_PRIORITY; + do { + nr_reclaimed += shrink_zone(priority, zone, &sc); + priority--; + } while (priority >= 0 && nr_reclaimed < nr_pages); + } - if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { + slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE); + if (slab_reclaimable > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how - * many pages were freed in this zone. So we just shake the slab - * a bit and then go off node for this particular allocation - * despite possibly having freed enough memory to allocate in - * this zone. If we freed local memory then the next - * allocations will be local again. + * many pages were freed in this zone. So we take the current + * number of slab pages and shake the slab until it is reduced + * by the same nr_pages that we used for reclaiming unmapped + * pages. 
* - * shrink_slab will free memory on all zones and may take - * a long time. + * Note that shrink_slab will free memory on all zones and may + * take a long time. + */ + while (shrink_slab(sc.nr_scanned, gfp_mask, order) && + zone_page_state(zone, NR_SLAB_RECLAIMABLE) > + slab_reclaimable - nr_pages) + ; + + /* + * Update nr_reclaimed by the number of slab pages we + * reclaimed from this zone. */ - shrink_slab(sc.nr_scanned, gfp_mask, order); + nr_reclaimed += slab_reclaimable - + zone_page_state(zone, NR_SLAB_RECLAIMABLE); } p->reclaim_state = NULL; @@ -1592,7 +1629,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Zone reclaim reclaims unmapped file backed pages. + * Zone reclaim reclaims unmapped file backed pages and + * slab pages if we are over the defined limits. * * A small portion of unmapped file backed pages is needed for * file I/O otherwise pages read by file I/O will be immediately @@ -1601,7 +1639,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages + && zone_page_state(zone, NR_SLAB_RECLAIMABLE) + <= zone->min_slab_pages) return 0; /* @@ -1621,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * over remote processors and spread off node memory allocations * as wide as possible. 
*/ - node_id = zone->zone_pgdat->node_id; + node_id = zone_to_nid(zone); mask = node_to_cpumask(node_id); if (!cpus_empty(mask) && node_id != numa_node_id()) return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index c1b5f4106b3..490d8c1a0de 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -321,6 +321,9 @@ void refresh_cpu_vm_stats(int cpu) for_each_zone(zone) { struct per_cpu_pageset *pcp; + if (!populated_zone(zone)) + continue; + pcp = zone_pcp(zone, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) @@ -435,12 +438,28 @@ struct seq_operations fragmentation_op = { .show = frag_show, }; +#ifdef CONFIG_ZONE_DMA32 +#define TEXT_FOR_DMA32(xx) xx "_dma32", +#else +#define TEXT_FOR_DMA32(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define TEXT_FOR_HIGHMEM(xx) xx "_high", +#else +#define TEXT_FOR_HIGHMEM(xx) +#endif + +#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ + TEXT_FOR_HIGHMEM(xx) + static char *vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", "nr_mapped", "nr_file_pages", - "nr_slab", + "nr_slab_reclaimable", + "nr_slab_unreclaimable", "nr_page_table_pages", "nr_dirty", "nr_writeback", @@ -462,10 +481,7 @@ static char *vmstat_text[] = { "pswpin", "pswpout", - "pgalloc_dma", - "pgalloc_dma32", - "pgalloc_normal", - "pgalloc_high", + TEXTS_FOR_ZONES("pgalloc") "pgfree", "pgactivate", @@ -474,25 +490,10 @@ static char *vmstat_text[] = { "pgfault", "pgmajfault", - "pgrefill_dma", - "pgrefill_dma32", - "pgrefill_normal", - "pgrefill_high", - - "pgsteal_dma", - "pgsteal_dma32", - "pgsteal_normal", - "pgsteal_high", - - "pgscan_kswapd_dma", - "pgscan_kswapd_dma32", - "pgscan_kswapd_normal", - "pgscan_kswapd_high", - - "pgscan_direct_dma", - "pgscan_direct_dma32", - "pgscan_direct_normal", - "pgscan_direct_high", + TEXTS_FOR_ZONES("pgrefill") + TEXTS_FOR_ZONES("pgsteal") + TEXTS_FOR_ZONES("pgscan_kswapd") + TEXTS_FOR_ZONES("pgscan_direct") "pginodesteal", "slabs_scanned", diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 
859e3359fcd..e2a095d0fd8 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -40,6 +40,22 @@ config IP_DCCP_DEBUG Just say N. +config NET_DCCPPROBE + tristate "DCCP connection probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to DCCP connection + state in response to incoming packets. It is used for debugging + DCCP congestion avoidance modules. If you don't understand + what was just said, you don't need it: say N. + + Documentation on how to use the packet generator can be found + at http://linux-net.osdl.org/index.php/DccpProbe + + To compile this code as a module, choose M here: the + module will be called dccp_probe. + + endmenu endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 7696e219b05..17ed99c4661 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o +obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o +dccp_probe-y := probe.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 457dd3db7f4..2efb505aeb3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -808,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } static struct ccid_operations ccid2 = { - .ccid_id = 2, + .ccid_id = DCCPC_CCID2, .ccid_name = "ccid2", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 195aa956622..67d2dc0e7c6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1240,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, } static struct ccid_operations ccid3 = { - .ccid_id = 3, + .ccid_id = DCCPC_CCID3, .ccid_name = "ccid3", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9a1a76a7dc4..66be29b6f50 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; diff --git a/net/dccp/probe.c b/net/dccp/probe.c new file mode 100644 index 00000000000..146496fce2e --- /dev/null +++ b/net/dccp/probe.c @@ -0,0 +1,198 @@ +/* + * dccp_probe - Observe the DCCP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for DCCP from Stephen Hemminger's code + * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/dccp.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/vmalloc.h> + +#include "dccp.h" +#include "ccid.h" +#include "ccids/ccid3.h" + +static int port; + +static int bufsize = 64 * 1024; + +static const char procname[] = "dccpprobe"; + +struct { + struct kfifo *fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timeval tstart; +} dccpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + struct timeval now; + char tbuf[256]; + + va_start(args, fmt); + do_gettimeofday(&now); + + now.tv_sec -= dccpw.tstart.tv_sec; + now.tv_usec -= dccpw.tstart.tv_usec; + if (now.tv_usec < 0) { + --now.tv_sec; + now.tv_usec += 1000000; + } + + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_usec); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + + kfifo_put(dccpw.fifo, tbuf, len); + wake_up(&dccpw.wait); +} + +static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct dccp_minisock *dmsk = dccp_msk(sk); + const struct inet_sock *inet = inet_sk(sk); + const struct ccid3_hc_tx_sock *hctx; + + if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + hctx = ccid3_hc_tx_sk(sk); + else + hctx = NULL; + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { + if (hctx) + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi); + else + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); + } + + jprobe_return(); + return 0; +} + +static struct jprobe dccp_send_probe = { + .kp = { .addr = (kprobe_opcode_t 
*)&dccp_sendmsg, }, + .entry = (kprobe_opcode_t *)&jdccp_sendmsg, +}; + +static int dccpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(dccpw.fifo); + do_gettimeofday(&dccpw.tstart); + return 0; +} + +static ssize_t dccpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf || len < 0) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(dccpw.wait, + __kfifo_len(dccpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_get(dccpw.fifo, tbuf, len); + error = copy_to_user(buf, tbuf, cnt); + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static struct file_operations dccpprobe_fops = { + .owner = THIS_MODULE, + .open = dccpprobe_open, + .read = dccpprobe_read, +}; + +static __init int dccpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&dccpw.wait); + spin_lock_init(&dccpw.lock); + dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + + if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + goto err0; + + ret = register_jprobe(&dccp_send_probe); + if (ret) + goto err1; + + pr_info("DCCP watch registered (port=%d)\n", port); + return 0; +err1: + proc_net_remove(procname); +err0: + kfifo_free(dccpw.fifo); + return ret; +} +module_init(dccpprobe_init); + +static __exit void dccpprobe_exit(void) +{ + kfifo_free(dccpw.fifo); + proc_net_remove(procname); + unregister_jprobe(&dccp_send_probe); + +} +module_exit(dccpprobe_exit); + +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>"); +MODULE_DESCRIPTION("DCCP snooper"); +MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 962df0ea31a..72cbdcfc2c6 100644 --- 
a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; + dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; @@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len, int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 1650b64415a..30af4a4dfcc 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -448,24 +448,22 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG -config TCP_CONG_ADVANCED +menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- Support for selection of various TCP congestion control modules. Nearly all users can safely say no here, and a safe default - selection will be made (BIC-TCP with new Reno as a fallback). + selection will be made (CUBIC with new Reno as a fallback). If unsure, say N. 
-# TCP Reno is builtin (required as fallback) -menu "TCP congestion control" - depends on TCP_CONG_ADVANCED +if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" - default y + default m ---help--- BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and @@ -479,7 +477,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" - default m + default y ---help--- This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. @@ -574,12 +572,49 @@ config TCP_CONG_VENO loss packets. See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf -endmenu +choice + prompt "Default TCP congestion control" + default DEFAULT_CUBIC + help + Select the TCP congestion control that will be used by default + for all connections. -config TCP_CONG_BIC + config DEFAULT_BIC + bool "Bic" if TCP_CONG_BIC=y + + config DEFAULT_CUBIC + bool "Cubic" if TCP_CONG_CUBIC=y + + config DEFAULT_HTCP + bool "Htcp" if TCP_CONG_HTCP=y + + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + + config DEFAULT_WESTWOOD + bool "Westwood" if TCP_CONG_WESTWOOD=y + + config DEFAULT_RENO + bool "Reno" + +endchoice + +endif + +config TCP_CONG_CUBIC tristate depends on !TCP_CONG_ADVANCED default y +config DEFAULT_TCP_CONG + string + default "bic" if DEFAULT_BIC + default "cubic" if DEFAULT_CUBIC + default "htcp" if DEFAULT_HTCP + default "vegas" if DEFAULT_VEGAS + default "westwood" if DEFAULT_WESTWOOD + default "reno" if DEFAULT_RENO + default "cubic" + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 80a2a0911b4..e6ce0b3ba62 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void) u32 iter; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { - spin_lock(&cipso_v4_cache[iter].lock); + spin_lock_bh(&cipso_v4_cache[iter].lock); 
list_for_each_entry_safe(entry, tmp_entry, &cipso_v4_cache[iter].list, list) { @@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void) cipso_v4_cache_entry_free(entry); } cipso_v4_cache[iter].size = 0; - spin_unlock(&cipso_v4_cache[iter].lock); + spin_unlock_bh(&cipso_v4_cache[iter].lock); } return; @@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key, hash = cipso_v4_map_cache_hash(key, key_len); bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && @@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key, secattr->cache.free = entry->lsm_data.free; secattr->cache.data = entry->lsm_data.data; if (prev_entry == NULL) { - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } @@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key, &prev_entry->list); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } prev_entry = entry; } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return -ENOENT; } @@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, entry->lsm_data.data = secattr->cache.data; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; @@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache_entry_free(old_entry); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -530,197 +530,42 @@ struct cipso_v4_doi 
*cipso_v4_doi_getdef(u32 doi) } /** - * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff - * @headroom: the amount of headroom to allocate for the sk_buff + * cipso_v4_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function * * Description: - * Dump a list of all the configured DOI values into a sk_buff. The returned - * sk_buff has room at the front of the sk_buff for @headroom bytes. See - * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This - * function may fail if another process is changing the DOI list at the same - * time. Returns a pointer to a sk_buff on success, NULL on error. + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. 
* */ -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; + int ret_val = -ENOENT; u32 doi_cnt = 0; - ssize_t buf_len; + struct cipso_v4_doi *iter_doi; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - doi_cnt += 1; - buf_len += 2 * NETLBL_LEN_U32; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_all_failure; - - if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) - goto doi_dump_all_failure; - buf_len -= NETLBL_LEN_U32; - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - if (buf_len < 2 * NETLBL_LEN_U32) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_all_failure; - buf_len -= 2 * NETLBL_LEN_U32; - } - rcu_read_unlock(); - - return skb; - -doi_dump_all_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff - * @doi: the DOI value - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Lookup the DOI definition matching @doi and dump it's contents into a - * sk_buff. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message - * format. This function may fail if another process is changing the DOI list - * at the same time. Returns a pointer to a sk_buff on success, NULL on error. 
- * - */ -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 tag_cnt = 0; - u32 lvl_cnt = 0; - u32 cat_cnt = 0; - ssize_t buf_len; - ssize_t tmp; - - rcu_read_lock(); - iter = cipso_v4_doi_getdef(doi); - if (iter == NULL) - goto doi_dump_failure; - buf_len = NETLBL_LEN_U32; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - buf_len += NETLBL_LEN_U32; - while(tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - buf_len += 3 * NETLBL_LEN_U32; - while (tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - lvl_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - cat_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; } - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_failure; - - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - if (iter != cipso_v4_doi_getdef(doi)) - goto doi_dump_failure; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, 
NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; } - break; - case CIPSO_V4_MAP_STD: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) - goto doi_dump_failure; - buf_len -= 3 * NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u8(skb, - NLA_U8, - iter->map.std->lvl.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u16(skb, - NLA_U16, - iter->map.std->cat.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - rcu_read_unlock(); - - return skb; -doi_dump_failure: +doi_walk_return: rcu_read_unlock(); - kfree(skb); - return NULL; + *skip_cnt = doi_cnt; + return ret_val; } /** @@ -1486,43 +1331,40 @@ socket_setattr_failure: } /** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock * @secattr: the security attributes * * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and 
if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. * */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct sock *sk; struct inet_sock *sk_inet; unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk = sock->sk; - lock_sock(sk); sk_inet = inet_sk(sk); if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - goto socket_getattr_return; + return -ENOMSG; cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - sizeof(struct iphdr); ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); if (ret_val == 0) - goto socket_getattr_return; + return ret_val; doi = ntohl(*(u32 *)&cipso_ptr[2]); rcu_read_lock(); doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { rcu_read_unlock(); - goto socket_getattr_return; + return -ENOMSG; } switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: @@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock, } rcu_read_unlock(); -socket_getattr_return: - release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. 
+ * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + return ret_val; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 19b2071ff31..e82a5be894b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, return ret; } +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} + +late_initcall(tcp_congestion_default); ctl_table ipv4_table[] = { { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7ff2e4273a7..af0aca1e6be 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) printk(KERN_NOTICE "TCP %s already registered\n", ca->name); ret = -EEXIST; } else { - list_add_rcu(&ca->list, &tcp_cong_list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); printk(KERN_INFO "TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index fe23cb7f1e8..9f7121ae13e 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -9,6 +9,9 @@ config NETLABEL ---help--- NetLabel provides support for explicit network packet labeling protocols such as CIPSO and RIPSO. For more information see - Documentation/netlabel. + Documentation/netlabel as well as the NetLabel SourceForge project + for configuration tools and additional documentation. + + * http://netlabel.sf.net If you are unsure, say N. 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index a4f40adc447..4125a55f469 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -41,15 +41,37 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" +/* Argument struct for cipso_v4_doi_walk() */ +struct netlbl_cipsov4_doiwalk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_CIPSOV4_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { + [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, + [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED }, +}; /* * Helper Functions @@ -81,6 +103,41 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) kfree(ptr); } +/** + * netlbl_cipsov4_add_common - Parse the common sections of a ADD message + * @info: the Generic NETLINK info block + * @doi_def: the CIPSO V4 DOI definition + * + * Description: + * Parse the common sections of a ADD message and fill in the related values + * in @doi_def. Returns zero on success, negative values on failure. 
+ * + */ +static int netlbl_cipsov4_add_common(struct genl_info *info, + struct cipso_v4_doi *doi_def) +{ + struct nlattr *nla; + int nla_rem; + u32 iter = 0; + + doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; + + nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) + if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (iter > CIPSO_V4_TAG_MAXCNT) + return -EINVAL; + doi_def->tags[iter++] = nla_get_u8(nla); + } + if (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + + return 0; +} /* * NetLabel Command Handlers @@ -88,9 +145,7 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) /** * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer + * @info: the Generic NETLINK info block * * Description: * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message @@ -98,29 +153,28 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) * error. 
* */ -static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) +static int netlbl_cipsov4_add_std(struct genl_info *info) { int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; - u32 num_lvls; - u32 num_cats; struct cipso_v4_doi *doi_def = NULL; - u32 iter; - u32 tmp_val_a; - u32 tmp_val_b; + struct nlattr *nla_a; + struct nlattr *nla_b; + int nla_a_rem; + int nla_b_rem; - if (msg_len < NETLBL_LEN_U32) - goto add_std_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_std_failure; + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_TAGLST] || + !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) + return -EINVAL; + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } + if (doi_def == NULL) + return -ENOMEM; doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); if (doi_def->map.std == NULL) { ret_val = -ENOMEM; @@ -128,28 +182,32 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) } doi_def->type = CIPSO_V4_MAP_STD; - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_std_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_std_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; - - if (msg_len < 6 * NETLBL_LEN_U32) + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) goto add_std_failure; - num_lvls = netlbl_getinc_u32(&msg, &msg_len); - if (num_lvls == 0) - goto add_std_failure; - doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->lvl.local_size > 
CIPSO_V4_MAX_LOC_LVLS) + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSLVLLOC: + if (nla_get_u32(nla_b) >= + doi_def->map.std->lvl.local_size) + doi_def->map.std->lvl.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSLVLREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->lvl.cipso_size) + doi_def->map.std->lvl.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS || + doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) goto add_std_failure; doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32), @@ -158,9 +216,6 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) ret_val = -ENOMEM; goto add_std_failure; } - doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len); - if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) - goto add_std_failure; doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, sizeof(u32), GFP_KERNEL); @@ -168,68 +223,101 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) ret_val = -ENOMEM; goto add_std_failure; } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + struct nlattr *lvl_loc; + struct nlattr *lvl_rem; + + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + lvl_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLLOC); + lvl_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLREM); + if (lvl_loc == NULL || lvl_rem == NULL) + goto add_std_failure; + doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] = + nla_get_u32(lvl_rem); + doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] = + 
nla_get_u32(lvl_loc); + } - num_cats = netlbl_getinc_u32(&msg, &msg_len); - doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) - goto add_std_failure; - doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, + if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSCATLOC: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.local_size) + doi_def->map.std->cat.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSCATREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.cipso_size) + doi_def->map.std->cat.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS || + doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) + goto add_std_failure; + doi_def->map.std->cat.local = kcalloc( + doi_def->map.std->cat.local_size, sizeof(u32), GFP_KERNEL); - if (doi_def->map.std->cat.local == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); - if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) - goto add_std_failure; - doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size, + if (doi_def->map.std->cat.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std->cat.cipso = kcalloc( + doi_def->map.std->cat.cipso_size, sizeof(u32), GFP_KERNEL); - if (doi_def->map.std->cat.cipso == NULL) { - 
ret_val = -ENOMEM; - goto add_std_failure; - } - - if (msg_len < - num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) + - num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16)) - goto add_std_failure; - - for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) - doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) - doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) - doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; - for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) - doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; - - for (iter = 0; iter < num_lvls; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u8(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->lvl.local_size || - tmp_val_b >= doi_def->map.std->lvl.cipso_size) - goto add_std_failure; - - doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b; - } - - for (iter = 0; iter < num_cats; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u16(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->cat.local_size || - tmp_val_b >= doi_def->map.std->cat.cipso_size) + if (doi_def->map.std->cat.cipso == NULL) { + ret_val = -ENOMEM; goto add_std_failure; - - doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; + } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + struct nlattr *cat_loc; + struct nlattr *cat_rem; + + cat_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATLOC); + cat_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATREM); + if (cat_loc == NULL || cat_rem == NULL) + goto add_std_failure; + doi_def->map.std->cat.local[ + nla_get_u32(cat_loc)] = + nla_get_u32(cat_rem); + 
doi_def->map.std->cat.cipso[ + nla_get_u32(cat_rem)] = + nla_get_u32(cat_loc); + } } - doi_def->doi = doi; ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) goto add_std_failure; @@ -243,9 +331,7 @@ add_std_failure: /** * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer + * @info: the Generic NETLINK info block * * Description: * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message @@ -253,52 +339,31 @@ add_std_failure: * error. * */ -static int netlbl_cipsov4_add_pass(u32 doi, - struct nlattr *msg, - size_t msg_size) +static int netlbl_cipsov4_add_pass(struct genl_info *info) { - int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; + int ret_val; struct cipso_v4_doi *doi_def = NULL; - u32 iter; - if (msg_len < NETLBL_LEN_U32) - goto add_pass_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_pass_failure; + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_pass_failure; - } + if (doi_def == NULL) + return -ENOMEM; doi_def->type = CIPSO_V4_MAP_PASS; - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_pass_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_pass_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_pass_failure; - doi_def->doi = doi; ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) goto add_pass_failure; return 0; add_pass_failure: - if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); + 
netlbl_cipsov4_doi_free(&doi_def->rcu); return ret_val; } @@ -316,34 +381,21 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 doi; u32 map_type; - int msg_len = netlbl_netlink_payload_len(skb); - struct nlattr *msg = netlbl_netlink_payload_data(skb); - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto add_return; - if (msg_len < 2 * NETLBL_LEN_U32) - goto add_return; + if (!info->attrs[NLBL_CIPSOV4_A_MTYPE]) + return -EINVAL; - doi = netlbl_getinc_u32(&msg, &msg_len); - map_type = netlbl_getinc_u32(&msg, &msg_len); + map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (map_type) { case CIPSO_V4_MAP_STD: - ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); + ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: - ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); + ret_val = netlbl_cipsov4_add_pass(info); break; } -add_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); return ret_val; } @@ -353,84 +405,239 @@ add_return: * @info: the Generic NETLINK info block * * Description: - * Process a user generated LIST message and respond accordingly. Returns - * zero on success and negative values on error. + * Process a user generated LIST message and respond accordingly. While the + * response message generated by the kernel is straightforward, determining + * before hand the size of the buffer to allocate is not (we have to generate + * the message to know the size). In order to keep this function sane what we + * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in + * that size, if we fail then we restart with a larger buffer and try again. + * We continue in this manner until we hit a limit of failed attempts then we + * give up and just send an error message. Returns zero on success and + * negative values on error. 
* */ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; + int ret_val; + struct sk_buff *ans_skb = NULL; + u32 nlsze_mult = 1; + void *data; u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - struct sk_buff *ans_skb; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct cipso_v4_doi *doi_def; + u32 iter; - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) { + ret_val = -EINVAL; goto list_failure; + } - doi = nla_get_u32(msg); - ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); +list_start: + ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL); if (ans_skb == NULL) { ret_val = -ENOMEM; goto list_failure; } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LIST); + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_cipsov4_gnl_family.id, + 0, + NLBL_CIPSOV4_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + ret_val = -EINVAL; + goto list_failure; + } + + ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); + if (ret_val != 0) + goto list_failure_lock; + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID; + iter++) { + ret_val = nla_put_u8(ans_skb, + NLBL_CIPSOV4_A_TAG, + doi_def->tags[iter]); + if (ret_val != 0) + goto list_failure_lock; + } + nla_nest_end(ans_skb, nla_a); + + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < 
doi_def->map.std->lvl.local_size; + iter++) { + if (doi_def->map.std->lvl.local[iter] == + CIPSO_V4_INV_LVL) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLREM, + doi_def->map.std->lvl.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + for (iter = 0; + iter < doi_def->map.std->cat.local_size; + iter++) { + if (doi_def->map.std->cat.local[iter] == + CIPSO_V4_INV_CAT) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATREM, + doi_def->map.std->cat.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + break; + } + rcu_read_unlock(); - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto list_failure; return 0; +list_retry: + /* XXX - this limit is a guesstimate */ + if (nlsze_mult < 4) { + rcu_read_unlock(); + kfree_skb(ans_skb); + nlsze_mult++; + goto list_start; + } +list_failure_lock: + rcu_read_unlock(); list_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); + kfree_skb(ans_skb); + return ret_val; +} + +/** + * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL + * @doi_def: the CIPSOv4 DOI definition + * @arg: the 
netlbl_cipsov4_doiwalk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * cipso_v4_doi_walk() function for use in generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. + * + */ +static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_cipsov4_gnl_family.id, + NLM_F_MULTI, + NLBL_CIPSOV4_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, + NLBL_CIPSOV4_A_MTYPE, + doi_def->type); + if (ret_val != 0) + goto listall_cb_failure; + + return genlmsg_end(cb_arg->skb, data); + +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_cipsov4_listall - Handle a LISTALL message * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * @cb: the NETLINK callback * * Description: * Process a user generated LISTALL message and respond accordingly. Returns * zero on success and negative values on error. 
* */ -static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) +static int netlbl_cipsov4_listall(struct sk_buff *skb, + struct netlink_callback *cb) { - int ret_val = -EINVAL; - struct sk_buff *ans_skb; + struct netlbl_cipsov4_doiwalk_arg cb_arg; + int doi_skip = cb->args[0]; - ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) { - ret_val = -ENOMEM; - goto listall_failure; - } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LISTALL); + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto listall_failure; - - return 0; + cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg); -listall_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; + cb->args[0] = doi_skip; + return skb->len; } /** @@ -445,27 +652,14 @@ listall_failure: */ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { - int ret_val; + int ret_val = -EINVAL; u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; - - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto remove_return; + if (info->attrs[NLBL_CIPSOV4_A_DOI]) { + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); } - doi = nla_get_u32(msg); - ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); - -remove_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); return ret_val; } @@ -475,14 +669,16 @@ remove_return: static struct genl_ops netlbl_cipsov4_genl_c_add = { .cmd = NLBL_CIPSOV4_C_ADD, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = 
netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_add, .dumpit = NULL, }; static struct genl_ops netlbl_cipsov4_genl_c_remove = { .cmd = NLBL_CIPSOV4_C_REMOVE, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }; @@ -490,6 +686,7 @@ static struct genl_ops netlbl_cipsov4_genl_c_remove = { static struct genl_ops netlbl_cipsov4_genl_c_list = { .cmd = NLBL_CIPSOV4_C_LIST, .flags = 0, + .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_list, .dumpit = NULL, }; @@ -497,8 +694,9 @@ static struct genl_ops netlbl_cipsov4_genl_c_list = { static struct genl_ops netlbl_cipsov4_genl_c_listall = { .cmd = NLBL_CIPSOV4_C_LISTALL, .flags = 0, - .doit = netlbl_cipsov4_listall, - .dumpit = NULL, + .policy = netlbl_cipsov4_genl_policy, + .doit = NULL, + .dumpit = netlbl_cipsov4_listall, }; /* diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 4c6ff4b9300..f03cf9b7828 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -34,175 +34,71 @@ #include <net/netlabel.h> /* - * The following NetLabel payloads are supported by the CIPSO subsystem, all - * of which are preceeded by the nlmsghdr struct. + * The following NetLabel payloads are supported by the CIPSO subsystem. * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. + * o ADD: + * Sent by an application to add a new DOI mapping table. 
* - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ + * Required attributes: * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST * - * o ADD: - * Sent by an application to add a new DOI mapping table, after completion - * of the task the kernel should ACK this message. - * - * +---------------+--------------------+---------------------+ - * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... - * +---------------+--------------------+---------------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * +-------------- ---- --- -- - - * | mapping data - * +-------------- ---- --- -- - - * - * DOI: the DOI value - * map type: the mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * tag count: the number of tags, must be greater than zero - * tag: the CIPSO tag for the DOI, tags listed first are given - * higher priorirty when sending packets - * mapping data: specific to the map type (see below) - * - * CIPSO_V4_MAP_STD - * - * +------------------+-----------------------+----------------------+ - * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ... - * +------------------+-----------------------+----------------------+ - * - * +----------------------+---------------------+---------------------+ - * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ... - * +----------------------+---------------------+---------------------+ - * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... 
repeated - * +--------------------------+-------------------------+ - * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ - * - * levels: the number of level mappings - * max l level: the highest local level - * max r level: the highest remote/CIPSO level - * categories: the number of category mappings - * max l cat: the highest local category - * max r cat: the highest remote/CIPSO category - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping - * - * CIPSO_V4_MAP_PASS - * - * No mapping data is needed for this map type. + * If using CIPSO_V4_MAP_STD the following attributes are required: + * + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST + * + * If using CIPSO_V4_MAP_PASS no additional attributes are required. * * o REMOVE: * Sent by an application to remove a specific DOI mapping table from the - * CIPSO V4 system. The kernel should ACK this message. + * CIPSO V4 system. * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ + * Required attributes: * - * DOI: the DOI value + * NLBL_CIPSOV4_A_DOI * * o LIST: - * Sent by an application to list the details of a DOI definition. The - * kernel should send an ACK on error or a response as indicated below. The - * application generated message format is shown below. + * Sent by an application to list the details of a DOI definition. On + * success the kernel should send a response using the following format. * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ + * Required attributes: * - * DOI: the DOI value + * NLBL_CIPSOV4_A_DOI * * The valid response message format depends on the type of the DOI mapping, - * the known formats are shown below. 
- * - * +--------------------+ - * | map type (32 bits) | ... - * +--------------------+ - * - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) - * - * (map type == CIPSO_V4_MAP_STD) - * - * +----------------+------------------+----------------------+ - * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ... - * +----------------+------------------+----------------------+ + * the defined formats are shown below. * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ + * Required attributes: * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated - * +--------------------------+-------------------------+ + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ + * If using CIPSO_V4_MAP_STD the following attributes are required: * - * tags: the number of CIPSO tag types - * levels: the number of level mappings - * categories: the number of category mappings - * tag: the tag number, tags listed first are given higher - * priority when sending packets - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST * - * (map type == CIPSO_V4_MAP_PASS) - * - * +----------------+ - * | tags (32 bits) | ... - * +----------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... 
repeated - * +-----------------+ - * - * tags: the number of CIPSO tag types - * tag: the tag number, tags listed first are given higher - * priority when sending packets + * If using CIPSO_V4_MAP_PASS no additional attributes are required. * * o LISTALL: * This message is sent by an application to list the valid DOIs on the - * system. There is no payload and the kernel should respond with an ACK - * or the following message. - * - * +---------------------+------------------+-----------------------+ - * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) | - * +---------------------+------------------+-----------------------+ + * system. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. * - * +-----------------------+ - * | map type #X (32 bits) | ... - * +-----------------------+ + * Required attributes: * - * DOI count: the number of DOIs - * DOI: the DOI value - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE * */ /* NetLabel CIPSOv4 commands */ enum { NLBL_CIPSOV4_C_UNSPEC, - NLBL_CIPSOV4_C_ACK, NLBL_CIPSOV4_C_ADD, NLBL_CIPSOV4_C_REMOVE, NLBL_CIPSOV4_C_LIST, @@ -211,6 +107,59 @@ enum { }; #define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) +/* NetLabel CIPSOv4 attributes */ +enum { + NLBL_CIPSOV4_A_UNSPEC, + NLBL_CIPSOV4_A_DOI, + /* (NLA_U32) + * the DOI value */ + NLBL_CIPSOV4_A_MTYPE, + /* (NLA_U32) + * the mapping table type (defined in the cipso_ipv4.h header as + * CIPSO_V4_MAP_*) */ + NLBL_CIPSOV4_A_TAG, + /* (NLA_U8) + * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST + * attribute */ + NLBL_CIPSOV4_A_TAGLST, + /* (NLA_NESTED) + * the CIPSO tag list for the DOI, there must be at least one + * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher + * priorirty when sending packets */ + 
NLBL_CIPSOV4_A_MLSLVLLOC, + /* (NLA_U32) + * the local MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVLREM, + /* (NLA_U32) + * the remote MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVL, + /* (NLA_NESTED) + * a MLS sensitivity level mapping, must contain only one attribute of + * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and + * NLBL_CIPSOV4_A_MLSLVLREM */ + NLBL_CIPSOV4_A_MLSLVLLST, + /* (NLA_NESTED) + * the CIPSO level mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSLVL attribute */ + NLBL_CIPSOV4_A_MLSCATLOC, + /* (NLA_U32) + * the local MLS category */ + NLBL_CIPSOV4_A_MLSCATREM, + /* (NLA_U32) + * the remote MLS category */ + NLBL_CIPSOV4_A_MLSCAT, + /* (NLA_NESTED) + * a MLS category mapping, must contain only one attribute of each of + * the following types: NLBL_CIPSOV4_A_MLSCATLOC and + * NLBL_CIPSOV4_A_MLSCATREM */ + NLBL_CIPSOV4_A_MLSCATLST, + /* (NLA_NESTED) + * the CIPSO category mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSCAT attribute */ + __NLBL_CIPSOV4_A_MAX, +}; +#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_cipsov4_genl_init(void); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0489a137810..f56d7a8ac7b 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -354,160 +354,51 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) } /** - * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff + * netlbl_domhsh_walk - Iterate through the domain mapping hash table + * @skip_bkt: the number of buckets to skip at the start + * @skip_chain: the number of entries to skip in the first iterated bucket + * @callback: callback for each entry + * @cb_arg: argument for the callback function * * Description: - * Dump the domain hash table into a buffer suitable for returning to an - * application in response to a NetLabel management DOMAIN message. 
This - * function may fail if another process is growing the hash table at the same - * time. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a - * pointer to a sk_buff on success, NULL on error. + * Interate over the domain mapping hash table, skipping the first @skip_bkt + * buckets and @skip_chain entries. For each entry in the table call + * @callback, if @callback returns a negative value stop 'walking' through the + * table and return. Updates the values in @skip_bkt and @skip_chain on + * return. Returns zero on succcess, negative values on failure. * */ -struct sk_buff *netlbl_domhsh_dump(size_t headroom) +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - ssize_t buf_len; - u32 bkt_iter; - u32 dom_cnt = 0; - struct netlbl_domhsh_tbl *hsh_tbl; - struct netlbl_dom_map *list_iter; - ssize_t tmp_len; + int ret_val = -ENOENT; + u32 iter_bkt; + struct netlbl_dom_map *iter_entry; + u32 chain_cnt = 0; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - buf_len += NETLBL_LEN_U32 + - nla_total_size(strlen(list_iter->domain) + 1); - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - dom_cnt++; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_failure; - - if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32; - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - tmp_len = 
nla_total_size(strlen(list_iter->domain) + - 1); - if (buf_len < NETLBL_LEN_U32 + tmp_len) - goto dump_failure; - if (nla_put_string(skb, - NLA_STRING, - list_iter->domain) != 0) - goto dump_failure; - if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32 + tmp_len; - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->type) != 0) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->doi) != 0) - goto dump_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; + for (iter_bkt = *skip_bkt; + iter_bkt < rcu_dereference(netlbl_domhsh)->size; + iter_bkt++, chain_cnt = 0) { + list_for_each_entry_rcu(iter_entry, + &netlbl_domhsh->tbl[iter_bkt], + list) + if (iter_entry->valid) { + if (chain_cnt++ < *skip_chain) + continue; + ret_val = callback(iter_entry, cb_arg); + if (ret_val < 0) { + chain_cnt--; + goto walk_return; + } } - } - rcu_read_unlock(); - - return skb; - -dump_failure: - rcu_read_unlock(); - kfree_skb(skb); - return NULL; -} - -/** - * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff - * - * Description: - * Dump the default domain mapping into a buffer suitable for returning to an - * application in response to a NetLabel management DEFDOMAIN message. This - * function may fail if another process is changing the default domain mapping - * at the same time. The returned sk_buff has room at the front of the - * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message - * format. Returns a pointer to a sk_buff on success, NULL on error. 
- * - */ -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom) -{ - struct sk_buff *skb; - ssize_t buf_len; - struct netlbl_dom_map *entry; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - entry = rcu_dereference(netlbl_domhsh_def); - if (entry != NULL) - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_default_failure; - - if (entry != rcu_dereference(netlbl_domhsh_def)) - goto dump_default_failure; - if (entry != NULL) { - if (nla_put_u32(skb, NLA_U32, entry->type) != 0) - goto dump_default_failure; - buf_len -= NETLBL_LEN_U32; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->type) != 0) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->doi) != 0) - goto dump_default_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; - } - } else - nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE); - rcu_read_unlock(); - - return skb; + } -dump_default_failure: +walk_return: rcu_read_unlock(); - kfree_skb(skb); - return NULL; + *skip_bkt = iter_bkt; + *skip_chain = chain_cnt; + return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 99a2287de24..02af72a7877 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -61,7 +61,9 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); int netlbl_domhsh_remove_default(void); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct sk_buff *netlbl_domhsh_dump(size_t headroom); -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); +int netlbl_domhsh_walk(u32 
*skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg); #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 0fd8aaafe23..54fb7de3c2b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -85,6 +85,29 @@ socket_setattr_return: } /** + * netlbl_sock_getattr - Determine the security attributes of a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Examines the given sock to see any NetLabel style labeling has been + * applied to the sock, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure. + * + */ +int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_sock_getattr(sk, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** * netlbl_socket_getattr - Determine the security attributes of a socket * @sock: the socket * @secattr: the security attributes diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 85bc11a1fc4..8626c9f678e 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -42,15 +42,29 @@ #include "netlabel_user.h" #include "netlabel_mgmt.h" +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_mgmt_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_MGMT_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { + [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, + [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, + 
[NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, + [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 }, +}; /* * NetLabel Command Handlers @@ -70,97 +84,62 @@ static struct genl_family netlbl_mgmt_gnl_family = { static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; struct netlbl_dom_map *entry = NULL; - u32 iter; + size_t tmp_size; u32 tmp_val; - int tmp_size; - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) + if (!info->attrs[NLBL_MGMT_A_DOMAIN] || + !info->attrs[NLBL_MGMT_A_PROTOCOL]) goto add_failure; - if (msg_len < NETLBL_LEN_U32) + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; goto add_failure; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); + } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); - for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto add_failure; - } - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } - tmp_size = nla_len(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len) { - ret_val = -EINVAL; - goto add_failure; - } - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add(entry); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_failure; - } - nla_strlcpy(entry->domain, msg_ptr, tmp_size); - entry->domain[tmp_size - 1] = '\0'; - msg_ptr = nla_next(msg_ptr, 
&msg_len); - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - entry->type = tmp_val; - switch (tmp_val) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock() here - * while we hold the result but since the entry - * will always be deleted when the CIPSO DOI - * is deleted we aren't going to keep the lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - ret_val = -EINVAL; - goto add_failure; - } - ret_val = netlbl_domhsh_add(entry); + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. 
*/ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { rcu_read_unlock(); - break; - default: - ret_val = -EINVAL; - } - if (ret_val != 0) goto add_failure; + } + ret_val = netlbl_domhsh_add(entry); + rcu_read_unlock(); + break; + default: + goto add_failure; } + if (ret_val != 0) + goto add_failure; - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); return 0; add_failure: if (entry) kfree(entry->domain); kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -176,87 +155,98 @@ add_failure: */ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; - u32 iter; - int tmp_size; - unsigned char *domain; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; + char *domain; - if (msg_len < NETLBL_LEN_U32) - goto remove_return; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); + if (!info->attrs[NLBL_MGMT_A_DOMAIN]) + return -EINVAL; - for (iter = 0; iter < count && msg_len > 0; iter++) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto remove_return; - } - tmp_size = nla_len(msg_ptr); - domain = nla_data(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len || - domain[tmp_size - 1] != '\0') { - ret_val = -EINVAL; - goto remove_return; - } - ret_val = netlbl_domhsh_remove(domain); + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); + return netlbl_domhsh_remove(domain); +} + +/** + * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL + * @entry: the domain mapping hash table entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * netlbl_domhsh_walk() function for use in 
generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. + * + */ +static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_domhsh_walk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_string(cb_arg->skb, + NLBL_MGMT_A_DOMAIN, + entry->domain); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + goto listall_cb_failure; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(cb_arg->skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); if (ret_val != 0) - goto remove_return; - msg_ptr = nla_next(msg_ptr, &msg_len); + goto listall_cb_failure; + break; } - ret_val = 0; + cb_arg->seq++; + return genlmsg_end(cb_arg->skb, data); -remove_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** - * netlbl_mgmt_list - Handle a LIST message + * netlbl_mgmt_listall - Handle a LISTALL message * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * @cb: the NETLINK callback * * Description: - * Process a user generated LIST message and dumps the domain hash table in a - * form suitable for use in a kernel generated LIST message. Returns zero on - * success, negative values on failure. + * Process a user generated LISTALL message and dumps the domain hash table in + * a form suitable for use in a kernel generated LISTALL message. Returns zero + * on success, negative values on failure. 
* */ -static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_listall(struct sk_buff *skb, + struct netlink_callback *cb) { - int ret_val = -ENOMEM; - struct sk_buff *ans_skb; - - ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) - goto list_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LIST); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto list_failure; - - return 0; - -list_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; + struct netlbl_domhsh_walk_arg cb_arg; + u32 skip_bkt = cb->args[0]; + u32 skip_chain = cb->args[1]; + + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; + + netlbl_domhsh_walk(&skip_bkt, + &skip_chain, + netlbl_mgmt_listall_cb, + &cb_arg); + + cb->args[0] = skip_bkt; + cb->args[1] = skip_chain; + return skb->len; } /** @@ -272,68 +262,51 @@ list_failure: static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); struct netlbl_dom_map *entry = NULL; u32 tmp_val; - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto adddef_failure; - - if (msg_len < NETLBL_LEN_U32) + if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) goto adddef_failure; - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; goto adddef_failure; } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - entry->type = tmp_val; switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: ret_val = netlbl_domhsh_add_default(entry); break; case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto 
adddef_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock here while we - * hold the result but since the entry will always be - * deleted when the CIPSO DOI is deleted we are going - * to skip the lock. */ + + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. */ rcu_read_lock(); entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); if (entry->type_def.cipsov4 == NULL) { rcu_read_unlock(); - ret_val = -EINVAL; goto adddef_failure; } ret_val = netlbl_domhsh_add_default(entry); rcu_read_unlock(); break; default: - ret_val = -EINVAL; + goto adddef_failure; } if (ret_val != 0) goto adddef_failure; - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); return 0; adddef_failure: kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -349,20 +322,7 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - int ret_val; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto removedef_return; - - ret_val = netlbl_domhsh_remove_default(); - -removedef_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; + return netlbl_domhsh_remove_default(); } /** @@ -379,88 +339,131 @@ removedef_return: static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; - struct sk_buff *ans_skb; + struct sk_buff *ans_skb = NULL; + void *data; + struct netlbl_dom_map *entry; - ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN)); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) + return -ENOMEM; + data = 
netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_LISTDEF); + if (data == NULL) goto listdef_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LISTDEF); - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + rcu_read_lock(); + entry = netlbl_domhsh_getentry(NULL); + if (entry == NULL) { + ret_val = -ENOENT; + goto listdef_failure_lock; + } + ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); if (ret_val != 0) - goto listdef_failure; + goto listdef_failure_lock; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + if (ret_val != 0) + goto listdef_failure_lock; + break; + } + rcu_read_unlock(); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto listdef_failure; return 0; +listdef_failure_lock: + rcu_read_unlock(); listdef_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); + kfree_skb(ans_skb); return ret_val; } /** - * netlbl_mgmt_modules - Handle a MODULES message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response + * @skb: the skb to write to + * @seq: the NETLINK sequence number + * @cb: the NETLINK callback + * @protocol: the NetLabel protocol to use in the message * * Description: - * Process a user generated MODULES message and respond accordingly. + * This function is to be used in conjunction with netlbl_mgmt_protocols() to + * answer a application's PROTOCOLS message. Returns the size of the message + * on success, negative values on failure. 
* */ -static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, + struct netlink_callback *cb, + u32 protocol) { int ret_val = -ENOMEM; - size_t data_size; - u32 mod_count; - struct sk_buff *ans_skb = NULL; - - /* unlabeled + cipsov4 */ - mod_count = 2; - - data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32; - ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (ans_skb == NULL) - goto modules_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_MODULES) == NULL) - goto modules_failure; - - ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count); - if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED); + void *data; + + data = netlbl_netlink_hdr_put(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_PROTOCOLS); + if (data == NULL) + goto protocols_cb_failure; + + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol); if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4); - if (ret_val != 0) - goto modules_failure; - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto modules_failure; + goto protocols_cb_failure; - return 0; + return genlmsg_end(skb, data); -modules_failure: - kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); +protocols_cb_failure: + genlmsg_cancel(skb, data); return ret_val; } /** + * netlbl_mgmt_protocols - Handle a PROTOCOLS message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated PROTOCOLS message and respond accordingly. 
+ * + */ +static int netlbl_mgmt_protocols(struct sk_buff *skb, + struct netlink_callback *cb) +{ + u32 protos_sent = cb->args[0]; + + if (protos_sent == 0) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_UNLABELED) < 0) + goto protocols_return; + protos_sent++; + } + if (protos_sent == 1) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_CIPSOV4) < 0) + goto protocols_return; + protos_sent++; + } + +protocols_return: + cb->args[0] = protos_sent; + return skb->len; +} + +/** * netlbl_mgmt_version - Handle a VERSION message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -474,35 +477,35 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; struct sk_buff *ans_skb = NULL; + void *data; - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) - goto version_failure; - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_VERSION) == NULL) + return -ENOMEM; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_VERSION); + if (data == NULL) goto version_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_VERSION, + NETLBL_PROTO_VERSION); if (ret_val != 0) goto version_failure; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto version_failure; - return 0; version_failure: kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -513,35 +516,40 @@ version_failure: static struct genl_ops netlbl_mgmt_genl_c_add = { .cmd = NLBL_MGMT_C_ADD, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = 
netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_add, .dumpit = NULL, }; static struct genl_ops netlbl_mgmt_genl_c_remove = { .cmd = NLBL_MGMT_C_REMOVE, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_remove, .dumpit = NULL, }; -static struct genl_ops netlbl_mgmt_genl_c_list = { - .cmd = NLBL_MGMT_C_LIST, +static struct genl_ops netlbl_mgmt_genl_c_listall = { + .cmd = NLBL_MGMT_C_LISTALL, .flags = 0, - .doit = netlbl_mgmt_list, - .dumpit = NULL, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = netlbl_mgmt_listall, }; static struct genl_ops netlbl_mgmt_genl_c_adddef = { .cmd = NLBL_MGMT_C_ADDDEF, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }; static struct genl_ops netlbl_mgmt_genl_c_removedef = { .cmd = NLBL_MGMT_C_REMOVEDEF, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }; @@ -549,20 +557,23 @@ static struct genl_ops netlbl_mgmt_genl_c_removedef = { static struct genl_ops netlbl_mgmt_genl_c_listdef = { .cmd = NLBL_MGMT_C_LISTDEF, .flags = 0, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }; -static struct genl_ops netlbl_mgmt_genl_c_modules = { - .cmd = NLBL_MGMT_C_MODULES, +static struct genl_ops netlbl_mgmt_genl_c_protocols = { + .cmd = NLBL_MGMT_C_PROTOCOLS, .flags = 0, - .doit = netlbl_mgmt_modules, - .dumpit = NULL, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = netlbl_mgmt_protocols, }; static struct genl_ops netlbl_mgmt_genl_c_version = { .cmd = NLBL_MGMT_C_VERSION, .flags = 0, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_version, .dumpit = NULL, }; @@ -596,7 +607,7 @@ int netlbl_mgmt_genl_init(void) if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_list); + &netlbl_mgmt_genl_c_listall); if (ret_val != 0) return 
ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, @@ -612,7 +623,7 @@ int netlbl_mgmt_genl_init(void) if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_modules); + &netlbl_mgmt_genl_c_protocols); if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index fd6c6acbfa0..3642d3bfc8e 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -34,212 +34,137 @@ #include <net/netlabel.h> /* - * The following NetLabel payloads are supported by the management interface, - * all of which are preceeded by the nlmsghdr struct. - * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. - * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values + * The following NetLabel payloads are supported by the management interface. * * o ADD: * Sent by an application to add a domain mapping to the NetLabel system. - * The kernel should respond with an ACK. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message - * - * +--------------------------+-------------------------+ - * | domain string (variable) | protocol type (32 bits) | ... - * +--------------------------+-------------------------+ * - * +-------------- ---- --- -- - - * | mapping data ... 
repeated - * +-------------- ---- --- -- - + * Required attributes: * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL * - * NETLBL_NLTYPE_UNLABELED + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * No mapping data for this protocol type. + * NLBL_MGMT_A_CV4DOI * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o REMOVE: * Sent by an application to remove a domain mapping from the NetLabel - * system. The kernel should ACK this message. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ + * system. * - * domains: the number of domains in the message + * Required attributes: * - * +--------------------------+ - * | domain string (variable) | ... - * +--------------------------+ + * NLBL_MGMT_A_DOMAIN * - * domain string: the domain string, NULL terminated - * - * o LIST: + * o LISTALL: * This message can be sent either from an application or by the kernel in - * response to an application generated LIST message. When sent by an - * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message + * response to an application generated LISTALL message. When sent by an + * application there is no payload and the NLM_F_DUMP flag should be set. + * The kernel should respond with a series of the following messages. * - * +--------------------------+ - * | domain string (variable) | ... 
- * +--------------------------+ + * Required attributes: * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * NETLBL_NLTYPE_UNLABELED + * NLBL_MGMT_A_CV4DOI * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ - * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o ADDDEF: * Sent by an application to set the default domain mapping for the NetLabel - * system. The kernel should respond with an ACK. + * system. * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * Required attributes: * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * NLBL_MGMT_A_PROTOCOL * - * NETLBL_NLTYPE_UNLABELED + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * No mapping data for this protocol type. + * NLBL_MGMT_A_CV4DOI * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o REMOVEDEF: * Sent by an application to remove the default domain mapping from the - * NetLabel system, there is no payload. 
The kernel should ACK this message. + * NetLabel system, there is no payload. * * o LISTDEF: * This message can be sent either from an application or by the kernel in * response to an application generated LISTDEF message. When sent by an - * application there is no payload. The kernel should respond to a - * LISTDEF message either with a LISTDEF message on success or an ACK message - * on failure. - * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * application there is no payload. On success the kernel should send a + * response using the following format. * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * Required attributes: * - * NETLBL_NLTYPE_UNLABELED + * NLBL_MGMT_A_PROTOCOL * - * No mapping data for this protocol type. + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * NETLBL_NLTYPE_CIPSOV4 + * NLBL_MGMT_A_CV4DOI * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value + * o PROTOCOLS: + * Sent by an application to request a list of configured NetLabel protocols + * in the kernel. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. * - * o MODULES: - * Sent by an application to request a list of configured NetLabel modules - * in the kernel. When sent by an application there is no payload. + * Required attributes: * - * +-------------------+ - * | modules (32 bits) | ... 
- * +-------------------+ - * - * modules: the number of modules in the message, if this is an application - * generated message and the value is zero then return a list of - * the configured modules - * - * +------------------+ - * | module (32 bits) | ... repeated - * +------------------+ - * - * module: the module number as defined by NETLBL_NLTYPE_* + * NLBL_MGMT_A_PROTOCOL * * o VERSION: - * Sent by an application to request the NetLabel version string. When sent - * by an application there is no payload. This message type is also used by - * the kernel to respond to an VERSION request. + * Sent by an application to request the NetLabel version. When sent by an + * application there is no payload. This message type is also used by the + * kernel to respond to an VERSION request. * - * +-------------------+ - * | version (32 bits) | - * +-------------------+ + * Required attributes: * - * version: the protocol version number + * NLBL_MGMT_A_VERSION * */ /* NetLabel Management commands */ enum { NLBL_MGMT_C_UNSPEC, - NLBL_MGMT_C_ACK, NLBL_MGMT_C_ADD, NLBL_MGMT_C_REMOVE, - NLBL_MGMT_C_LIST, + NLBL_MGMT_C_LISTALL, NLBL_MGMT_C_ADDDEF, NLBL_MGMT_C_REMOVEDEF, NLBL_MGMT_C_LISTDEF, - NLBL_MGMT_C_MODULES, + NLBL_MGMT_C_PROTOCOLS, NLBL_MGMT_C_VERSION, __NLBL_MGMT_C_MAX, }; #define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) +/* NetLabel Management attributes */ +enum { + NLBL_MGMT_A_UNSPEC, + NLBL_MGMT_A_DOMAIN, + /* (NLA_NUL_STRING) + * the NULL terminated LSM domain string */ + NLBL_MGMT_A_PROTOCOL, + /* (NLA_U32) + * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */ + NLBL_MGMT_A_VERSION, + /* (NLA_U32) + * the NetLabel protocol version number (defined by + * NETLBL_PROTO_VERSION) */ + NLBL_MGMT_A_CV4DOI, + /* (NLA_U32) + * the CIPSOv4 DOI value */ + __NLBL_MGMT_A_MAX, +}; +#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_mgmt_genl_init(void); diff --git a/net/netlabel/netlabel_unlabeled.c 
b/net/netlabel/netlabel_unlabeled.c index 785f4960e0d..440f5c4e1e2 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -55,9 +55,13 @@ static struct genl_family netlbl_unlabel_gnl_family = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_UNLABEL_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { + [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, +}; /* * NetLabel Command Handlers @@ -75,31 +79,18 @@ static struct genl_family netlbl_unlabel_gnl_family = { */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { - int ret_val; - struct nlattr *data = netlbl_netlink_payload_data(skb); - u32 value; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - return ret_val; + int ret_val = -EINVAL; + u8 value; - if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { - value = nla_get_u32(data); + if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { + value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - NETLBL_E_OK); - return 0; + ret_val = 0; } } - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - EINVAL); - return -EINVAL; + return ret_val; } /** @@ -114,39 +105,39 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) */ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -ENOMEM; + int ret_val = -EINVAL; struct sk_buff *ans_skb; + void *data; - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - 
info->snd_pid, - 0, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_LIST) == NULL) + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_unlabel_gnl_family.id, + 0, + NLBL_UNLABEL_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; goto list_failure; + } - ret_val = nla_put_u32(ans_skb, - NLA_U32, - atomic_read(&netlabel_unlabel_accept_flg)); + ret_val = nla_put_u8(ans_skb, + NLBL_UNLABEL_A_ACPTFLG, + atomic_read(&netlabel_unlabel_accept_flg)); if (ret_val != 0) goto list_failure; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto list_failure; - return 0; list_failure: - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - -ret_val); + kfree(ans_skb); return ret_val; } @@ -157,7 +148,8 @@ list_failure: static struct genl_ops netlbl_unlabel_genl_c_accept = { .cmd = NLBL_UNLABEL_C_ACCEPT, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_accept, .dumpit = NULL, }; @@ -165,6 +157,7 @@ static struct genl_ops netlbl_unlabel_genl_c_accept = { static struct genl_ops netlbl_unlabel_genl_c_list = { .cmd = NLBL_UNLABEL_C_LIST, .flags = 0, + .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_list, .dumpit = NULL, }; @@ -218,10 +211,8 @@ int netlbl_unlabel_genl_init(void) */ int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) { - if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { - memset(secattr, 0, sizeof(*secattr)); - return 0; - } + if (atomic_read(&netlabel_unlabel_accept_flg) == 1) + return netlbl_secattr_init(secattr); return -ENOMSG; } diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index f300e54e14b..c2917fbb42c 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -36,56 +36,47 @@ /* * The following NetLabel payloads are supported by the 
Unlabeled subsystem. * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. - * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values - * * o ACCEPT * This message is sent from an application to specify if the kernel should * allow unlabled packets to pass if they do not match any of the static * mappings defined in the unlabeled module. * - * +-----------------+ - * | allow (32 bits) | - * +-----------------+ + * Required attributes: * - * allow: if true (1) then allow the packets to pass, if false (0) then - * reject the packets + * NLBL_UNLABEL_A_ACPTFLG * * o LIST * This message can be sent either from an application or by the kernel in * response to an application generated LIST message. When sent by an * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. + * message with a LIST message on success. 
* - * +-----------------------+ - * | accept flag (32 bits) | - * +-----------------------+ + * Required attributes: * - * accept flag: if true (1) then unlabeled packets are allowed to pass, - * if false (0) then unlabeled packets are rejected + * NLBL_UNLABEL_A_ACPTFLG * */ /* NetLabel Unlabeled commands */ enum { NLBL_UNLABEL_C_UNSPEC, - NLBL_UNLABEL_C_ACK, NLBL_UNLABEL_C_ACCEPT, NLBL_UNLABEL_C_LIST, __NLBL_UNLABEL_C_MAX, }; #define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) +/* NetLabel Unlabeled attributes */ +enum { + NLBL_UNLABEL_A_UNSPEC, + NLBL_UNLABEL_A_ACPTFLG, + /* (NLA_U8) + * if true then unlabeled packets are allowed to pass, else unlabeled + * packets are rejected */ + __NLBL_UNLABEL_A_MAX, +}; +#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_unlabel_genl_init(void); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 73cbe66e42f..eeb7d768d2b 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -74,85 +74,3 @@ int netlbl_netlink_init(void) return 0; } - -/* - * NetLabel Common Protocol Functions - */ - -/** - * netlbl_netlink_send_ack - Send an ACK message - * @info: the generic NETLINK information - * @genl_family: the generic NETLINK family ID value - * @ack_cmd: the generic NETLINK family ACK command value - * @ret_code: return code to use - * - * Description: - * This function sends an ACK message to the sender of the NETLINK message - * specified by @info. 
- * - */ -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code) -{ - size_t data_size; - struct sk_buff *skb; - - data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; - skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (skb == NULL) - return; - - if (netlbl_netlink_hdr_put(skb, - info->snd_pid, - 0, - genl_family, - ack_cmd) == NULL) - goto send_ack_failure; - - if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) - goto send_ack_failure; - if (nla_put_u32(skb, NLA_U32, ret_code) != 0) - goto send_ack_failure; - - netlbl_netlink_snd(skb, info->snd_pid); - return; - -send_ack_failure: - kfree_skb(skb); -} - -/* - * NETLINK I/O Functions - */ - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: destination PID - * - * Description: - * Sends a unicast NetLabel message over the NETLINK socket. - * - */ -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) -{ - return genlmsg_unicast(skb, pid); -} - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: sending PID - * @group: multicast group id - * - * Description: - * Sends a multicast NetLabel message over the NETLINK socket to all members - * of @group except @pid. - * - */ -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) -{ - return genlmsg_multicast(skb, pid, group, GFP_KERNEL); -} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 385a6c7488c..3f9386b917d 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -41,72 +41,6 @@ /* NetLabel NETLINK helper functions */ /** - * netlbl_netlink_cap_check - Check the NETLINK msg capabilities - * @skb: the NETLINK buffer - * @req_cap: the required capability - * - * Description: - * Check the NETLINK buffer's capabilities against the required capabilities. - * Returns zero on success, negative values on failure. 
- * - */ -static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, - kernel_cap_t req_cap) -{ - if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) - return 0; - return -EPERM; -} - -/** - * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u8 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) -{ - u8 val = nla_get_u8(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u16 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) -{ - u16 val = nla_get_u16(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u32 value pointed to by @nla and advance it to the next attribute. 
- * - */ -static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) -{ - u32 val = nla_get_u32(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff * @skb: the packet * @pid: the PID of the receipient @@ -124,6 +58,7 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, u32 pid, u32 seq, int type, + int flags, u8 cmd) { return genlmsg_put(skb, @@ -131,85 +66,13 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, seq, type, 0, - 0, + flags, cmd, NETLBL_PROTO_VERSION); } -/** - * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. - * - */ -static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - u8 cmd) - -{ - struct nlmsghdr *nlh; - struct genlmsghdr *hdr; - - nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); - nlh->nlmsg_type = type; - nlh->nlmsg_len = skb->len; - nlh->nlmsg_flags = 0; - nlh->nlmsg_pid = pid; - nlh->nlmsg_seq = seq; - - hdr = nlmsg_data(nlh); - hdr->cmd = cmd; - hdr->version = NETLBL_PROTO_VERSION; - hdr->reserved = 0; -} - -/** - * netlbl_netlink_payload_len - Return the length of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns the length of the NetLabel payload. - * - */ -static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) -{ - return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; -} - -/** - * netlbl_netlink_payload_data - Returns a pointer to the start of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns a pointer to the start of the NetLabel payload. 
- * - */ -static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) -{ - return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + - GENL_HDRLEN; -} - -/* NetLabel common protocol functions */ - -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code); - /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); #endif diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 814ddc42f1f..293dbd6246c 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -124,3 +124,40 @@ config SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT If you are unsure what do do here, select N. +config SECURITY_SELINUX_POLICYDB_VERSION_MAX + bool "NSA SELinux maximum supported policy format version" + depends on SECURITY_SELINUX + default n + help + This option enables the maximum policy format version supported + by SELinux to be set to a particular value. This value is reported + to userspace via /selinux/policyvers and used at policy load time. + It can be adjusted downward to support legacy userland (init) that + does not correctly handle kernels that support newer policy versions. + + Examples: + For the Fedora Core 3 or 4 Linux distributions, enable this option + and set the value via the next option. For Fedore Core 5 and later, + do not enable this option. + + If you are unsure how to answer this question, answer N. + +config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE + int "NSA SELinux maximum supported policy format version value" + depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX + range 15 21 + default 19 + help + This option sets the value for the maximum policy format version + supported by SELinux. + + Examples: + For Fedora Core 3, use 18. + For Fedora Core 4, use 19. 
+ + If you are unsure how to answer this question, look for the + policy format version supported by your policy toolchain, by + running 'checkpolicy -V'. Or look at what policy you have + installed under /etc/selinux/$SELINUXTYPE/policy, where + SELINUXTYPE is defined in your /etc/selinux/config. + diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 9d7737db5e5..b6f96943be1 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -21,19 +21,10 @@ #include "security.h" #include "objsec.h" -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { - struct task_security_struct *tsec = tsk->security; if (selinux_enabled) - *ctxid = tsec->sid; - else - *ctxid = 0; -} - -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) -{ - if (selinux_enabled) - return security_sid_to_context(ctxid, ctx, ctxlen); + return security_sid_to_context(sid, ctx, ctxlen); else { *ctx = NULL; *ctxlen = 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5a66c4c09f7..e4d81a42fca 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,7 +51,6 @@ #include <net/ip.h> /* for sysctl_local_port_range[] */ #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <asm/ioctls.h> #include <linux/bitops.h> #include <linux/interrupt.h> @@ -71,6 +70,7 @@ #include <linux/audit.h> #include <linux/string.h> #include <linux/selinux.h> +#include <linux/mutex.h> #include "avc.h" #include "objsec.h" @@ -185,7 +185,7 @@ static int inode_alloc_security(struct inode *inode) return -ENOMEM; memset(isec, 0, sizeof(*isec)); - init_MUTEX(&isec->sem); + mutex_init(&isec->lock); INIT_LIST_HEAD(&isec->list); isec->inode = inode; isec->sid = SECINITSID_UNLABELED; @@ -242,7 +242,7 @@ static int superblock_alloc_security(struct super_block *sb) if (!sbsec) return -ENOMEM; - 
init_MUTEX(&sbsec->sem); + mutex_init(&sbsec->lock); INIT_LIST_HEAD(&sbsec->list); INIT_LIST_HEAD(&sbsec->isec_head); spin_lock_init(&sbsec->isec_lock); @@ -594,7 +594,7 @@ static int superblock_doinit(struct super_block *sb, void *data) struct inode *inode = root->d_inode; int rc = 0; - down(&sbsec->sem); + mutex_lock(&sbsec->lock); if (sbsec->initialized) goto out; @@ -689,7 +689,7 @@ next_inode: } spin_unlock(&sbsec->isec_lock); out: - up(&sbsec->sem); + mutex_unlock(&sbsec->lock); return rc; } @@ -843,15 +843,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent char *context = NULL; unsigned len = 0; int rc = 0; - int hold_sem = 0; if (isec->initialized) goto out; - down(&isec->sem); - hold_sem = 1; + mutex_lock(&isec->lock); if (isec->initialized) - goto out; + goto out_unlock; sbsec = inode->i_sb->s_security; if (!sbsec->initialized) { @@ -862,7 +860,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (list_empty(&isec->list)) list_add(&isec->list, &sbsec->isec_head); spin_unlock(&sbsec->isec_lock); - goto out; + goto out_unlock; } switch (sbsec->behavior) { @@ -885,7 +883,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent printk(KERN_WARNING "%s: no dentry for dev=%s " "ino=%ld\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino); - goto out; + goto out_unlock; } len = INITCONTEXTLEN; @@ -893,7 +891,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (!context) { rc = -ENOMEM; dput(dentry); - goto out; + goto out_unlock; } rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, context, len); @@ -903,7 +901,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent NULL, 0); if (rc < 0) { dput(dentry); - goto out; + goto out_unlock; } kfree(context); len = rc; @@ -911,7 +909,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (!context) { rc = -ENOMEM; 
dput(dentry); - goto out; + goto out_unlock; } rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, @@ -924,7 +922,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent "%d for dev=%s ino=%ld\n", __FUNCTION__, -rc, inode->i_sb->s_id, inode->i_ino); kfree(context); - goto out; + goto out_unlock; } /* Map ENODATA to the default file SID */ sid = sbsec->def_sid; @@ -960,7 +958,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->sclass, &sid); if (rc) - goto out; + goto out_unlock; isec->sid = sid; break; case SECURITY_FS_USE_MNTPOINT: @@ -978,7 +976,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->sclass, &sid); if (rc) - goto out; + goto out_unlock; isec->sid = sid; } } @@ -987,12 +985,11 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->initialized = 1; +out_unlock: + mutex_unlock(&isec->lock); out: if (isec->sclass == SECCLASS_FILE) isec->sclass = inode_mode_to_security_class(inode->i_mode); - - if (hold_sem) - up(&isec->sem); return rc; } @@ -1364,25 +1361,6 @@ static inline u32 file_to_av(struct file *file) return av; } -/* Set an inode's SID to a specified value. */ -static int inode_security_set_sid(struct inode *inode, u32 sid) -{ - struct inode_security_struct *isec = inode->i_security; - struct superblock_security_struct *sbsec = inode->i_sb->s_security; - - if (!sbsec->initialized) { - /* Defer initialization to selinux_complete_init. */ - return 0; - } - - down(&isec->sem); - isec->sclass = inode_mode_to_security_class(inode->i_mode); - isec->sid = sid; - isec->initialized = 1; - up(&isec->sem); - return 0; -} - /* Hook functions begin here. 
*/ static int selinux_ptrace(struct task_struct *parent, struct task_struct *child) @@ -1711,10 +1689,12 @@ static inline void flush_unauthorized_files(struct files_struct * files) { struct avc_audit_data ad; struct file *file, *devnull = NULL; - struct tty_struct *tty = current->signal->tty; + struct tty_struct *tty; struct fdtable *fdt; long j = -1; + mutex_lock(&tty_mutex); + tty = current->signal->tty; if (tty) { file_list_lock(); file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); @@ -1734,6 +1714,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) } file_list_unlock(); } + mutex_unlock(&tty_mutex); /* Revalidate access to inherited open files. */ @@ -2091,7 +2072,13 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, } } - inode_security_set_sid(inode, newsid); + /* Possibly defer initialization to selinux_complete_init. */ + if (sbsec->initialized) { + struct inode_security_struct *isec = inode->i_security; + isec->sclass = inode_mode_to_security_class(inode->i_mode); + isec->sid = newsid; + isec->initialized = 1; + } if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT) return -EOPNOTSUPP; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 0a39bfd1319..ef2267fea8b 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -44,7 +44,7 @@ struct inode_security_struct { u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ unsigned char initialized; /* initialization flag */ - struct semaphore sem; + struct mutex lock; unsigned char inherit; /* inherit SID from parent entry */ }; @@ -63,7 +63,7 @@ struct superblock_security_struct { unsigned int behavior; /* labeling behavior */ unsigned char initialized; /* initialization flag */ unsigned char proc; /* proc fs */ - struct semaphore sem; + struct mutex lock; struct list_head isec_head; spinlock_t isec_lock; }; diff --git 
a/security/selinux/include/security.h b/security/selinux/include/security.h index 911954a692f..1ef79172cc8 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -24,10 +24,15 @@ #define POLICYDB_VERSION_VALIDATETRANS 19 #define POLICYDB_VERSION_MLS 19 #define POLICYDB_VERSION_AVTAB 20 +#define POLICYDB_VERSION_RANGETRANS 21 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_AVTAB +#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX +#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE +#else +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_RANGETRANS +#endif extern int selinux_enabled; extern int selinux_mls_enabled; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 119bd6078ba..c713af23250 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -530,22 +530,21 @@ int mls_compute_sid(struct context *scontext, u32 specified, struct context *newcontext) { + struct range_trans *rtr; + if (!selinux_mls_enabled) return 0; switch (specified) { case AVTAB_TRANSITION: - if (tclass == SECCLASS_PROCESS) { - struct range_trans *rangetr; - /* Look for a range transition rule. */ - for (rangetr = policydb.range_tr; rangetr; - rangetr = rangetr->next) { - if (rangetr->dom == scontext->type && - rangetr->type == tcontext->type) { - /* Set the range from the rule */ - return mls_range_set(newcontext, - &rangetr->range); - } + /* Look for a range transition rule. 
*/ + for (rtr = policydb.range_tr; rtr; rtr = rtr->next) { + if (rtr->source_type == scontext->type && + rtr->target_type == tcontext->type && + rtr->target_class == tclass) { + /* Set the range from the rule */ + return mls_range_set(newcontext, + &rtr->target_range); } } /* Fallthrough */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index f03960e697c..b1889530255 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -96,6 +96,11 @@ static struct policydb_compat_info policydb_compat[] = { .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, + { + .version = POLICYDB_VERSION_RANGETRANS, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -645,15 +650,15 @@ void policydb_destroy(struct policydb *p) for (rt = p->range_tr; rt; rt = rt -> next) { if (lrt) { - ebitmap_destroy(&lrt->range.level[0].cat); - ebitmap_destroy(&lrt->range.level[1].cat); + ebitmap_destroy(&lrt->target_range.level[0].cat); + ebitmap_destroy(&lrt->target_range.level[1].cat); kfree(lrt); } lrt = rt; } if (lrt) { - ebitmap_destroy(&lrt->range.level[0].cat); - ebitmap_destroy(&lrt->range.level[1].cat); + ebitmap_destroy(&lrt->target_range.level[0].cat); + ebitmap_destroy(&lrt->target_range.level[1].cat); kfree(lrt); } @@ -1829,6 +1834,7 @@ int policydb_read(struct policydb *p, void *fp) } if (p->policyvers >= POLICYDB_VERSION_MLS) { + int new_rangetr = p->policyvers >= POLICYDB_VERSION_RANGETRANS; rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) goto bad; @@ -1847,9 +1853,16 @@ int policydb_read(struct policydb *p, void *fp) rc = next_entry(buf, fp, (sizeof(u32) * 2)); if (rc < 0) goto bad; - rt->dom = le32_to_cpu(buf[0]); - rt->type = le32_to_cpu(buf[1]); - rc = mls_read_range_helper(&rt->range, fp); + rt->source_type = le32_to_cpu(buf[0]); + rt->target_type = le32_to_cpu(buf[1]); + if (new_rangetr) { + rc = next_entry(buf, fp, sizeof(u32)); + if (rc < 0) + goto bad; 
+ rt->target_class = le32_to_cpu(buf[0]); + } else + rt->target_class = SECCLASS_PROCESS; + rc = mls_read_range_helper(&rt->target_range, fp); if (rc) goto bad; lrt = rt; diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index b1340711f72..8319d5ff594 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -106,9 +106,10 @@ struct cat_datum { }; struct range_trans { - u32 dom; /* current process domain */ - u32 type; /* program executable type */ - struct mls_range range; /* new range */ + u32 source_type; + u32 target_type; + u32 target_class; + struct mls_range target_range; struct range_trans *next; }; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7eb69a602d8..0c219a1b324 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, return rc; } -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, goto out; } - ctxt = sidtab_search(&sidtab, ctxid); + ctxt = sidtab_search(&sidtab, sid); if (!ctxt) { audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, "selinux_audit_rule_match: unrecognized SID %d\n", - ctxid); + sid); match = -ENOENT; goto out; } @@ -2502,14 +2502,24 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) { struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + u32 nlbl_peer_sid; sksec->sclass = isec->sclass; if (sk->sk_family != PF_INET) return; + netlbl_secattr_init(&secattr); + if (netlbl_sock_getattr(sk, &secattr) == 0 && + selinux_netlbl_secattr_to_sid(NULL, + &secattr, + sksec->sid, + &nlbl_peer_sid) == 0) + sksec->peer_sid = 
nlbl_peer_sid; + netlbl_secattr_destroy(&secattr, 0); + sksec->nlbl_state = NLBL_REQUIRE; - sksec->peer_sid = sksec->sid; /* Try to set the NetLabel on the socket to save time later, if we fail * here we will pick up the pieces in later calls to @@ -2568,7 +2578,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) sock = SOCKET_I(inode); isec = inode->i_security; sksec = sock->sk->sk_security; - down(&isec->sem); + mutex_lock(&isec->lock); if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && (mask & (MAY_WRITE | MAY_APPEND)))) { lock_sock(sock->sk); @@ -2576,7 +2586,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) release_sock(sock->sk); } else rc = 0; - up(&isec->sem); + mutex_unlock(&isec->lock); return rc; } @@ -2601,7 +2611,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 netlbl_sid; u32 recv_perm; - rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); + rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid); if (rc != 0) return rc; @@ -2610,13 +2620,13 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, switch (sksec->sclass) { case SECCLASS_UDP_SOCKET: - recv_perm = UDP_SOCKET__RECV_MSG; + recv_perm = UDP_SOCKET__RECVFROM; break; case SECCLASS_TCP_SOCKET: - recv_perm = TCP_SOCKET__RECV_MSG; + recv_perm = TCP_SOCKET__RECVFROM; break; default: - recv_perm = RAWIP_SOCKET__RECV_MSG; + recv_perm = RAWIP_SOCKET__RECVFROM; } rc = avc_has_perm(sksec->sid, diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c index 4cdb86252d6..219795171c7 100644 --- a/sound/oss/au1550_ac97.c +++ b/sound/oss/au1550_ac97.c @@ -719,8 +719,7 @@ prog_dmabuf_dac(struct au1550_state *s) } -static void -dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void dac_dma_interrupt(int irq, void *dev_id) { struct au1550_state *s = (struct au1550_state *) dev_id; struct dmabuf *db = &s->dma_dac; @@ -754,8 +753,7 @@ dac_dma_interrupt(int irq, void *dev_id, struct 
pt_regs *regs) } -static void -adc_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void adc_dma_interrupt(int irq, void *dev_id) { struct au1550_state *s = (struct au1550_state *)dev_id; struct dmabuf *dp = &s->dma_adc; diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c index 2bd8e40b854..be0bd503f01 100644 --- a/sound/sparc/amd7930.c +++ b/sound/sparc/amd7930.c @@ -755,7 +755,7 @@ static struct snd_pcm_ops snd_amd7930_capture_ops = { .pointer = snd_amd7930_capture_pointer, }; -static int __init snd_amd7930_pcm(struct snd_amd7930 *amd) +static int __devinit snd_amd7930_pcm(struct snd_amd7930 *amd) { struct snd_pcm *pcm; int err; @@ -870,7 +870,7 @@ static int snd_amd7930_put_volume(struct snd_kcontrol *kctl, struct snd_ctl_elem return change; } -static struct snd_kcontrol_new amd7930_controls[] __initdata = { +static struct snd_kcontrol_new amd7930_controls[] __devinitdata = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Volume", @@ -900,7 +900,7 @@ static struct snd_kcontrol_new amd7930_controls[] __initdata = { }, }; -static int __init snd_amd7930_mixer(struct snd_amd7930 *amd) +static int __devinit snd_amd7930_mixer(struct snd_amd7930 *amd) { struct snd_card *card; int idx, err; @@ -945,11 +945,11 @@ static struct snd_device_ops snd_amd7930_dev_ops = { .dev_free = snd_amd7930_dev_free, }; -static int __init snd_amd7930_create(struct snd_card *card, - struct resource *rp, - unsigned int reg_size, - int irq, int dev, - struct snd_amd7930 **ramd) +static int __devinit snd_amd7930_create(struct snd_card *card, + struct resource *rp, + unsigned int reg_size, + int irq, int dev, + struct snd_amd7930 **ramd) { unsigned long flags; struct snd_amd7930 *amd; @@ -1013,7 +1013,7 @@ static int __init snd_amd7930_create(struct snd_card *card, return 0; } -static int __init amd7930_attach_common(struct resource *rp, int irq) +static int __devinit amd7930_attach_common(struct resource *rp, int irq) { static int dev_num; struct snd_card 
*card; @@ -1065,7 +1065,7 @@ out_err: return err; } -static int __init amd7930_obio_attach(struct device_node *dp) +static int __devinit amd7930_obio_attach(struct device_node *dp) { struct linux_prom_registers *regs; struct linux_prom_irqs *irqp; |