diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-12-07 16:35:17 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-12-07 16:35:17 -0500 |
commit | 21b4e736922f546e0f1aa7b9d6c442f309a2444a (patch) | |
tree | e1be8645297f8ebe87445251743ebcc52081a20d | |
parent | 34161db6b14d984fb9b06c735b7b42f8803f6851 (diff) | |
parent | 68380b581383c028830f79ec2670f4a193854aa6 (diff) |
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/ into merge_linus
1227 files changed, 24505 insertions, 14149 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 05431621c86..805db4b2cba 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -77,7 +77,7 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h> Many drivers need lots of small dma-coherent memory regions for DMA descriptors or I/O buffers. Rather than allocating in units of a page or more using dma_alloc_coherent(), you can use DMA pools. These work -much like a kmem_cache_t, except that they use the dma-coherent allocator +much like a struct kmem_cache, except that they use the dma-coherent allocator not __get_free_pages(). Also, they understand common hardware constraints for alignment, like queue heads needing to be aligned on N byte boundaries. @@ -94,7 +94,7 @@ The pool create() routines initialize a pool of dma-coherent buffers for use with a given device. It must be called in a context which can sleep. -The "name" is for diagnostics (like a kmem_cache_t name); dev and size +The "name" is for diagnostics (like a struct kmem_cache name); dev and size are like what you'd pass to dma_alloc_coherent(). The device's hardware alignment requirement for this type of data is "align" (which is expressed in bytes, and must be a power of two). If your device has no boundary @@ -431,10 +431,10 @@ be identical to those passed in (and returned by dma_alloc_noncoherent()). int -dma_is_consistent(dma_addr_t dma_handle) +dma_is_consistent(struct device *dev, dma_addr_t dma_handle) -returns true if the memory pointed to by the dma_handle is actually -consistent. +returns true if the device dev is performing consistent DMA on the memory +area pointed to by the dma_handle. int dma_get_cache_alignment(void) @@ -459,7 +459,7 @@ anything like this. You must also be extra careful about accessing memory you intend to sync partially. void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) Do a partial sync of memory that was allocated by diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index a166675c430..ca094913c55 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -418,9 +418,35 @@ X!Edrivers/pnp/system.c !Idrivers/parport/daisy.c </chapter> - <chapter id="viddev"> - <title>Video4Linux</title> -!Edrivers/media/video/videodev.c + <chapter id="message_devices"> + <title>Message-based devices</title> + <sect1><title>Fusion message devices</title> +!Edrivers/message/fusion/mptbase.c +!Idrivers/message/fusion/mptbase.c +!Edrivers/message/fusion/mptscsih.c +!Idrivers/message/fusion/mptscsih.c +!Idrivers/message/fusion/mptctl.c +!Idrivers/message/fusion/mptspi.c +!Idrivers/message/fusion/mptfc.c +!Idrivers/message/fusion/mptlan.c + </sect1> + <sect1><title>I2O message devices</title> +!Iinclude/linux/i2o.h +!Idrivers/message/i2o/core.h +!Edrivers/message/i2o/iop.c +!Idrivers/message/i2o/iop.c +!Idrivers/message/i2o/config-osm.c +!Edrivers/message/i2o/exec-osm.c +!Idrivers/message/i2o/exec-osm.c +!Idrivers/message/i2o/bus-osm.c +!Edrivers/message/i2o/device.c +!Idrivers/message/i2o/device.c +!Idrivers/message/i2o/driver.c +!Idrivers/message/i2o/pci.c +!Idrivers/message/i2o/i2o_block.c +!Idrivers/message/i2o/i2o_scsi.c +!Idrivers/message/i2o/i2o_proc.c + </sect1> </chapter> <chapter id="snddev"> diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 0e3924ecd76..24dc3fcf159 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -365,6 +365,7 @@ You can change this at module load time (for a module) with: regshifts=<shift1>,<shift2>,... slave_addrs=<addr1>,<addr2>,... force_kipmid=<enable1>,<enable2>,... + unload_when_empty=[0|1] Each of these except si_trydefaults is a list, the first item for the first interface, second item for the second interface, etc. @@ -416,6 +417,11 @@ by the driver, but systems with broken interrupts might need an enable, or users that don't want the daemon (don't need the performance, don't want the CPU hit) can disable it. +If unload_when_empty is set to 1, the driver will be unloaded if it +doesn't find any interfaces or all the interfaces fail to work. The +default is one. Setting to 0 is useful with the hotmod, but is +obviously only useful for modules. + When compiled into the kernel, the parameters can be specified on the kernel command line as: @@ -441,6 +447,25 @@ have high-res timers enabled in the kernel and you don't have interrupts enabled, the driver will run VERY slowly. Don't blame me, these interfaces suck. +The driver supports a hot add and remove of interfaces. This way, +interfaces can be added or removed after the kernel is up and running. +This is done using /sys/modules/ipmi_si/hotmod, which is a write-only +parameter. You write a string to this interface. The string has the +format: + <op1>[:op2[:op3...]] +The "op"s are: + add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]] +You can specify more than one interface on the line. The "opt"s are: + rsp=<regspacing> + rsi=<regsize> + rsh=<regshift> + irq=<irq> + ipmb=<ipmb slave addr> +and these have the same meanings as discussed above. Note that you +can also use this on the kernel command line for a more compact format +for specifying an interface. Note that when removing an interface, +only the first three parameters (si type, address type, and address) +are used for the comparison. Any options are ignored for removing. The SMBus Driver ---------------- @@ -502,7 +527,10 @@ used to control it: modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type> preaction=<preaction type> preop=<preop type> start_now=x - nowayout=x + nowayout=x ifnum_to_use=n + +ifnum_to_use specifies which interface the watchdog timer should use. +The default is -1, which means to pick the first one registered. The timeout is the number of seconds to the action, and the pretimeout is the amount of seconds before the reset that the pre-timeout panic will @@ -624,5 +652,9 @@ command line. The parameter is also available via the proc filesystem in /proc/sys/dev/ipmi/poweroff_powercycle. Note that if the system does not support power cycling, it will always do the power off. +The "ifnum_to_use" parameter specifies which interface the poweroff +code should use. The default is -1, which means to pick the first one +registered. + Note that if you have ACPI enabled, the system will prefer using ACPI to power off. diff --git a/Documentation/block/as-iosched.txt b/Documentation/block/as-iosched.txt index e2a66f8143c..a598fe10a29 100644 --- a/Documentation/block/as-iosched.txt +++ b/Documentation/block/as-iosched.txt @@ -24,8 +24,10 @@ very similar behavior to the deadline IO scheduler. Selecting IO schedulers ----------------------- To choose IO schedulers at boot time, use the argument 'elevator=deadline'. -'noop' and 'as' (the default) are also available. IO schedulers are assigned -globally at boot time only presently. +'noop', 'as' and 'cfq' (the default) are also available. IO schedulers are +assigned globally at boot time only presently. It's also possible to change +the IO scheduler for a determined device on the fly, as described in +Documentation/block/switching-sched.txt. Anticipatory IO scheduler Policies diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 70690f1a14a..8de132a02ba 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -3,7 +3,7 @@ Maintained by Torben Mathiasen <device@lanana.org> - Last revised: 15 May 2006 + Last revised: 29 November 2006 This list is the Linux Device List, the official registry of allocated device numbers and /dev directory nodes for the Linux operating @@ -94,6 +94,7 @@ Your cooperation is appreciated. 9 = /dev/urandom Faster, less secure random number gen. 10 = /dev/aio Asynchronous I/O notification interface 11 = /dev/kmsg Writes to this come out as printk's + 1 block RAM disk 0 = /dev/ram0 First RAM disk 1 = /dev/ram1 Second RAM disk @@ -122,7 +123,7 @@ Your cooperation is appreciated. devices are on major 128 and above and use the PTY master multiplex (/dev/ptmx) to acquire a PTY on demand. - + 2 block Floppy disks 0 = /dev/fd0 Controller 0, drive 0, autodetect 1 = /dev/fd1 Controller 0, drive 1, autodetect @@ -257,7 +258,7 @@ Your cooperation is appreciated. 129 = /dev/vcsa1 tty1 text/attribute contents ... 191 = /dev/vcsa63 tty63 text/attribute contents - + NOTE: These devices permit both read and write access. 7 block Loopback devices @@ -411,7 +412,7 @@ Your cooperation is appreciated. 207 = /dev/video/em8300_sp EM8300 DVD decoder subpicture 208 = /dev/compaq/cpqphpc Compaq PCI Hot Plug Controller 209 = /dev/compaq/cpqrid Compaq Remote Insight Driver - 210 = /dev/impi/bt IMPI coprocessor block transfer + 210 = /dev/impi/bt IMPI coprocessor block transfer 211 = /dev/impi/smic IMPI coprocessor stream interface 212 = /dev/watchdogs/0 First watchdog device 213 = /dev/watchdogs/1 Second watchdog device @@ -506,6 +507,7 @@ Your cooperation is appreciated. 33 = /dev/patmgr1 Sequencer patch manager 34 = /dev/midi02 Third MIDI port 50 = /dev/midi03 Fourth MIDI port + 14 block BIOS harddrive callback support {2.6} 0 = /dev/dos_hda First BIOS harddrive whole disk 64 = /dev/dos_hdb Second BIOS harddrive whole disk @@ -527,6 +529,7 @@ Your cooperation is appreciated. 16 char Non-SCSI scanners 0 = /dev/gs4500 Genius 4500 handheld scanner + 16 block GoldStar CD-ROM 0 = /dev/gscd GoldStar CD-ROM @@ -548,6 +551,7 @@ Your cooperation is appreciated. 0 = /dev/ttyC0 First Cyclades port ... 31 = /dev/ttyC31 32nd Cyclades port + 19 block "Double" compressed disk 0 = /dev/double0 First compressed disk ... @@ -563,6 +567,7 @@ Your cooperation is appreciated. 0 = /dev/cub0 Callout device for ttyC0 ... 31 = /dev/cub31 Callout device for ttyC31 + 20 block Hitachi CD-ROM (under development) 0 = /dev/hitcd Hitachi CD-ROM @@ -582,7 +587,7 @@ Your cooperation is appreciated. This device is used on the ARM-based Acorn RiscPC. Partitions are handled the same way as for IDE disks - (see major number 3). + (see major number 3). 22 char Digiboard serial card 0 = /dev/ttyD0 First Digiboard port @@ -591,7 +596,7 @@ Your cooperation is appreciated. 22 block Second IDE hard disk/CD-ROM interface 0 = /dev/hdc Master: whole disk (or CD-ROM) 64 = /dev/hdd Slave: whole disk (or CD-ROM) - + Partitions are handled the same way as for the first interface (see major number 3). @@ -639,6 +644,7 @@ Your cooperation is appreciated. 26 char Quanta WinVision frame grabber {2.6} 0 = /dev/wvisfgrab Quanta WinVision frame grabber + 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0 1 = /dev/sbpcd5 Panasonic CD-ROM controller 1 unit 1 @@ -670,6 +676,7 @@ Your cooperation is appreciated. 37 = /dev/nrawqft1 Unit 1, no rewind-on-close, no file marks 38 = /dev/nrawqft2 Unit 2, no rewind-on-close, no file marks 39 = /dev/nrawqft3 Unit 3, no rewind-on-close, no file marks + 27 block Third Matsushita (Panasonic/SoundBlaster) CD-ROM 0 = /dev/sbpcd8 Panasonic CD-ROM controller 2 unit 0 1 = /dev/sbpcd9 Panasonic CD-ROM controller 2 unit 1 @@ -681,6 +688,7 @@ Your cooperation is appreciated. 1 = /dev/staliomem1 Second Stallion card I/O memory 2 = /dev/staliomem2 Third Stallion card I/O memory 3 = /dev/staliomem3 Fourth Stallion card I/O memory + 28 char Atari SLM ACSI laser printer (68k/Atari) 0 = /dev/slm0 First SLM laser printer 1 = /dev/slm1 Second SLM laser printer @@ -690,6 +698,7 @@ Your cooperation is appreciated. 1 = /dev/sbpcd13 Panasonic CD-ROM controller 3 unit 1 2 = /dev/sbpcd14 Panasonic CD-ROM controller 3 unit 2 3 = /dev/sbpcd15 Panasonic CD-ROM controller 3 unit 3 + 28 block ACSI disk (68k/Atari) 0 = /dev/ada First ACSI disk whole disk 16 = /dev/adb Second ACSI disk whole disk @@ -750,6 +759,7 @@ Your cooperation is appreciated. 31 char MPU-401 MIDI 0 = /dev/mpu401data MPU-401 data port 1 = /dev/mpu401stat MPU-401 status port + 31 block ROM/flash memory card 0 = /dev/rom0 First ROM card (rw) ... @@ -801,7 +811,7 @@ Your cooperation is appreciated. 34 block Fourth IDE hard disk/CD-ROM interface 0 = /dev/hdg Master: whole disk (or CD-ROM) 64 = /dev/hdh Slave: whole disk (or CD-ROM) - + Partitions are handled the same way as for the first interface (see major number 3). @@ -818,6 +828,7 @@ Your cooperation is appreciated. 129 = /dev/smpte1 Second MIDI port, SMPTE timed 130 = /dev/smpte2 Third MIDI port, SMPTE timed 131 = /dev/smpte3 Fourth MIDI port, SMPTE timed + 35 block Slow memory ramdisk 0 = /dev/slram Slow memory ramdisk @@ -828,6 +839,7 @@ Your cooperation is appreciated. 16 = /dev/tap0 First Ethertap device ... 31 = /dev/tap15 16th Ethertap device + 36 block MCA ESDI hard disk 0 = /dev/eda First ESDI disk whole disk 64 = /dev/edb Second ESDI disk whole disk @@ -882,6 +894,7 @@ Your cooperation is appreciated. 40 char Matrox Meteor frame grabber {2.6} 0 = /dev/mmetfgrab Matrox Meteor frame grabber + 40 block Syquest EZ135 parallel port removable drive 0 = /dev/eza Parallel EZ135 drive, whole disk @@ -893,6 +906,7 @@ Your cooperation is appreciated. 41 char Yet Another Micro Monitor 0 = /dev/yamm Yet Another Micro Monitor + 41 block MicroSolutions BackPack parallel port CD-ROM 0 = /dev/bpcd BackPack CD-ROM @@ -901,6 +915,7 @@ Your cooperation is appreciated. the parallel port ATAPI CD-ROM driver at major number 46. 42 char Demo/sample use + 42 block Demo/sample use This number is intended for use in sample code, as @@ -918,6 +933,7 @@ Your cooperation is appreciated. 0 = /dev/ttyI0 First virtual modem ... 63 = /dev/ttyI63 64th virtual modem + 43 block Network block devices 0 = /dev/nb0 First network block device 1 = /dev/nb1 Second network block device @@ -934,12 +950,13 @@ Your cooperation is appreciated. 0 = /dev/cui0 Callout device for ttyI0 ... 63 = /dev/cui63 Callout device for ttyI63 + 44 block Flash Translation Layer (FTL) filesystems 0 = /dev/ftla FTL on first Memory Technology Device 16 = /dev/ftlb FTL on second Memory Technology Device 32 = /dev/ftlc FTL on third Memory Technology Device ... - 240 = /dev/ftlp FTL on 16th Memory Technology Device + 240 = /dev/ftlp FTL on 16th Memory Technology Device Partitions are handled in the same way as for IDE disks (see major number 3) except that the partition @@ -958,6 +975,7 @@ Your cooperation is appreciated. 191 = /dev/ippp63 64th SyncPPP device 255 = /dev/isdninfo ISDN monitor interface + 45 block Parallel port IDE disk devices 0 = /dev/pda First parallel port IDE disk 16 = /dev/pdb Second parallel port IDE disk @@ -1044,6 +1062,7 @@ Your cooperation is appreciated. 1 = /dev/dcbri1 Second DataComm card 2 = /dev/dcbri2 Third DataComm card 3 = /dev/dcbri3 Fourth DataComm card + 52 block Mylex DAC960 PCI RAID controller; fifth controller 0 = /dev/rd/c4d0 First disk, whole disk 8 = /dev/rd/c4d1 Second disk, whole disk @@ -1093,6 +1112,7 @@ Your cooperation is appreciated. 55 char DSP56001 digital signal processor 0 = /dev/dsp56k First DSP56001 + 55 block Mylex DAC960 PCI RAID controller; eighth controller 0 = /dev/rd/c7d0 First disk, whole disk 8 = /dev/rd/c7d1 Second disk, whole disk @@ -1130,6 +1150,7 @@ Your cooperation is appreciated. 0 = /dev/cup0 Callout device for ttyP0 1 = /dev/cup1 Callout device for ttyP1 ... + 58 block Reserved for logical volume manager 59 char sf firewall package @@ -1149,6 +1170,7 @@ Your cooperation is appreciated. NAMING CONFLICT -- PROPOSED REVISED NAME /dev/rpda0 etc 60-63 char LOCAL/EXPERIMENTAL USE + 60-63 block LOCAL/EXPERIMENTAL USE Allocated for local/experimental use. For devices not assigned official numbers, these ranges should be @@ -1434,7 +1456,6 @@ Your cooperation is appreciated. DAC960 (see major number 48) except that the limit on partitions is 15. - 78 char PAM Software's multimodem boards 0 = /dev/ttyM0 First PAM modem 1 = /dev/ttyM1 Second PAM modem @@ -1450,7 +1471,6 @@ Your cooperation is appreciated. DAC960 (see major number 48) except that the limit on partitions is 15. - 79 char PAM Software's multimodem boards - alternate devices 0 = /dev/cum0 Callout device for ttyM0 1 = /dev/cum1 Callout device for ttyM1 @@ -1466,7 +1486,6 @@ Your cooperation is appreciated. DAC960 (see major number 48) except that the limit on partitions is 15. - 80 char Photometrics AT200 CCD camera 0 = /dev/at200 Photometrics AT200 CCD camera @@ -1679,7 +1698,7 @@ Your cooperation is appreciated. 1 = /dev/dcxx1 Second capture card ... - 94 block IBM S/390 DASD block storage + 94 block IBM S/390 DASD block storage 0 = /dev/dasda First DASD device, major 1 = /dev/dasda1 First DASD device, block 1 2 = /dev/dasda2 First DASD device, block 2 @@ -1695,7 +1714,7 @@ Your cooperation is appreciated. 1 = /dev/ipnat NAT control device/log file 2 = /dev/ipstate State information log file 3 = /dev/ipauth Authentication control device/log file - ... + ... 96 char Parallel port ATAPI tape devices 0 = /dev/pt0 First parallel port ATAPI tape @@ -1705,7 +1724,7 @@ Your cooperation is appreciated. 129 = /dev/npt1 Second p.p. ATAPI tape, no rewind ... - 96 block Inverse NAND Flash Translation Layer + 96 block Inverse NAND Flash Translation Layer 0 = /dev/inftla First INFTL layer 16 = /dev/inftlb Second INFTL layer ... @@ -1937,7 +1956,6 @@ Your cooperation is appreciated. ... 113 block IBM iSeries virtual CD-ROM - 0 = /dev/iseries/vcda First virtual CD-ROM 1 = /dev/iseries/vcdb Second virtual CD-ROM ... @@ -2059,11 +2077,12 @@ Your cooperation is appreciated. ... 119 char VMware virtual network control - 0 = /dev/vnet0 1st virtual network - 1 = /dev/vnet1 2nd virtual network + 0 = /dev/vmnet0 1st virtual network + 1 = /dev/vmnet1 2nd virtual network ... 120-127 char LOCAL/EXPERIMENTAL USE + 120-127 block LOCAL/EXPERIMENTAL USE Allocated for local/experimental use. For devices not assigned official numbers, these ranges should be @@ -2075,7 +2094,6 @@ Your cooperation is appreciated. nodes; instead they should be accessed through the /dev/ptmx cloning interface. - 128 block SCSI disk devices (128-143) 0 = /dev/sddy 129th SCSI disk whole disk 16 = /dev/sddz 130th SCSI disk whole disk @@ -2087,7 +2105,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 129 block SCSI disk devices (144-159) 0 = /dev/sdeo 145th SCSI disk whole disk 16 = /dev/sdep 146th SCSI disk whole disk @@ -2123,7 +2140,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 132 block SCSI disk devices (192-207) 0 = /dev/sdgk 193rd SCSI disk whole disk 16 = /dev/sdgl 194th SCSI disk whole disk @@ -2135,7 +2151,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 133 block SCSI disk devices (208-223) 0 = /dev/sdha 209th SCSI disk whole disk 16 = /dev/sdhb 210th SCSI disk whole disk @@ -2147,7 +2162,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 134 block SCSI disk devices (224-239) 0 = /dev/sdhq 225th SCSI disk whole disk 16 = /dev/sdhr 226th SCSI disk whole disk @@ -2159,7 +2173,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 135 block SCSI disk devices (240-255) 0 = /dev/sdig 241st SCSI disk whole disk 16 = /dev/sdih 242nd SCSI disk whole disk @@ -2171,7 +2184,6 @@ Your cooperation is appreciated. disks (see major number 3) except that the limit on partitions is 15. - 136-143 char Unix98 PTY slaves 0 = /dev/pts/0 First Unix98 pseudo-TTY 1 = /dev/pts/1 Second Unix98 pesudo-TTY @@ -2384,6 +2396,7 @@ Your cooperation is appreciated. ... 159 char RESERVED + 159 block RESERVED 160 char General Purpose Instrument Bus (GPIB) @@ -2427,7 +2440,7 @@ Your cooperation is appreciated. Partitions are handled in the same way as for IDE disks (see major number 3) except that the limit on - partitions is 31. + partitions is 31. 162 char Raw block device interface 0 = /dev/rawctl Raw I/O control device @@ -2483,7 +2496,6 @@ Your cooperation is appreciated. 171 char Reserved for IEEE 1394 (Firewire) - 172 char Moxa Intellio serial card 0 = /dev/ttyMX0 First Moxa port 1 = /dev/ttyMX1 Second Moxa port @@ -2543,9 +2555,6 @@ Your cooperation is appreciated. 64 = /dev/usb/rio500 Diamond Rio 500 65 = /dev/usb/usblcd USBLCD Interface (info@usblcd.de) 66 = /dev/usb/cpad0 Synaptics cPad (mouse/LCD) - 67 = /dev/usb/adutux0 1st Ontrak ADU device - ... - 76 = /dev/usb/adutux10 10th Ontrak ADU device 96 = /dev/usb/hiddev0 1st USB HID device ... 111 = /dev/usb/hiddev15 16th USB HID device @@ -2558,7 +2567,7 @@ Your cooperation is appreciated. 132 = /dev/usb/idmouse ID Mouse (fingerprint scanner) device 133 = /dev/usb/sisusbvga1 First SiSUSB VGA device ... - 140 = /dev/usb/sisusbvga8 Eigth SISUSB VGA device + 140 = /dev/usb/sisusbvga8 Eighth SISUSB VGA device 144 = /dev/usb/lcd USB LCD device 160 = /dev/usb/legousbtower0 1st USB Legotower device ... @@ -2571,7 +2580,7 @@ Your cooperation is appreciated. 0 = /dev/uba First USB block device 8 = /dev/ubb Second USB block device 16 = /dev/ubc Third USB block device - ... + ... 181 char Conrad Electronic parallel port radio clocks 0 = /dev/pcfclock0 First Conrad radio clock @@ -2657,7 +2666,7 @@ Your cooperation is appreciated. 32 = /dev/mvideo/status2 Third device ... ... - 240 = /dev/mvideo/status15 16th device + 240 = /dev/mvideo/status15 16th device ... 195 char Nvidia graphics devices @@ -2795,6 +2804,10 @@ Your cooperation is appreciated. ... 185 = /dev/ttyNX15 Hilscher netX serial port 15 186 = /dev/ttyJ0 JTAG1 DCC protocol based serial port emulation + 187 = /dev/ttyUL0 Xilinx uartlite - port 0 + ... + 190 = /dev/ttyUL3 Xilinx uartlite - port 3 + 191 = /dev/xvc0 Xen virtual console - port 0 205 char Low-density serial ports (alternate device) 0 = /dev/culu0 Callout device for ttyLU0 @@ -2832,7 +2845,6 @@ Your cooperation is appreciated. 82 = /dev/cuvr0 Callout device for ttyVR0 83 = /dev/cuvr1 Callout device for ttyVR1 - 206 char OnStream SC-x0 tape devices 0 = /dev/osst0 First OnStream SCSI tape, mode 0 1 = /dev/osst1 Second OnStream SCSI tape, mode 0 @@ -2922,7 +2934,6 @@ Your cooperation is appreciated. ... 212 char LinuxTV.org DVB driver subsystem - 0 = /dev/dvb/adapter0/video0 first video decoder of first card 1 = /dev/dvb/adapter0/audio0 first audio decoder of first card 2 = /dev/dvb/adapter0/sec0 (obsolete/unused) @@ -3008,9 +3019,9 @@ Your cooperation is appreciated. 2 = /dev/3270/tub2 Second 3270 terminal ... -229 char IBM iSeries virtual console - 0 = /dev/iseries/vtty0 First console port - 1 = /dev/iseries/vtty1 Second console port +229 char IBM iSeries/pSeries virtual console + 0 = /dev/hvc0 First console port + 1 = /dev/hvc1 Second console port ... 230 char IBM iSeries virtual tape @@ -3083,12 +3094,14 @@ Your cooperation is appreciated. 234-239 UNASSIGNED 240-254 char LOCAL/EXPERIMENTAL USE + 240-254 block LOCAL/EXPERIMENTAL USE Allocated for local/experimental use. For devices not assigned official numbers, these ranges should be used in order to avoid conflicting with future assignments. 255 char RESERVED + 255 block RESERVED This major is reserved to assist the expansion to a @@ -3115,7 +3128,20 @@ Your cooperation is appreciated. 257 char Phoenix Technologies Cryptographic Services Driver 0 = /dev/ptlsec Crypto Services Driver - +257 block SSFDC Flash Translation Layer filesystem + 0 = /dev/ssfdca First SSFDC layer + 8 = /dev/ssfdcb Second SSFDC layer + 16 = /dev/ssfdcc Third SSFDC layer + 24 = /dev/ssfdcd 4th SSFDC layer + 32 = /dev/ssfdce 5th SSFDC layer + 40 = /dev/ssfdcf 6th SSFDC layer + 48 = /dev/ssfdcg 7th SSFDC layer + 56 = /dev/ssfdch 8th SSFDC layer + +258 block ROM/Flash read-only translation layer + 0 = /dev/blockrom0 First ROM card's translation layer interface + 1 = /dev/blockrom1 Second ROM card's translation layer interface + ... **** ADDITIONAL /dev DIRECTORY ENTRIES diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index eb1a6cad21e..790ef6fbe49 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -124,7 +124,7 @@ sync_fs: no no read write_super_lockfs: ? unlockfs: ? statfs: no no no -remount_fs: no yes maybe (see below) +remount_fs: yes yes maybe (see below) clear_inode: no umount_begin: yes no no show_options: no (vfsmount->sem) diff --git a/Documentation/filesystems/fuse.txt b/Documentation/filesystems/fuse.txt index 3d744773895..345392c4cae 100644 --- a/Documentation/filesystems/fuse.txt +++ b/Documentation/filesystems/fuse.txt @@ -51,6 +51,22 @@ homepage: http://fuse.sourceforge.net/ +Filesystem type +~~~~~~~~~~~~~~~ + +The filesystem type given to mount(2) can be one of the following: + +'fuse' + + This is the usual way to mount a FUSE filesystem. The first + argument of the mount system call may contain an arbitrary string, + which is not interpreted by the kernel. + +'fuseblk' + + The filesystem is block device based. The first argument of the + mount system call is interpreted as the name of the device. + Mount options ~~~~~~~~~~~~~ @@ -94,6 +110,11 @@ Mount options The default is infinite. Note that the size of read requests is limited anyway to 32 pages (which is 128kbyte on i386). +'blksize=N' + + Set the block size for the filesystem. The default is 512. This + option is only valid for 'fuseblk' type mounts. + Control filesystem ~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/filesystems/sysv-fs.txt b/Documentation/filesystems/sysv-fs.txt index d8172241801..253b50d1328 100644 --- a/Documentation/filesystems/sysv-fs.txt +++ b/Documentation/filesystems/sysv-fs.txt @@ -1,11 +1,8 @@ -This is the implementation of the SystemV/Coherent filesystem for Linux. It implements all of - Xenix FS, - SystemV/386 FS, - Coherent FS. -This is version beta 4. - To install: * Answer the 'System V and Coherent filesystem support' question with 'y' when configuring the kernel. @@ -28,11 +25,173 @@ Bugs in the present implementation: for this FS on hard disk yet. -Please report any bugs and suggestions to - Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> - Pascal Haible <haible@izfm.uni-stuttgart.de> - Krzysztof G. Baranowski <kgb@manjak.knm.org.pl> +These filesystems are rather similar. Here is a comparison with Minix FS: + +* Linux fdisk reports on partitions + - Minix FS 0x81 Linux/Minix + - Xenix FS ?? + - SystemV FS ?? + - Coherent FS 0x08 AIX bootable + +* Size of a block or zone (data allocation unit on disk) + - Minix FS 1024 + - Xenix FS 1024 (also 512 ??) + - SystemV FS 1024 (also 512 and 2048) + - Coherent FS 512 + +* General layout: all have one boot block, one super block and + separate areas for inodes and for directories/data. + On SystemV Release 2 FS (e.g. Microport) the first track is reserved and + all the block numbers (including the super block) are offset by one track. + +* Byte ordering of "short" (16 bit entities) on disk: + - Minix FS little endian 0 1 + - Xenix FS little endian 0 1 + - SystemV FS little endian 0 1 + - Coherent FS little endian 0 1 + Of course, this affects only the file system, not the data of files on it! + +* Byte ordering of "long" (32 bit entities) on disk: + - Minix FS little endian 0 1 2 3 + - Xenix FS little endian 0 1 2 3 + - SystemV FS little endian 0 1 2 3 + - Coherent FS PDP-11 2 3 0 1 + Of course, this affects only the file system, not the data of files on it! + +* Inode on disk: "short", 0 means non-existent, the root dir ino is: + - Minix FS 1 + - Xenix FS, SystemV FS, Coherent FS 2 + +* Maximum number of hard links to a file: + - Minix FS 250 + - Xenix FS ?? + - SystemV FS ?? + - Coherent FS >=10000 + +* Free inode management: + - Minix FS a bitmap + - Xenix FS, SystemV FS, Coherent FS + There is a cache of a certain number of free inodes in the super-block. + When it is exhausted, new free inodes are found using a linear search. + +* Free block management: + - Minix FS a bitmap + - Xenix FS, SystemV FS, Coherent FS + Free blocks are organized in a "free list". Maybe a misleading term, + since it is not true that every free block contains a pointer to + the next free block. Rather, the free blocks are organized in chunks + of limited size, and every now and then a free block contains pointers + to the free blocks pertaining to the next chunk; the first of these + contains pointers and so on. The list terminates with a "block number" + 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS. + +* Super-block location: + - Minix FS block 1 = bytes 1024..2047 + - Xenix FS block 1 = bytes 1024..2047 + - SystemV FS bytes 512..1023 + - Coherent FS block 1 = bytes 512..1023 + +* Super-block layout: + - Minix FS + unsigned short s_ninodes; + unsigned short s_nzones; + unsigned short s_imap_blocks; + unsigned short s_zmap_blocks; + unsigned short s_firstdatazone; + unsigned short s_log_zone_size; + unsigned long s_max_size; + unsigned short s_magic; + - Xenix FS, SystemV FS, Coherent FS + unsigned short s_firstdatazone; + unsigned long s_nzones; + unsigned short s_fzone_count; + unsigned long s_fzones[NICFREE]; + unsigned short s_finode_count; + unsigned short s_finodes[NICINOD]; + char s_flock; + char s_ilock; + char s_modified; + char s_rdonly; + unsigned long s_time; + short s_dinfo[4]; -- SystemV FS only + unsigned long s_free_zones; + unsigned short s_free_inodes; + short s_dinfo[4]; -- Xenix FS only + unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only + char s_fname[6]; + char s_fpack[6]; + then they differ considerably: + Xenix FS + char s_clean; + char s_fill[371]; + long s_magic; + long s_type; + SystemV FS + long s_fill[12 or 14]; + long s_state; + long s_magic; + long s_type; + Coherent FS + unsigned long s_unique; + Note that Coherent FS has no magic. + +* Inode layout: + - Minix FS + unsigned short i_mode; + unsigned short i_uid; + unsigned long i_size; + unsigned long i_time; + unsigned char i_gid; + unsigned char i_nlinks; + unsigned short i_zone[7+1+1]; + - Xenix FS, SystemV FS, Coherent FS + unsigned short i_mode; + unsigned short i_nlink; + unsigned short i_uid; + unsigned short i_gid; + unsigned long i_size; + unsigned char i_zone[3*(10+1+1+1)]; + unsigned long i_atime; + unsigned long i_mtime; + unsigned long i_ctime; + +* Regular file data blocks are organized as + - Minix FS + 7 direct blocks + 1 indirect block (pointers to blocks) + 1 double-indirect block (pointer to pointers to blocks) + - Xenix FS, SystemV FS, Coherent FS + 10 direct blocks + 1 indirect block (pointers to blocks) + 1 double-indirect block (pointer to pointers to blocks) + 1 triple-indirect block (pointer to pointers to pointers to blocks) + +* Inode size, inodes per block + - Minix FS 32 32 + - Xenix FS 64 16 + - SystemV FS 64 16 + - Coherent FS 64 8 + +* Directory entry on disk + - Minix FS + unsigned short inode; + char name[14/30]; + - Xenix FS, SystemV FS, Coherent FS + unsigned short inode; + char name[14]; + +* Dir entry size, dir entries per block + - Minix FS 16/32 64/32 + - Xenix FS 16 64 + - SystemV FS 16 64 + - Coherent FS 16 32 + +* How to implement symbolic links such that the host fsck doesn't scream: + - Minix FS normal + - Xenix FS kludge: as regular files with chmod 1000 + - SystemV FS ?? + - Coherent FS kludge: as regular files with chmod 1000 -Bruno Haible -<haible@ma2s2.mathematik.uni-karlsruhe.de> +Notation: We often speak of a "block" but mean a zone (the allocation unit) +and not the disk driver's notion of "block". diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index c51314b1a46..9575de300a6 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -2,7 +2,7 @@ ---------------------------- H. Peter Anvin <hpa@zytor.com> - Last update 2005-09-02 + Last update 2006-11-17 On the i386 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as @@ -35,6 +35,8 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible initrd address available to the bootloader. Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. +Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. + Introduce relocatable_kernel and kernel_alignment fields. **** MEMORY LAYOUT @@ -129,6 +131,8 @@ Offset Proto Name Meaning 0226/2 N/A pad1 Unused 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 022C/4 2.03+ initrd_addr_max Highest legal initrd address +0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel +0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2e1898e4e8f..b79bcdf1631 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -599,8 +599,6 @@ and is between 256 and 4096 characters. It is defined in the file hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. - noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing - i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from keyboard and cannot control its state @@ -650,6 +648,10 @@ and is between 256 and 4096 characters. It is defined in the file idle= [HW] Format: idle=poll or idle=halt + ignore_loglevel [KNL] + Ignore loglevel setting - this will print /all/ + kernel messages to the console. Useful for debugging. + ihash_entries= [KNL] Set number of hash buckets for inode cache. @@ -714,7 +716,12 @@ and is between 256 and 4096 characters. It is defined in the file Format: <RDP>,<reset>,<pci_scan>,<verbosity> isolcpus= [KNL,SMP] Isolate CPUs from the general scheduler. - Format: <cpu number>,...,<cpu number> + Format: + <cpu number>,...,<cpu number> + or + <cpu number>-<cpu number> (must be a positive range in ascending order) + or a mixture + <cpu number>,...,<cpu number>-<cpu number> This option can be used to specify one or more CPUs to isolate from the general SMP balancing and scheduling algorithms. The only way to move a process onto or off @@ -1012,6 +1019,10 @@ and is between 256 and 4096 characters. It is defined in the file emulation library even if a 387 maths coprocessor is present. + noaliencache [MM, NUMA] Disables the allcoation of alien caches in + the slab allocator. Saves per-node memory, but will + impact performance on real NUMA hardware. + noalign [KNL,ARM] noapic [SMP,APIC] Tells the kernel to not make use of any @@ -1052,9 +1063,14 @@ and is between 256 and 4096 characters. It is defined in the file in certain environments such as networked servers or real-time systems. + noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing + noirqdebug [IA-32] Disables the code which attempts to detect and disable unhandled interrupt sources. + no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for + broken timer IRQ sources. + noisapnp [ISAPNP] Disables ISA PnP code. noinitrd [RAM] Tells the kernel not to load any configured @@ -1285,6 +1301,7 @@ and is between 256 and 4096 characters. It is defined in the file Param: "schedule" - profile schedule points. Param: <number> - step/bucket size as a power of 2 for statistical time based profiling. + Param: "sleep" - profile D-state sleeping (millisecs) processor.max_cstate= [HW,ACPI] Limit processor to maximum C-state @@ -1366,6 +1383,12 @@ and is between 256 and 4096 characters. It is defined in the file resume= [SWSUSP] Specify the partition device for software suspend + resume_offset= [SWSUSP] + Specify the offset from the beginning of the partition + given by "resume=" at which the swap header is located, + in <PAGE_SIZE> units (needed only for swap files). + See Documentation/power/swsusp-and-swap-files.txt + rhash_entries= [KNL,NET] Set number of hash buckets for route cache @@ -1732,6 +1755,9 @@ and is between 256 and 4096 characters. It is defined in the file norandmaps Don't use address space randomization Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + unwind_debug=N N > 0 will enable dwarf2 unwinder debugging + This is useful to get more information why + you got a "dwarf2 unwinder stuck" ______________________________________________________________________ diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index b1181ce232d..e06b6e3c1db 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -58,6 +58,8 @@ fore200e.txt - FORE Systems PCA-200E/SBA-200E ATM NIC driver info. framerelay.txt - info on using Frame Relay/Data Link Connection Identifier (DLCI). +generic_netlink.txt + - info on Generic Netlink ip-sysctl.txt - /proc/sys/net/ipv4/* variables ip_dynaddr.txt diff --git a/Documentation/networking/generic_netlink.txt b/Documentation/networking/generic_netlink.txt new file mode 100644 index 00000000000..d4f8b8b9b53 --- /dev/null +++ b/Documentation/networking/generic_netlink.txt @@ -0,0 +1,3 @@ +A wiki document on how to use Generic Netlink can be found here: + + * http://linux-net.osdl.org/index.php/Generic_Netlink_HOWTO diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt new file mode 100644 index 00000000000..b05f512130e --- /dev/null +++ b/Documentation/power/s2ram.txt @@ -0,0 +1,56 @@ + How to get s2ram working + ~~~~~~~~~~~~~~~~~~~~~~~~ + 2006 Linus Torvalds + 2006 Pavel Machek + +1) Check suspend.sf.net, program s2ram there has long whitelist of + "known ok" machines, along with tricks to use on each one. + +2) If that does not help, try reading tricks.txt and + video.txt. Perhaps problem is as simple as broken module, and + simple module unload can fix it. + +3) You can use Linus' TRACE_RESUME infrastructure, described below. + + Using TRACE_RESUME + ~~~~~~~~~~~~~~~~~~ + +I've been working at making the machines I have able to STR, and almost +always it's a driver that is buggy. Thank God for the suspend/resume +debugging - the thing that Chuck tried to disable. That's often the _only_ +way to debug these things, and it's actually pretty powerful (but +time-consuming - having to insert TRACE_RESUME() markers into the device +driver that doesn't resume and recompile and reboot). + +Anyway, the way to debug this for people who are interested (have a +machine that doesn't boot) is: + + - enable PM_DEBUG, and PM_TRACE + + - use a script like this: + + #!/bin/sh + sync + echo 1 > /sys/power/pm_trace + echo mem > /sys/power/state + + to suspend + + - if it doesn't come back up (which is usually the problem), reboot by + holding the power button down, and look at the dmesg output for things + like + + Magic number: 4:156:725 + hash matches drivers/base/power/resume.c:28 + hash matches device 0000:01:00.0 + + which means that the last trace event was just before trying to resume + device 0000:01:00.0. Then figure out what driver is controlling that + device (lspci and /sys/devices/pci* is your friend), and see if you can + fix it, disable it, or trace into its resume function. + +For example, the above happens to be the VGA device on my EVO, which I +used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out +that "radeonfb" simply cannot resume that device - it tries to set the +PLL's, and it just _hangs_. Using the regular VGA console and letting X +resume it instead works fine. diff --git a/Documentation/power/swsusp-and-swap-files.txt b/Documentation/power/swsusp-and-swap-files.txt new file mode 100644 index 00000000000..06f911a5f88 --- /dev/null +++ b/Documentation/power/swsusp-and-swap-files.txt @@ -0,0 +1,60 @@ +Using swap files with software suspend (swsusp) + (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + +The Linux kernel handles swap files almost in the same way as it handles swap +partitions and there are only two differences between these two types of swap +areas: +(1) swap files need not be contiguous, +(2) the header of a swap file is not in the first block of the partition that +holds it. From the swsusp's point of view (1) is not a problem, because it is +already taken care of by the swap-handling code, but (2) has to be taken into +consideration. + +In principle the location of a swap file's header may be determined with the +help of appropriate filesystem driver. Unfortunately, however, it requires the +filesystem holding the swap file to be mounted, and if this filesystem is +journaled, it cannot be mounted during resume from disk. For this reason to +identify a swap file swsusp uses the name of the partition that holds the file +and the offset from the beginning of the partition at which the swap file's +header is located. For convenience, this offset is expressed in <PAGE_SIZE> +units. + +In order to use a swap file with swsusp, you need to: + +1) Create the swap file and make it active, eg. + +# dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k> +# mkswap <swap_file_path> +# swapon <swap_file_path> + +2) Use an application that will bmap the swap file with the help of the +FIBMAP ioctl and determine the location of the file's swap header, as the +offset, in <PAGE_SIZE> units, from the beginning of the partition which +holds the swap file. + +3) Add the following parameters to the kernel command line: + +resume=<swap_file_partition> resume_offset=<swap_file_offset> + +where <swap_file_partition> is the partition on which the swap file is located +and <swap_file_offset> is the offset of the swap header determined by the +application in 2) (of course, this step may be carried out automatically +by the same application that determies the swap file's header offset using the +FIBMAP ioctl) + +OR + +Use a userland suspend application that will set the partition and offset +with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in +Documentation/power/userland-swsusp.txt (this is the only method to suspend +to a swap file allowing the resume to be initiated from an initrd or initramfs +image). + +Now, swsusp will use the swap file in the same way in which it would use a swap +partition. In particular, the swap file has to be active (ie. be present in +/proc/swaps) so that it can be used for suspending. + +Note that if the swap file used for suspending is deleted and recreated, +the location of its header need not be the same as before. Thus every time +this happens the value of the "resume_offset=" kernel command line parameter +has to be updated. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index e635e6f1e31..0761ff6c57e 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -297,20 +297,12 @@ system is shut down or suspended. Additionally use the encrypted suspend image to prevent sensitive data from being stolen after resume. -Q: Why can't we suspend to a swap file? +Q: Can I suspend to a swap file? -A: Because accessing swap file needs the filesystem mounted, and -filesystem might do something wrong (like replaying the journal) -during mount. - -There are few ways to get that fixed: - -1) Probably could be solved by modifying every filesystem to support -some kind of "really read-only!" option. Patches welcome. - -2) suspend2 gets around that by storing absolute positions in on-disk -image (and blocksize), with resume parameter pointing directly to -suspend header. +A: Generally, yes, you can. However, it requires you to use the "resume=" and +"resume_offset=" kernel command line parameters, so the resume from a swap file +cannot be initiated from an initrd or initramfs image. See +swsusp-and-swap-files.txt for details. Q: Is there a maximum system RAM size that is supported by swsusp? diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt index 64755e9285d..000556c932e 100644 --- a/Documentation/power/userland-swsusp.txt +++ b/Documentation/power/userland-swsusp.txt @@ -9,9 +9,8 @@ done it already. Now, to use the userland interface for software suspend you need special utilities that will read/write the system memory snapshot from/to the kernel. Such utilities are available, for example, from -<http://www.sisk.pl/kernel/utilities/suspend>. You may want to have -a look at them if you are going to develop your own suspend/resume -utilities. +<http://suspend.sourceforge.net>. You may want to have a look at them if you +are going to develop your own suspend/resume utilities. The interface consists of a character device providing the open(), release(), read(), and write() operations as well as several ioctl() @@ -21,9 +20,9 @@ be read from /sys/class/misc/snapshot/dev. The device can be open either for reading or for writing. If open for reading, it is considered to be in the suspend mode. Otherwise it is -assumed to be in the resume mode. The device cannot be open for reading -and writing. It is also impossible to have the device open more than once -at a time. +assumed to be in the resume mode. The device cannot be open for simultaneous +reading and writing. It is also impossible to have the device open more than +once at a time. The ioctl() commands recognized by the device are: @@ -69,9 +68,46 @@ SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated with SNAPSHOT_SET_SWAP_FILE - set the resume partition (the last ioctl() argument should specify the device's major and minor numbers in the old two-byte format, as returned by the stat() function in the .st_rdev - member of the stat structure); it is recommended to always use this - call, because the code to set the resume partition could be removed from - future kernels + member of the stat structure) + +SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE> + units) from the beginning of the partition at which the swap header is + located (the last ioctl() argument should point to a struct + resume_swap_area, as defined in kernel/power/power.h, containing the + resume device specification, as for the SNAPSHOT_SET_SWAP_FILE ioctl(), + and the offset); for swap partitions the offset is always 0, but it is + different to zero for swap files (please see + Documentation/swsusp-and-swap-files.txt for details). + The SNAPSHOT_SET_SWAP_AREA ioctl() is considered as a replacement for + SNAPSHOT_SET_SWAP_FILE which is regarded as obsolete. It is + recommended to always use this call, because the code to set the resume + partition may be removed from future kernels + +SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to + immediately enter the suspend-to-RAM state, so this call must always + be preceded by the SNAPSHOT_FREEZE call and it is also necessary + to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call + is needed to implement the suspend-to-both mechanism in which the + suspend image is first created, as though the system had been suspended + to disk, and then the system is suspended to RAM (this makes it possible + to resume the system from RAM if there's enough battery power or restore + its state on the basis of the saved suspend image otherwise) + +SNAPSHOT_PMOPS - enable the usage of the pmops->prepare, pmops->enter and + pmops->finish methods (the in-kernel swsusp knows these as the "platform + method") which are needed on many machines to (among others) speed up + the resume by letting the BIOS skip some steps or to let the system + recognise the correct state of the hardware after the resume (in + particular on many machines this ensures that unplugged AC + adapters get correctly detected and that kacpid does not run wild after + the resume). The last ioctl() argument can take one of the three + values, defined in kernel/power/power.h: + PMOPS_PREPARE - make the kernel carry out the + pm_ops->prepare(PM_SUSPEND_DISK) operation + PMOPS_ENTER - make the kernel power off the system by calling + pm_ops->enter(PM_SUSPEND_DISK) + PMOPS_FINISH - make the kernel carry out the + pm_ops->finish(PM_SUSPEND_DISK) operation The device's read() operation can be used to transfer the snapshot image from the kernel. It has the following limitations: @@ -91,10 +127,12 @@ unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are still frozen when the device is being closed). Currently it is assumed that the userland utilities reading/writing the -snapshot image from/to the kernel will use a swap partition, called the resume -partition, as storage space. However, this is not really required, as they -can use, for example, a special (blank) suspend partition or a file on a partition -that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and mounted afterwards. +snapshot image from/to the kernel will use a swap parition, called the resume +partition, or a swap file as storage space (if a swap file is used, the resume +partition is the partition that holds this file). However, this is not really +required, as they can use, for example, a special (blank) suspend partition or +a file on a partition that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and +mounted afterwards. These utilities SHOULD NOT make any assumptions regarding the ordering of data within the snapshot image, except for the image header that MAY be diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt index f39c9d714db..a2afca3b2ba 100644 --- a/Documentation/stable_api_nonsense.txt +++ b/Documentation/stable_api_nonsense.txt @@ -62,9 +62,6 @@ consider the following facts about the Linux kernel: - different structures can contain different fields - Some functions may not be implemented at all, (i.e. some locks compile away to nothing for non-SMP builds.) - - Parameter passing of variables from function to function can be - done in different ways (the CONFIG_REGPARM option controls - this.) - Memory within the kernel can be aligned in different ways, depending on the build options. - Linux runs on a wide range of different processor architectures. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e..5922e84d913 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: - hotplug - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] +- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/kmod.txt - msgmax @@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== +kstack_depth_to_print: (X86 only) + +Controls the number of words to print when dumping the raw +kernel stack. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index f3c57f43ba6..dbdcaf68e3e 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -52,10 +52,6 @@ APICs apicmaintimer. Useful when your PIT timer is totally broken. - disable_8254_timer / enable_8254_timer - Enable interrupt 0 timer routing over the 8254 in addition to over - the IO-APIC. The kernel tries to set a sensible default. - Early Console syntax: earlyprintk=vga @@ -183,7 +179,7 @@ PCI IOMMU iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] - [,forcesac][,fullflush][,nomerge][,noaperture] + [,forcesac][,fullflush][,nomerge][,noaperture][,calgary] size set size of iommu (in bytes) noagp don't initialize the AGP driver and use full aperture. off don't use the IOMMU @@ -204,6 +200,7 @@ IOMMU buffering. nodac Forbid DMA >4GB panic Always panic when IOMMU overflows + calgary Use the Calgary IOMMU if it is available swiotlb=pages[,force] diff --git a/MAINTAINERS b/MAINTAINERS index 5dff26814a0..cf24400213f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1091,13 +1091,19 @@ M: miku@iki.fi S: Maintained EXT2 FILE SYSTEM -L: ext2-devel@lists.sourceforge.net +L: linux-ext4@vger.kernel.org S: Maintained EXT3 FILE SYSTEM P: Stephen Tweedie, Andrew Morton M: sct@redhat.com, akpm@osdl.org, adilger@clusterfs.com -L: ext2-devel@lists.sourceforge.net +L: linux-ext4@vger.kernel.org +S: Maintained + +EXT4 FILE SYSTEM +P: Stephen Tweedie, Andrew Morton +M: sct@redhat.com, akpm@osdl.org, adilger@clusterfs.com +L: linux-ext4@vger.kernel.org S: Maintained F71805F HARDWARE MONITORING DRIVER @@ -1214,7 +1220,8 @@ HARDWARE MONITORING P: Jean Delvare M: khali@linux-fr.org L: lm-sensors@lm-sensors.org -W: http://www.lm-sensors.nu/ +W: http://www.lm-sensors.org/ +T: quilt http://khali.linux-fr.org/devel/linux-2.6/jdelvare-hwmon/ S: Maintained HARDWARE RANDOM NUMBER GENERATOR CORE @@ -1340,8 +1347,7 @@ I2C SUBSYSTEM P: Jean Delvare M: khali@linux-fr.org L: i2c@lm-sensors.org -W: http://www.lm-sensors.nu/ -T: quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ +T: quilt http://khali.linux-fr.org/devel/linux-2.6/jdelvare-i2c/ S: Maintained I2O @@ -1673,7 +1679,7 @@ S: Supported JOURNALLING LAYER FOR BLOCK DEVICES (JBD) P: Stephen Tweedie, Andrew Morton M: sct@redhat.com, akpm@osdl.org -L: ext2-devel@lists.sourceforge.net +L: linux-ext4@vger.kernel.org S: Maintained K8TEMP HARDWARE MONITORING DRIVER @@ -2913,7 +2919,6 @@ S: Maintained SUN3/3X P: Sam Creasey M: sammy@sammy.net -L: sun3-list@redhat.com W: http://sammy.net/sun3/ S: Maintained @@ -3454,6 +3459,12 @@ W: http://oss.sgi.com/projects/xfs T: git git://oss.sgi.com:8090/xfs/xfs-2.6 S: Supported +XILINX UARTLITE SERIAL DRIVER +P: Peter Korsgaard +M: jacmet@sunsite.dk +L: linux-serial@vger.kernel.org +S: Maintained + X86 3-LEVEL PAGING (PAE) SUPPORT P: Ingo Molnar M: mingo@redhat.com diff --git a/REPORTING-BUGS b/REPORTING-BUGS index f9da827a0c1..ac02e42a262 100644 --- a/REPORTING-BUGS +++ b/REPORTING-BUGS @@ -40,7 +40,9 @@ summary from [1.]>" for easy identification by the developers. [1.] One line summary of the problem: [2.] Full description of the problem/report: [3.] Keywords (i.e., modules, networking, kernel): -[4.] Kernel version (from /proc/version): +[4.] Kernel information +[4.1.] Kernel version (from /proc/version): +[4.2.] Kernel .config file: [5.] Most recent kernel version which did not have the bug: [6.] Output of Oops.. message (if applicable) with symbolic information resolved (see Documentation/oops-tracing.txt) diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index ffb7d5423cc..3c10b9a1ddf 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -516,10 +516,11 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) if (bus == 0 && dfn == 0) { hose = pci_isa_hose; } else { - dev = pci_find_slot(bus, dfn); + dev = pci_get_bus_and_slot(bus, dfn); if (!dev) return -ENODEV; hose = dev->sysdata; + pci_dev_put(dev); } } diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c index b8b817feb1e..910b43cd63e 100644 --- a/arch/alpha/kernel/sys_miata.c +++ b/arch/alpha/kernel/sys_miata.c @@ -183,11 +183,15 @@ miata_map_irq(struct pci_dev *dev, u8 slot, u8 pin) if((slot == 7) && (PCI_FUNC(dev->devfn) == 3)) { u8 irq=0; - - if(pci_read_config_byte(pci_find_slot(dev->bus->number, dev->devfn & ~(7)), 0x40,&irq)!=PCIBIOS_SUCCESSFUL) + struct pci_dev *pdev = pci_get_slot(dev->bus, dev->devfn & ~7); + if(pdev == NULL || pci_read_config_byte(pdev, 0x40,&irq) != PCIBIOS_SUCCESSFUL) { + pci_dev_put(pdev); return -1; - else + } + else { + pci_dev_put(pdev); return irq; + } } return COMMON_TABLE_LOOKUP; diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 93744bab73f..e7594a7cf58 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -200,7 +200,7 @@ nautilus_init_pci(void) bus = pci_scan_bus(0, alpha_mv.pci_ops, hose); hose->bus = bus; - irongate = pci_find_slot(0, 0); + irongate = pci_get_bus_and_slot(0, 0); bus->self = irongate; bus->resource[1] = &irongate_mem; diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 8871529a34e..8aa9db834c1 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -108,7 +108,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || in_interrupt()) + if (!mm || in_atomic()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 48cf7fffddf..f38a60a03b8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -11,6 +11,7 @@ #include <linux/signal.h> #include <linux/ptrace.h> #include <linux/personality.h> +#include <linux/freezer.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c index a611c3b6395..6dcd10ab449 100644 --- a/arch/arm/mach-omap1/devices.c +++ b/arch/arm/mach-omap1/devices.c @@ -55,7 +55,7 @@ static inline void omap_init_irda(void) {} /*-------------------------------------------------------------------------*/ -#if defined(CONFIG_OMAP_RTC) || defined(CONFIG_OMAP_RTC) +#if defined(CONFIG_RTC_DRV_OMAP) || defined(CONFIG_RTC_DRV_OMAP_MODULE) #define OMAP_RTC_BASE 0xfffb4800 diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index 3d211dc2f2f..01abb0ace23 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -40,7 +40,7 @@ /* io map for dma */ static void __iomem *dma_base; -static kmem_cache_t *dma_kmem; +static struct kmem_cache *dma_kmem; struct s3c24xx_dma_selection dma_sel; @@ -1271,7 +1271,7 @@ struct sysdev_class dma_sysclass = { /* kmem cache implementation */ -static void s3c2410_dma_cache_ctor(void *p, kmem_cache_t *c, unsigned long f) +static void s3c2410_dma_cache_ctor(void *p, struct kmem_cache *c, unsigned long f) { memset(p, 0, sizeof(struct s3c2410_dma_buf)); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5e658a87449..9fd6d2eafb4 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -230,7 +230,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; /* diff --git a/arch/arm26/kernel/ecard.c b/arch/arm26/kernel/ecard.c index 047d0a408b9..43dd41be71f 100644 --- a/arch/arm26/kernel/ecard.c +++ b/arch/arm26/kernel/ecard.c @@ -620,12 +620,10 @@ ecard_probe(int slot, card_type_t type) struct ex_ecid cid; int i, rc = -ENOMEM; - ec = kmalloc(sizeof(ecard_t), GFP_KERNEL); + ec = kzalloc(sizeof(ecard_t), GFP_KERNEL); if (!ec) goto nomem; - memset(ec, 0, sizeof(ecard_t)); - ec->slot_no = slot; ec->type = type; ec->irq = NO_IRQ; diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c index a1f6d8a9cc3..93c0cee0fb5 100644 --- a/arch/arm26/mm/fault.c +++ b/arch/arm26/mm/fault.c @@ -215,7 +215,7 @@ int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c index 34def6397c3..f2901581d4d 100644 --- a/arch/arm26/mm/memc.c +++ b/arch/arm26/mm/memc.c @@ -24,7 +24,7 @@ #define MEMC_TABLE_SIZE (256*sizeof(unsigned long)) -kmem_cache_t *pte_cache, *pgd_cache; +struct kmem_cache *pte_cache, *pgd_cache; int page_nr; /* @@ -162,12 +162,12 @@ void __init create_memmap_holes(struct meminfo *mi) { } -static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +static void pte_cache_ctor(void *pte, struct kmem_cache *cache, unsigned long flags) { memzero(pte, sizeof(pte_t) * PTRS_PER_PTE); } -static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags) +static void pgd_cache_ctor(void *pgd, struct kmem_cache *cache, unsigned long flags) { memzero(pgd + MEMC_TABLE_SIZE, USER_PTRS_PER_PGD * sizeof(pgd_t)); } diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c index ca41fc1edbe..d0abbcaf1c1 100644 --- a/arch/avr32/kernel/kprobes.c +++ b/arch/avr32/kernel/kprobes.c @@ -154,6 +154,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 33096651c24..0ec14854a20 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -15,7 +15,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/unistd.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/ucontext.h> diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c index 44ab8a7bdae..b68d669f823 100644 --- a/arch/avr32/mm/dma-coherent.c +++ b/arch/avr32/mm/dma-coherent.c @@ -11,7 +11,7 @@ #include <asm/addrspace.h> #include <asm/cacheflush.h> -void dma_cache_sync(void *vaddr, size_t size, int direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, int direction) { /* * No need to sync an uncached area diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 934c51078cc..c73e91f1299 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -232,7 +232,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c index eae874a970c..14f64b054c7 100644 --- a/arch/frv/kernel/futex.c +++ b/arch/frv/kernel/futex.c @@ -10,9 +10,9 @@ */ #include <linux/futex.h> +#include <linux/uaccess.h> #include <asm/futex.h> #include <asm/errno.h> -#include <asm/uaccess.h> /* * the various futex operations; MMU fault checking is ignored under no-MMU @@ -200,7 +200,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -223,7 +223,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) break; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index a8c61dac1ce..1a5eb6c301c 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -947,7 +947,7 @@ static void __init setup_linux_memory(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (low_top_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; } else { diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index b8a5882b862..85baeae9666 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -21,7 +21,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/personality.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 8b3eb50c510..3f12296c368 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index f76dd03ddd9..19b13be114a 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -18,7 +18,7 @@ #include <asm/cacheflush.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); -kmem_cache_t *pgd_cache; +struct kmem_cache *pgd_cache; pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { @@ -100,7 +100,7 @@ static inline void pgd_list_del(pgd_t *pgd) set_page_private(next, (unsigned long) pprev); } -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; @@ -120,7 +120,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) } /* never called when PTRS_PER_PMD > 1 */ -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 1077b71d522..6adf8f41d2a 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -116,7 +116,7 @@ void __init setup_arch(char **cmdline_p) #endif #else if ((memory_end < CONFIG_BLKDEV_RESERVE_ADDRESS) && - (memory_end > CONFIG_BLKDEV_RESERVE_ADDRESS) + (memory_end > CONFIG_BLKDEV_RESERVE_ADDRESS)) /* overlap userarea */ memory_end = CONFIG_BLKDEV_RESERVE_ADDRESS; #endif diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 7787f70a05b..02955604d76 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -38,7 +38,7 @@ #include <linux/personality.h> #include <linux/tty.h> #include <linux/binfmts.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <asm/setup.h> #include <asm/uaccess.h> diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 756325dd480..f05288be887 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -70,6 +70,7 @@ SECTIONS #endif .text : { + _text = .; #if defined(CONFIG_ROMKERNEL) *(.int_redirect) #endif diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 8ff1c6fb5aa..ea70359b02d 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -182,6 +182,17 @@ config X86_ES7000 endchoice +config PARAVIRT + bool "Paravirtualization support (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Paravirtualization is a way of running multiple instances of + Linux on the same machine, under a hypervisor. This option + changes the kernel so it can modify itself when it is run + under a hypervisor, improving performance significantly. + However, when run without a hypervisor the kernel is + theoretically slower. If in doubt, say N. + config ACPI_SRAT bool default y @@ -443,7 +454,8 @@ source "drivers/firmware/Kconfig" choice prompt "High Memory Support" - default NOHIGHMEM + default HIGHMEM4G if !X86_NUMAQ + default HIGHMEM64G if X86_NUMAQ config NOHIGHMEM bool "off" @@ -710,20 +722,6 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y -config REGPARM - bool "Use register arguments" - default y - help - Compile the kernel with -mregparm=3. This instructs gcc to use - a more efficient function call ABI which passes the first three - arguments of a function call via registers, which results in denser - and faster code. - - If this option is disabled, then the default ABI of passing - arguments via the stack is used. - - If unsure, say Y. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS @@ -773,23 +771,39 @@ config CRASH_DUMP PHYSICAL_START. For more details see Documentation/kdump/kdump.txt -config PHYSICAL_START - hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) +config RELOCATABLE + bool "Build a relocatable kernel(EXPERIMENTAL)" + depends on EXPERIMENTAL + help + This build a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. - default "0x1000000" if CRASH_DUMP +config PHYSICAL_ALIGN + hex "Alignment value to which kernel should be aligned" default "0x100000" + range 0x2000 0x400000 help - This gives the physical address where the kernel is loaded. Normally - for regular kernels this value is 0x100000 (1MB). But in the case - of kexec on panic the fail safe kernel needs to run at a different - address than the panic-ed kernel. This option is used to set the load - address for kernels used to capture crash dump on being kexec'ed - after panic. The default value for crash dump kernels is - 0x1000000 (16MB). This can also be set based on the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel. Typically this parameter is set as - crashkernel=64M@16M. Please take a look at - Documentation/kdump/kdump.txt for more details about crash dumps. + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is set, kernel will move itself to nearest + address aligned to above value and run from there. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is not set, kernel will ignore the run time + load address and decompress itself to the address it has been + compiled for and run from there. The address for which kernel is + compiled already meets above alignment restrictions. Hence the + end result is that kernel runs from a physical address meeting + above alignment restrictions. Don't change this unless you know what you are doing. diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index fc4f2abccf0..821fd269ca5 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -103,8 +103,15 @@ config MPENTIUMM Select this for Intel Pentium M (not Pentium-4 M) notebook chips. +config MCORE2 + bool "Core 2/newer Xeon" + help + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) + CPUs. You can distingush newer from older Xeons by the CPU family + in /proc/cpuinfo. Newer ones have 6. + config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" help Select this for Intel Pentium 4 chips. This includes the Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M @@ -229,7 +236,7 @@ config X86_L1_CACHE_SHIFT default "7" if MPENTIUM4 || X86_GENERIC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 config RWSEM_GENERIC_SPINLOCK bool @@ -287,17 +294,17 @@ config X86_ALIGNMENT_16 config X86_GOOD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 default y config X86_INTEL_USERCOPY bool - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 default y config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 default y config X86_USE_3DNOW @@ -312,5 +319,5 @@ config X86_OOSTORE config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ default y diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index b31c0802e1c..f68cc6f215f 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -85,4 +85,14 @@ config DOUBLEFAULT option saves about 4k and might cause you much additional grey hair. +config DEBUG_PARAVIRT + bool "Enable some paravirtualization debugging" + default y + depends on PARAVIRT && DEBUG_KERNEL + help + Currently deliberately clobbers regs which are allowed to be + clobbered in inlined paravirt hooks, even in native mode. + If turning this off solves a problem, then DISABLE_INTERRUPTS() or + ENABLE_INTERRUPTS() is lying about what registers can be clobbered. + endmenu diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 0677908dfa0..f7ac1aea1d8 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -26,10 +26,12 @@ endif LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S -LDFLAGS_vmlinux := +ifdef CONFIG_RELOCATABLE +LDFLAGS_vmlinux := --emit-relocs +endif CHECKFLAGS += -D__i386__ -CFLAGS += -pipe -msoft-float +CFLAGS += -pipe -msoft-float -mregparm=3 # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) @@ -37,8 +39,6 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu -cflags-$(CONFIG_REGPARM) += -mregparm=3 - # temporary until string.h is fixed cflags-y += -ffreestanding diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index a11befba26d..a32c031c90d 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -32,6 +32,7 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) +cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686)) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile index 258ea95224f..a661217f33e 100644 --- a/arch/i386/boot/compressed/Makefile +++ b/arch/i386/boot/compressed/Makefile @@ -4,22 +4,42 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \ + vmlinux.bin.all vmlinux.relocs EXTRA_AFLAGS := -traditional -LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 +LDFLAGS_vmlinux := -T +CFLAGS_misc.o += -fPIC +hostprogs-y := relocs -$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE +$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs +quiet_cmd_relocbin = BUILD $@ + cmd_relocbin = cat $(filter-out FORCE,$^) > $@ +$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE + $(call if_changed,relocbin) + +ifdef CONFIG_RELOCATABLE +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) +else $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +endif LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE +$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE $(call if_changed,ld) diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index b5893e4ecd3..f395a4bb38b 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -26,9 +26,11 @@ #include <linux/linkage.h> #include <asm/segment.h> #include <asm/page.h> +#include <asm/boot.h> +.section ".text.head" .globl startup_32 - + startup_32: cld cli @@ -37,93 +39,142 @@ startup_32: movl %eax,%es movl %eax,%fs movl %eax,%gs + movl %eax,%ss - lss stack_start,%esp - xorl %eax,%eax -1: incl %eax # check that A20 really IS enabled - movl %eax,0x000000 # loop forever if it isn't - cmpl %eax,0x100000 - je 1b +/* Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x34-0x3f are used as the stack for this calculation. + * Only 4 bytes are needed. + */ + leal 0x40(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx +#else + movl $LOAD_PHYSICAL_ADDR, %ebx +#endif + + /* Replace the compressed data size with the uncompressed size */ + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx + /* Add 32K + 18 bytes of extra slack */ + addl $(32768 + 18), %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx + +/* Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal _end(%ebp), %esi + leal _end(%ebx), %edi + movl $(_end - startup_32), %ecx + std + rep + movsb + cld + popl %esi + +/* Compute the kernel start address. + */ +#ifdef CONFIG_RELOCATABLE + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp +#else + movl $LOAD_PHYSICAL_ADDR, %ebp +#endif /* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. + * Jump to the relocated address. */ - pushl $0 - popfl + leal relocated(%ebx), %eax + jmp *%eax +.section ".text" +relocated: + /* * Clear BSS */ xorl %eax,%eax - movl $_edata,%edi - movl $_end,%ecx + leal _edata(%ebx),%edi + leal _end(%ebx), %ecx subl %edi,%ecx cld rep stosb + +/* + * Setup the stack for the decompressor + */ + leal stack_end(%ebx), %esp + /* * Do the decompression, and jump to the new kernel.. */ - subl $16,%esp # place for structure on the stack - movl %esp,%eax + movl output_len(%ebx), %eax + pushl %eax + pushl %ebp # output address + movl input_len(%ebx), %eax + pushl %eax # input_len + leal input_data(%ebx), %eax + pushl %eax # input_data + leal _end(%ebx), %eax + pushl %eax # end of the image as third argument pushl %esi # real mode pointer as second arg - pushl %eax # address of structure as first arg call decompress_kernel - orl %eax,%eax - jnz 3f - popl %esi # discard address - popl %esi # real mode pointer - xorl %ebx,%ebx - ljmp $(__BOOT_CS), $__PHYSICAL_START + addl $20, %esp + popl %ecx +#if CONFIG_RELOCATABLE +/* Find the address of the relocations. + */ + movl %ebp, %edi + addl %ecx, %edi + +/* Calculate the delta between where vmlinux was compiled to run + * and where it was actually loaded. + */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ /* - * We come here, if we were loaded high. - * We need to move the move-in-place routine down to 0x1000 - * and then start it with the buffer addresses in registers, - * which we got from the stack. + * Process relocations. */ -3: - movl $move_routine_start,%esi - movl $0x1000,%edi - movl $move_routine_end,%ecx - subl %esi,%ecx - addl $3,%ecx - shrl $2,%ecx - cld - rep - movsl - - popl %esi # discard the address - popl %ebx # real mode pointer - popl %esi # low_buffer_start - popl %ecx # lcount - popl %edx # high_buffer_start - popl %eax # hcount - movl $__PHYSICAL_START,%edi - cli # make sure we don't get interrupted - ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine + +1: subl $4, %edi + movl 0(%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b +2: +#endif /* - * Routine (template) for moving the decompressed kernel in place, - * if we were high loaded. This _must_ PIC-code ! + * Jump to the decompressed kernel. */ -move_routine_start: - movl %ecx,%ebp - shrl $2,%ecx - rep - movsl - movl %ebp,%ecx - andl $3,%ecx - rep - movsb - movl %edx,%esi - movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0 - addl $3,%ecx - shrl $2,%ecx - rep - movsl - movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $__PHYSICAL_START -move_routine_end: + jmp *%ebp + +.bss +.balign 4 +stack: + .fill 4096, 1, 0 +stack_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index b2ccd543410..1ce7017fd62 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -9,11 +9,94 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ +#undef CONFIG_PARAVIRT #include <linux/linkage.h> #include <linux/vmalloc.h> #include <linux/screen_info.h> #include <asm/io.h> #include <asm/page.h> +#include <asm/boot.h> + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analized. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved. + * The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. To avoind problems internal to a block + * adding an extra 32767 bytes (the worst case uncompressed block size) is + * sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actuall data and 4K of bss. + * + */ /* * gzip declarations @@ -30,15 +113,20 @@ typedef unsigned char uch; typedef unsigned short ush; typedef unsigned long ulg; -#define WSIZE 0x8000 /* Window size must be at least 32k, */ - /* and a power of two */ +#define WSIZE 0x80000000 /* Window size must be at least 32k, + * and a power of two + * We don't actually have a window just + * a huge output buffer so I report + * a 2G windows size, as that should + * always be larger than our output buffer. + */ -static uch *inbuf; /* input buffer */ -static uch window[WSIZE]; /* Sliding window buffer */ +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer, (and final output buffer) */ -static unsigned insize = 0; /* valid bytes in inbuf */ -static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ -static unsigned outcnt = 0; /* bytes in output buffer */ +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ @@ -89,8 +177,6 @@ extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; -static uch *output_data; -static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); @@ -100,24 +186,17 @@ static void *memcpy(void *dest, const void *src, unsigned n); static void putstr(const char *); -extern int end; -static long free_mem_ptr = (long)&end; -static long free_mem_end_ptr; +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; -#define INPLACE_MOVE_ROUTINE 0x1000 -#define LOW_BUFFER_START 0x2000 -#define LOW_BUFFER_MAX 0x90000 #define HEAP_SIZE 0x3000 -static unsigned int low_buffer_end, low_buffer_size; -static int high_loaded =0; -static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; static char *vidmem = (char *)0xb8000; static int vidport; static int lines, cols; #ifdef CONFIG_X86_NUMAQ -static void * xquad_portio = NULL; +void *xquad_portio; #endif #include "../../../../lib/inflate.c" @@ -151,7 +230,7 @@ static void gzip_mark(void **ptr) static void gzip_release(void **ptr) { - free_mem_ptr = (long) *ptr; + free_mem_ptr = (unsigned long) *ptr; } static void scroll(void) @@ -179,7 +258,7 @@ static void putstr(const char *s) y--; } } else { - vidmem [ ( x + cols * y ) * 2 ] = c; + vidmem [ ( x + cols * y ) * 2 ] = c; if ( ++x >= cols ) { x = 0; if ( ++y >= lines ) { @@ -224,58 +303,31 @@ static void* memcpy(void* dest, const void* src, unsigned n) */ static int fill_inbuf(void) { - if (insize != 0) { - error("ran out of input data"); - } - - inbuf = input_data; - insize = input_len; - inptr = 1; - return inbuf[0]; + error("ran out of input data"); + return 0; } /* =========================================================================== * Write the output window window[0..outcnt-1] and update crc and bytes_out. * (Used for the decompressed data only.) */ -static void flush_window_low(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, *out, ch; - - in = window; - out = &output_data[output_ptr]; - for (n = 0; n < outcnt; n++) { - ch = *out++ = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - output_ptr += (ulg)outcnt; - outcnt = 0; -} - -static void flush_window_high(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, ch; - in = window; - for (n = 0; n < outcnt; n++) { - ch = *output_data++ = *in++; - if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} - static void flush_window(void) { - if (high_loaded) flush_window_high(); - else flush_window_low(); + /* With my window equal to my output buffer + * I only need to compute the crc here. + */ + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, ch; + + in = window; + for (n = 0; n < outcnt; n++) { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; } static void error(char *x) @@ -287,66 +339,8 @@ static void error(char *x) while(1); /* Halt */ } -#define STACK_SIZE (4096) - -long user_stack [STACK_SIZE]; - -struct { - long * a; - short b; - } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS }; - -static void setup_normal_output_buffer(void) -{ -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); -#endif - output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ - free_mem_end_ptr = (long)real_mode; -} - -struct moveparams { - uch *low_buffer_start; int lcount; - uch *high_buffer_start; int hcount; -}; - -static void setup_output_buffer_if_we_run_high(struct moveparams *mv) -{ - high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); -#endif - mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; - low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX - ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; - low_buffer_size = low_buffer_end - LOW_BUFFER_START; - high_loaded = 1; - free_mem_end_ptr = (long)high_buffer_start; - if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); - mv->hcount = 0; /* say: we need not to move high_buffer */ - } - else mv->hcount = -1; - mv->high_buffer_start = high_buffer_start; -} - -static void close_output_buffer_if_we_run_high(struct moveparams *mv) -{ - if (bytes_out > low_buffer_size) { - mv->lcount = low_buffer_size; - if (mv->hcount) - mv->hcount = bytes_out - low_buffer_size; - } else { - mv->lcount = bytes_out; - mv->hcount = 0; - } -} - -asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) +asmlinkage void decompress_kernel(void *rmode, unsigned long end, + uch *input_data, unsigned long input_len, uch *output) { real_mode = rmode; @@ -361,13 +355,25 @@ asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) lines = RM_SCREEN_INFO.orig_video_lines; cols = RM_SCREEN_INFO.orig_video_cols; - if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); - else setup_output_buffer_if_we_run_high(mv); + window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = end; /* Heap */ + free_mem_end_ptr = end + HEAP_SIZE; + inbuf = input_data; /* Input buffer */ + insize = input_len; + inptr = 0; + + if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1)) + error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); + if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#ifndef CONFIG_RELOCATABLE + if ((u32)output != LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); +#endif makecrc(); putstr("Uncompressing Linux... "); gunzip(); putstr("Ok, booting the kernel.\n"); - if (high_loaded) close_output_buffer_if_we_run_high(mv); - return high_loaded; + return; } diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c new file mode 100644 index 00000000000..468da89153c --- /dev/null +++ b/arch/i386/boot/compressed/relocs.c @@ -0,0 +1,625 @@ +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <elf.h> +#include <byteswap.h> +#define USE_BSD +#include <endian.h> + +#define MAX_SHDRS 100 +static Elf32_Ehdr ehdr; +static Elf32_Shdr shdr[MAX_SHDRS]; +static Elf32_Sym *symtab[MAX_SHDRS]; +static Elf32_Rel *reltab[MAX_SHDRS]; +static char *strtab[MAX_SHDRS]; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; + +/* + * Following symbols have been audited. There values are constant and do + * not change if bzImage is loaded at a different physical address than + * the address for which it has been compiled. Don't warn user about + * absolute relocations present w.r.t these symbols. + */ +static const char* safe_abs_relocs[] = { + "__kernel_vsyscall", + "__kernel_rt_sigreturn", + "__kernel_sigreturn", + "SYSENTER_RETURN", +}; + +static int is_safe_abs_reloc(const char* sym_name) +{ + int i, array_size; + + array_size = sizeof(safe_abs_relocs)/sizeof(char*); + + for(i = 0; i < array_size; i++) { + if (!strcmp(sym_name, safe_abs_relocs[i])) + /* Match found */ + return 1; + } + return 0; +} + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < sizeof(bind_name)/sizeof(bind_name[0])) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = strtab[ehdr.e_shstrndx]; + name = "<noname>"; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + shdr[shndx].sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = "<noname>"; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(shdr[sym->st_shndx].sh_name); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + if (ehdr.e_shnum > MAX_SHDRS) { + die("%d section headers supported: %d\n", + ehdr.e_shnum, MAX_SHDRS); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { + die("Cannot read ELF section headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_shnum; i++) { + shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); + shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); + shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); + shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); + shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); + shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); + shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); + shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); + shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); + shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_STRTAB) { + continue; + } + strtab[i] = malloc(shdr[i].sh_size); + if (!strtab[i]) { + die("malloc of %d bytes for strtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + symtab[i] = malloc(shdr[i].sh_size); + if (!symtab[i]) { + die("malloc of %d bytes for symtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { + symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); + symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); + symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); + symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_REL) { + continue; + } + reltab[i] = malloc(shdr[i].sh_size); + if (!reltab[i]) { + die("malloc of %d bytes for relocs failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); + reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + sh_symtab = symtab[i]; + sym_strtab = strtab[shdr[i].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + Elf32_Sym *sym; + const char *name; + sym = &symtab[i][j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i, printed = 0; + + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + + /* Absolute symbols are not relocated if bzImage is + * loaded at a non-compiled address. Display a warning + * to user at compile time about the absolute + * relocations present. + * + * User need to audit the code to make sure + * some symbols which should have been section + * relative have not become absolute because of some + * linker optimization or wrong programming usage. + * + * Before warning check if this absolute symbol + * relocation is harmless. + */ + if (is_safe_abs_reloc(name)) + continue; + + if (!printed) { + printf("WARNING: Absolute relocations" + " present\n"); + printf("Offset Info Type Sym.Value " + "Sym.Name\n"); + printed = 1; + } + + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + + if (printed) + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +{ + int i; + /* Walk through the relocations */ + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + /* Don't visit relocations to absolute symbols */ + if (sym->st_shndx == SHN_ABS) { + continue; + } + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } + else if (r_type == R_386_32) { + /* Visit relocations that need to be adjusted */ + visit(rel, sym); + } + else { + die("Unsupported relocation type: %d\n", r_type); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + reloc_count += 1; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static void emit_relocs(int as_text) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + for(i = 0; i < reloc_count; i++) { + printf("\t .long 0x%08lx\n", relocs[i]); + } + printf("\n"); + } + else { + unsigned char buf[4]; + buf[0] = buf[1] = buf[2] = buf[3] = 0; + /* Print a stop */ + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + /* Now print each relocation */ + for(i = 0; i < reloc_count; i++) { + buf[0] = (relocs[i] >> 0) & 0xff; + buf[1] = (relocs[i] >> 8) & 0xff; + buf[2] = (relocs[i] >> 16) & 0xff; + buf[3] = (relocs[i] >> 24) & 0xff; + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + } + } +} + +static void usage(void) +{ + die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute_syms, show_absolute_relocs; + int as_text; + const char *fname; + FILE *fp; + int i; + + show_absolute_syms = 0; + show_absolute_relocs = 0; + as_text = 0; + fname = NULL; + for(i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(argv[1], "--abs-syms") == 0) { + show_absolute_syms = 1; + continue; + } + + if (strcmp(argv[1], "--abs-relocs") == 0) { + show_absolute_relocs = 1; + continue; + } + else if (strcmp(argv[1], "--text") == 0) { + as_text = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute_syms) { + print_absolute_symbols(); + return 0; + } + if (show_absolute_relocs) { + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text); + return 0; +} diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds new file mode 100644 index 00000000000..cc4854f6c6c --- /dev/null +++ b/arch/i386/boot/compressed/vmlinux.lds @@ -0,0 +1,43 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(startup_32) +SECTIONS +{ + /* Be careful parts of head.S assume startup_32 is at + * address 0. + */ + . = 0 ; + .text.head : { + _head = . ; + *(.text.head) + _ehead = . ; + } + .data.compressed : { + *(.data.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _end = . ; + } +} diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr index 1ed9d791f86..707a88f7f29 100644 --- a/arch/i386/boot/compressed/vmlinux.scr +++ b/arch/i386/boot/compressed/vmlinux.scr @@ -1,9 +1,10 @@ SECTIONS { - .data : { + .data.compressed : { input_len = .; LONG(input_data_end - input_data) input_data = .; *(.data) + output_len = . - 4; input_data_end = .; } } diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 3aec4538a11..06edf1c6624 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -81,7 +81,7 @@ start: # This is the setup header, and it must start at %cs:2 (old 0x9020:2) .ascii "HdrS" # header signature - .word 0x0204 # header version number (>= 0x0105) + .word 0x0205 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) realmode_swtch: .word 0, 0 # default_switch, SETUPSEG start_sys_seg: .word SYSSEG @@ -160,6 +160,17 @@ ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff # The highest safe address for # the contents of an initrd +kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment + #required for protected mode + #kernel +#ifdef CONFIG_RELOCATABLE +relocatable_kernel: .byte 1 +#else +relocatable_kernel: .byte 0 +#endif +pad2: .byte 0 +pad3: .word 0 + trampoline: call start_of_setup .align 16 # The offset at this point is 0x240 @@ -588,11 +599,6 @@ rmodeswtch_normal: call default_switch rmodeswtch_end: -# we get the code32 start address and modify the below 'jmpi' -# (loader may have changed it) - movl %cs:code32_start, %eax - movl %eax, %cs:code32 - # Now we move the system to its rightful place ... but we check if we have a # big-kernel. In that case we *must* not move it ... testb $LOADED_HIGH, %cs:loadflags @@ -788,11 +794,12 @@ a20_err_msg: a20_done: #endif /* CONFIG_X86_VOYAGER */ -# set up gdt and idt +# set up gdt and idt and 32bit start address lidt idt_48 # load idt with 0,0 xorl %eax, %eax # Compute gdt_base movw %ds, %ax # (Convert %ds:gdt to a linear ptr) shll $4, %eax + addl %eax, code32 addl $gdt, %eax movl %eax, (gdt_48+2) lgdt gdt_48 # load gdt with whatever is @@ -851,9 +858,26 @@ flush_instr: # Manual, Mixing 16-bit and 32-bit code, page 16-6) .byte 0x66, 0xea # prefix + jmpi-opcode -code32: .long 0x1000 # will be set to 0x100000 - # for big kernels +code32: .long startup_32 # will be set to %cs+startup_32 .word __BOOT_CS +.code32 +startup_32: + movl $(__BOOT_DS), %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss + + xorl %eax, %eax +1: incl %eax # check that A20 really IS enabled + movl %eax, 0x00000000 # loop forever if it isn't + cmpl %eax, 0x00100000 + je 1b + + # Jump to the 32bit entry point + jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi) +.code16 # Here's a bunch of information about your current kernel.. kernel_version: .ascii UTS_RELEASE diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 97aacd6bd7d..65891f11ace 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.19-rc2-git4 -# Sat Oct 21 03:38:56 2006 +# Linux kernel version: 2.6.19-git7 +# Wed Dec 6 23:50:49 2006 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -40,13 +40,14 @@ CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y -# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -110,6 +111,7 @@ CONFIG_SMP=y # CONFIG_X86_VISWS is not set CONFIG_X86_GENERICARCH=y # CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M386 is not set # CONFIG_M486 is not set @@ -120,6 +122,7 @@ CONFIG_X86_CYCLONE_TIMER=y # CONFIG_MPENTIUMII is not set CONFIG_MPENTIUMIII=y # CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -197,7 +200,6 @@ CONFIG_RESOURCES_64BIT=y CONFIG_MTRR=y # CONFIG_EFI is not set # CONFIG_IRQBALANCE is not set -CONFIG_REGPARM=y CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set CONFIG_HZ_250=y @@ -205,7 +207,8 @@ CONFIG_HZ_250=y CONFIG_HZ=250 # CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 # CONFIG_HOTPLUG_CPU is not set CONFIG_COMPAT_VDSO=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y @@ -367,6 +370,7 @@ CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -677,6 +681,7 @@ CONFIG_SATA_INTEL_COMBINED=y # CONFIG_PATA_IT821X is not set # CONFIG_PATA_JMICRON is not set # CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set @@ -850,6 +855,7 @@ CONFIG_BNX2=y # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set # # Token Ring devices @@ -984,10 +990,6 @@ CONFIG_RTC=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# CONFIG_AGP=y # CONFIG_AGP_ALI is not set # CONFIG_AGP_ATI is not set @@ -1108,6 +1110,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set +# CONFIG_USB_MULTITHREAD_PROBE is not set # CONFIG_USB_OTG is not set # @@ -1185,6 +1188,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set CONFIG_USB_MON=y diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 1a884b6e6e5..1e8988e558c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ - pci-dma.o i386_ksyms.o i387.o bootflag.o \ + pci-dma.o i386_ksyms.o i387.o bootflag.o e820.o\ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -40,6 +40,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o +# Make sure this is linked after any other paravirt_ops structs: see head.S +obj-$(CONFIG_PARAVIRT) += paravirt.o + EXTRA_AFLAGS := -traditional obj-$(CONFIG_SCx200) += scx200.o diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c index 4664b55f623..12e937c1ce4 100644 --- a/arch/i386/kernel/acpi/cstate.c +++ b/arch/i386/kernel/acpi/cstate.c @@ -156,10 +156,8 @@ static int __init ffh_cstate_init(void) static void __exit ffh_cstate_exit(void) { - if (cpu_cstate_entry) { - free_percpu(cpu_cstate_entry); - cpu_cstate_entry = NULL; - } + free_percpu(cpu_cstate_entry); + cpu_cstate_entry = NULL; } arch_initcall(ffh_cstate_init); diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index c9841692bb7..4b60af7f91d 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -10,6 +10,7 @@ #include <asm/pci-direct.h> #include <asm/acpi.h> #include <asm/apic.h> +#include <asm/irq.h> #ifdef CONFIG_ACPI @@ -49,6 +50,24 @@ static int __init check_bridge(int vendor, int device) return 0; } +static void check_intel(void) +{ + u16 vendor, device; + + vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID); + + if (vendor != PCI_VENDOR_ID_INTEL) + return; + + device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + void __init check_acpi_pci(void) { int num, slot, func; @@ -60,6 +79,8 @@ void __init check_acpi_pci(void) if (!early_pci_allowed()) return; + check_intel(); + /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { for (slot = 0; slot < 32; slot++) { diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 535f9794fba..9eca21b49f6 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -124,6 +124,20 @@ static unsigned char** find_nop_table(void) #endif /* CONFIG_X86_64 */ +static void nop_out(void *insns, unsigned int len) +{ + unsigned char **noptable = find_nop_table(); + + while (len > 0) { + unsigned int noplen = len; + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + memcpy(insns, noptable[noplen], noplen); + insns += noplen; + len -= noplen; + } +} + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; @@ -138,10 +152,9 @@ extern u8 __smp_alt_begin[], __smp_alt_end[]; void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { - unsigned char **noptable = find_nop_table(); struct alt_instr *a; u8 *instr; - int diff, i, k; + int diff; DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { @@ -159,13 +172,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) #endif memcpy(instr, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } + nop_out(instr + a->replacementlen, diff); } } @@ -209,7 +216,6 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - unsigned char **noptable = find_nop_table(); u8 **ptr; for (ptr = start; ptr < end; ptr++) { @@ -217,7 +223,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end continue; if (*ptr > text_end) continue; - **ptr = noptable[1][0]; + nop_out(*ptr, 1); }; } @@ -343,6 +349,40 @@ void alternatives_smp_switch(int smp) #endif +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{ + struct paravirt_patch *p; + + for (p = start; p < end; p++) { + unsigned int used; + + used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, + p->len); +#ifdef CONFIG_DEBUG_PARAVIRT + { + int i; + /* Deliberately clobber regs using "not %reg" to find bugs. */ + for (i = 0; i < 3; i++) { + if (p->len - used >= 2 && (p->clobbers & (1 << i))) { + memcpy(p->instr + used, "\xf7\xd0", 2); + p->instr[used+1] |= i; + used += 2; + } + } + } +#endif + /* Pad the rest with nops */ + nop_out(p->instr + used, p->len - used); + } + + /* Sync to be conservative, in case we patched following instructions */ + sync_core(); +} +extern struct paravirt_patch __start_parainstructions[], + __stop_parainstructions[]; +#endif /* CONFIG_PARAVIRT */ + void __init alternative_instructions(void) { unsigned long flags; @@ -390,5 +430,6 @@ void __init alternative_instructions(void) alternatives_smp_switch(0); } #endif + apply_paravirt(__start_parainstructions, __stop_parainstructions); local_irq_restore(flags); } diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 2fd4b7d927c..776d9be26af 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -647,23 +647,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_P4THERMAL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); @@ -675,10 +682,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); /* @@ -700,8 +710,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_P4THERMAL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index a60358fe9a4..a97847da9ed 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -231,6 +231,7 @@ #include <asm/uaccess.h> #include <asm/desc.h> #include <asm/i8253.h> +#include <asm/paravirt.h> #include "io_ports.h" @@ -2235,7 +2236,7 @@ static int __init apm_init(void) dmi_check_system(apm_dmi_table); - if (apm_info.bios.version == 0) { + if (apm_info.bios.version == 0 || paravirt_enabled()) { printk(KERN_INFO "apm: BIOS not found.\n"); return -ENODEV; } diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c80271f8f08..1b2f3cd3327 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -15,6 +15,7 @@ #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/elf.h> +#include <asm/pda.h> #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -51,13 +52,35 @@ void foo(void) OFFSET(TI_exec_domain, thread_info, exec_domain); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_cpu, thread_info, cpu); OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); OFFSET(TI_sysenter_return, thread_info, sysenter_return); BLANK(); + OFFSET(GDS_size, Xgt_desc_struct, size); + OFFSET(GDS_address, Xgt_desc_struct, address); + OFFSET(GDS_pad, Xgt_desc_struct, pad); + BLANK(); + + OFFSET(PT_EBX, pt_regs, ebx); + OFFSET(PT_ECX, pt_regs, ecx); + OFFSET(PT_EDX, pt_regs, edx); + OFFSET(PT_ESI, pt_regs, esi); + OFFSET(PT_EDI, pt_regs, edi); + OFFSET(PT_EBP, pt_regs, ebp); + OFFSET(PT_EAX, pt_regs, eax); + OFFSET(PT_DS, pt_regs, xds); + OFFSET(PT_ES, pt_regs, xes); + OFFSET(PT_GS, pt_regs, xgs); + OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); + OFFSET(PT_EIP, pt_regs, eip); + OFFSET(PT_CS, pt_regs, xcs); + OFFSET(PT_EFLAGS, pt_regs, eflags); + OFFSET(PT_OLDESP, pt_regs, esp); + OFFSET(PT_OLDSS, pt_regs, xss); + BLANK(); + OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); @@ -74,4 +97,18 @@ void foo(void) DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + + BLANK(); + OFFSET(PDA_cpu, i386_pda, cpu_number); + OFFSET(PDA_pcurrent, i386_pda, pcurrent); + +#ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); + OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); + OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); + OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); + OFFSET(PARAVIRT_iret, paravirt_ops, iret); + OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); +#endif } diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e4758095d87..41cfea57232 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -104,10 +104,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) f_vide(); rdtscl(d2); d = d2-d; - - /* Knock these two lines out if it debugs out ok */ - printk(KERN_INFO "AMD K6 stepping B detected - "); - /* -- cut here -- */ + if (d > 20*K6_BUG_LOOP) printk("system stability may be impaired when more than 32 MB are used.\n"); else diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d9f3e3c31f0..1b34c56f812 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -18,14 +18,15 @@ #include <asm/apic.h> #include <mach_apic.h> #endif +#include <asm/pda.h> #include "cpu.h" DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(_cpu_pda); static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; @@ -235,29 +236,14 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +void __init cpu_detect(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - /* Get vendor name */ cpuid(0x00000000, &c->cpuid_level, (int *)&c->x86_vendor_id[0], (int *)&c->x86_vendor_id[8], (int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c, 1); - c->x86 = 4; if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -274,6 +260,26 @@ static void __init early_cpu_detect(void) } } +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; @@ -308,6 +314,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) #else c->apicid = (ebx >> 24) & 0xFF; #endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -372,6 +380,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; + c->x86_clflush_size = 32; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -591,42 +600,24 @@ void __init early_cpu_init(void) disable_pse = 1; #endif } -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __cpuinit cpu_init(void) + +/* Make sure %gs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { - int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt; - __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + memset(regs, 0, sizeof(struct pt_regs)); + regs->xgs = __KERNEL_PDA; + return regs; +} - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - printk(KERN_INFO "Initializing CPU#%d\n", cpu); +static __cpuinit int alloc_gdt(int cpu) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); - /* The CPU hotplug case */ - if (cpu_gdt_descr->address) { - gdt = (struct desc_struct *)cpu_gdt_descr->address; - memset(gdt, 0, PAGE_SIZE); - goto old_gdt; - } /* * This is a horrible hack to allocate the GDT. The problem * is that cpu_init() is called really early for the boot CPU @@ -634,43 +625,130 @@ void __cpuinit cpu_init(void) * CPUs, when bootmem will have gone away */ if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ + BUG_ON(gdt != NULL || pda != NULL); + + gdt = alloc_bootmem_pages(PAGE_SIZE); + pda = alloc_bootmem(sizeof(*pda)); + /* alloc_bootmem(_pages) panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + memset(pda, 0, sizeof(*pda)); } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); + /* GDT and PDA might already have been allocated if + this is a CPU hotplug re-insertion. */ + if (gdt == NULL) + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + + if (pda == NULL) + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); + + if (unlikely(!gdt || !pda)) { + free_pages((unsigned long)gdt, 0); + kfree(pda); + return 0; } } -old_gdt: + + cpu_gdt_descr->address = (unsigned long)gdt; + cpu_pda(cpu) = pda; + + return 1; +} + +/* Initial PDA used by boot CPU */ +struct i386_pda boot_pda = { + ._pda = &boot_pda, + .cpu_number = 0, + .pcurrent = &init_task, +}; + +static inline void set_kernel_gs(void) +{ + /* Set %gs for this CPU's PDA. Memory clobber is to create a + barrier with respect to any PDA operations, so the compiler + doesn't move any before here. */ + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); +} + +/* Initialize the CPU's GDT and PDA. The boot CPU does this for + itself, but secondaries find this done for them. */ +__cpuinit int init_gdt(int cpu, struct task_struct *idle) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; + + /* For non-boot CPUs, the GDT and PDA should already have been + allocated. */ + if (!alloc_gdt(cpu)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); + return 0; + } + + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); + + BUG_ON(gdt == NULL || pda == NULL); + /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ memcpy(gdt, cpu_gdt_table, GDT_SIZE); + cpu_gdt_descr->size = GDT_SIZE - 1; - /* Set up GDT entry for 16bit stack */ - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | - (CPU_16BIT_STACK_SIZE - 1); + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, + (u32 *)&gdt[GDT_ENTRY_PDA].b, + (unsigned long)pda, sizeof(*pda) - 1, + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ - cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + memset(pda, 0, sizeof(*pda)); + pda->_pda = pda; + pda->cpu_number = cpu; + pda->pcurrent = idle; + + return 1; +} + +/* Common CPU init for both boot and secondary CPUs */ +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) +{ + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + /* Reinit these anyway, even if they've already been done (on + the boot CPU, this will transition from the boot gdt+pda to + the real ones). */ load_gdt(cpu_gdt_descr); + set_kernel_gs(); + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + load_idt(&idt_descr); /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - BUG_ON(current->mm); - enter_lazy_tlb(&init_mm, current); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); load_esp0(t, thread); set_tss_desc(cpu,t); @@ -682,8 +760,8 @@ old_gdt: __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %fs and %gs. */ - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); + /* Clear %fs. */ + asm volatile ("mov %0, %%fs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); @@ -701,6 +779,37 @@ old_gdt: mxcsr_feature_mask_init(); } +/* Entrypoint to initialize secondary CPU */ +void __cpuinit secondary_cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + _cpu_init(cpu, curr); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + /* Set up the real GDT and PDA, so we can transition from the + boot versions. */ + if (!init_gdt(cpu, curr)) { + /* failed to allocate something; not much we can do... */ + for (;;) + local_irq_enable(); + } + + _cpu_init(cpu, curr); +} + #ifdef CONFIG_HOTPLUG_CPU void __cpuinit cpu_uninit(void) { diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 94a95aa5227..56fe2658495 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -107,7 +107,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * Note that the workaround only should be initialized once... */ c->f00f_bug = 0; - if ( c->x86 == 5 ) { + if (!paravirt_enabled() && c->x86 == 5) { static int f00f_workaround_enabled = 0; c->f00f_bug = 1; @@ -195,8 +195,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); -} + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } +} static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 5c43be47587..80b4c5d421b 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -480,12 +480,10 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - cpuid4_info[cpu] = kmalloc( + cpuid4_info[cpu] = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); if (unlikely(cpuid4_info[cpu] == NULL)) return -ENOMEM; - memset(cpuid4_info[cpu], 0, - sizeof(struct _cpuid4_info) * num_cache_leaves); oldmask = current->cpus_allowed; retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); @@ -658,17 +656,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return -ENOENT; /* Allocate all required memory */ - cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL); + cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); if (unlikely(cache_kobject[cpu] == NULL)) goto err_out; - memset(cache_kobject[cpu], 0, sizeof(struct kobject)); - index_kobject[cpu] = kmalloc( + index_kobject[cpu] = kzalloc( sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); if (unlikely(index_kobject[cpu] == NULL)) goto err_out; - memset(index_kobject[cpu], 0, - sizeof(struct _index_kobject) * num_cache_leaves); return 0; diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index bad8b442070..065005c3f16 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -116,7 +116,6 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); } -#ifdef CONFIG_HOTPLUG_CPU static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) { return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); @@ -153,7 +152,6 @@ static struct notifier_block thermal_throttle_cpu_notifier = { .notifier_call = thermal_throttle_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ static __init int thermal_throttle_init_device(void) { diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile index a25b701ab84..191fc053364 100644 --- a/arch/i386/kernel/cpu/mtrr/Makefile +++ b/arch/i386/kernel/cpu/mtrr/Makefile @@ -1,5 +1,3 @@ obj-y := main.o if.o generic.o state.o -obj-y += amd.o -obj-y += cyrix.o -obj-y += centaur.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c index 1a1e04b6fd0..0949cdbf848 100644 --- a/arch/i386/kernel/cpu/mtrr/amd.c +++ b/arch/i386/kernel/cpu/mtrr/amd.c @@ -7,7 +7,7 @@ static void amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) + unsigned long *size, mtrr_type * type) { unsigned long low, high; diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c index 33f00ac314e..cb9aa3a7a7a 100644 --- a/arch/i386/kernel/cpu/mtrr/centaur.c +++ b/arch/i386/kernel/cpu/mtrr/centaur.c @@ -17,7 +17,7 @@ static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ */ static int -centaur_get_free_region(unsigned long base, unsigned long size) +centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) /* [SUMMARY] Get a free MTRR. <base> The starting (base) address of the region. <size> The size (in bytes) of the region. @@ -26,10 +26,11 @@ centaur_get_free_region(unsigned long base, unsigned long size) { int i, max; mtrr_type ltype; - unsigned long lbase; - unsigned int lsize; + unsigned long lbase, lsize; max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; for (i = 0; i < max; ++i) { if (centaur_mcr_reserved & (1 << i)) continue; @@ -49,7 +50,7 @@ mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) static void centaur_get_mcr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) + unsigned long *size, mtrr_type * type) { *base = centaur_mcr[reg].high >> PAGE_SHIFT; *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 9027a987006..0737a596db4 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -9,7 +9,7 @@ int arr3_protected; static void cyrix_get_arr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) + unsigned long *size, mtrr_type * type) { unsigned long flags; unsigned char arr, ccr3, rcr, shift; @@ -77,7 +77,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, } static int -cyrix_get_free_region(unsigned long base, unsigned long size) +cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) /* [SUMMARY] Get a free ARR. <base> The starting (base) address of the region. <size> The size (in bytes) of the region. @@ -86,9 +86,24 @@ cyrix_get_free_region(unsigned long base, unsigned long size) { int i; mtrr_type ltype; - unsigned long lbase; - unsigned int lsize; + unsigned long lbase, lsize; + switch (replace_reg) { + case 7: + if (size < 0x40) + break; + case 6: + case 5: + case 4: + return replace_reg; + case 3: + if (arr3_protected) + break; + case 2: + case 1: + case 0: + return replace_reg; + } /* If we are to set up a region >32M then look at ARR7 immediately */ if (size > 0x2000) { cyrix_get_arr(7, &lbase, &lsize, <ype); @@ -214,7 +229,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, typedef struct { unsigned long base; - unsigned int size; + unsigned long size; mtrr_type type; } arr_state_t; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 0b61eed8bbd..f77fc53db65 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/mtrr.h> #include <asm/msr.h> @@ -15,12 +16,19 @@ struct mtrr_state { struct mtrr_var_range *var_ranges; mtrr_type fixed_ranges[NUM_FIXED_RANGES]; unsigned char enabled; + unsigned char have_fixed; mtrr_type def_type; }; static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "mtrr." + +static __initdata int mtrr_show; +module_param_named(show, mtrr_show, bool, 0); + /* Get the MSR pair relating to a var range */ static void __init get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) @@ -43,6 +51,14 @@ get_fixed_ranges(mtrr_type * frs) rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); } +static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types) +{ + unsigned i; + + for (i = 0; i < 8; ++i, ++types, base += step) + printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types)); +} + /* Grab all of the MTRR state for this CPU into *state */ void __init get_mtrr_state(void) { @@ -58,13 +74,49 @@ void __init get_mtrr_state(void) } vrs = mtrr_state.var_ranges; + rdmsr(MTRRcap_MSR, lo, dummy); + mtrr_state.have_fixed = (lo >> 8) & 1; + for (i = 0; i < num_var_ranges; i++) get_mtrr_var_range(i, &vrs[i]); - get_fixed_ranges(mtrr_state.fixed_ranges); + if (mtrr_state.have_fixed) + get_fixed_ranges(mtrr_state.fixed_ranges); rdmsr(MTRRdefType_MSR, lo, dummy); mtrr_state.def_type = (lo & 0xff); mtrr_state.enabled = (lo & 0xc00) >> 10; + + if (mtrr_show) { + int high_width; + + printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + if (mtrr_state.have_fixed) { + printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); + print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); + for (i = 0; i < 2; ++i) + print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + for (i = 0; i < 8; ++i) + print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + } + printk(KERN_INFO "MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); + high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { + if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) + printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + else + printk(KERN_INFO "MTRR %u disabled\n", i); + } + } } /* Some BIOS's are fucked and don't set all MTRRs the same! */ @@ -95,7 +147,7 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) smp_processor_id(), msr, a, b); } -int generic_get_free_region(unsigned long base, unsigned long size) +int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) /* [SUMMARY] Get a free MTRR. <base> The starting (base) address of the region. <size> The size (in bytes) of the region. @@ -104,10 +156,11 @@ int generic_get_free_region(unsigned long base, unsigned long size) { int i, max; mtrr_type ltype; - unsigned long lbase; - unsigned lsize; + unsigned long lbase, lsize; max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; for (i = 0; i < max; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); if (lsize == 0) @@ -117,7 +170,7 @@ int generic_get_free_region(unsigned long base, unsigned long size) } static void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) + unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; @@ -202,7 +255,9 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) return changed; } -static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi) +static u32 deftype_lo, deftype_hi; + +static unsigned long set_mtrr_state(void) /* [SUMMARY] Set the MTRR state for this CPU. <state> The MTRR state information to read. <ctxt> Some relevant CPU context. @@ -217,14 +272,14 @@ static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi) if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) change_mask |= MTRR_CHANGE_MASK_VARIABLE; - if (set_fixed_ranges(mtrr_state.fixed_ranges)) + if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) change_mask |= MTRR_CHANGE_MASK_FIXED; /* Set_mtrr_restore restores the old value of MTRRdefType, so to set it we fiddle with the saved value */ if ((deftype_lo & 0xff) != mtrr_state.def_type || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10); + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } @@ -233,7 +288,6 @@ static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi) static unsigned long cr4 = 0; -static u32 deftype_lo, deftype_hi; static DEFINE_SPINLOCK(set_atomicity_lock); /* @@ -271,7 +325,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); } static void post_set(void) __releases(set_atomicity_lock) @@ -300,7 +354,7 @@ static void generic_set_all(void) prepare_set(); /* Actually set the state */ - mask = set_mtrr_state(deftype_lo,deftype_hi); + mask = set_mtrr_state(); post_set(); local_irq_restore(flags); @@ -366,7 +420,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } - if (!(base + size < 0x70000000 || base > 0x7003FFFF) && + if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index 5ac051bb9d5..5ae1705eafa 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -17,7 +17,7 @@ extern unsigned int *usage_table; #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) -static char *mtrr_strings[MTRR_NUM_TYPES] = +static const char *const mtrr_strings[MTRR_NUM_TYPES] = { "uncachable", /* 0 */ "write-combining", /* 1 */ @@ -28,7 +28,7 @@ static char *mtrr_strings[MTRR_NUM_TYPES] = "write-back", /* 6 */ }; -char *mtrr_attrib_to_str(int x) +const char *mtrr_attrib_to_str(int x) { return (x <= 6) ? mtrr_strings[x] : "?"; } @@ -44,10 +44,9 @@ mtrr_file_add(unsigned long base, unsigned long size, max = num_var_ranges; if (fcount == NULL) { - fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL); + fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); if (!fcount) return -ENOMEM; - memset(fcount, 0, max * sizeof *fcount); FILE_FCOUNT(file) = fcount; } if (!page) { @@ -155,6 +154,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { int err = 0; mtrr_type type; + unsigned long size; struct mtrr_sentry sentry; struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; @@ -235,15 +235,15 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) case MTRRIOC_GET_ENTRY: if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); /* Hide entries that go above 4GB */ - if (gentry.base + gentry.size > 0x100000 - || gentry.size == 0x100000) + if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) + || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) gentry.base = gentry.size = gentry.type = 0; else { gentry.base <<= PAGE_SHIFT; - gentry.size <<= PAGE_SHIFT; + gentry.size = size << PAGE_SHIFT; gentry.type = type; } @@ -273,8 +273,14 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) case MTRRIOC_GET_PAGE_ENTRY: if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); - gentry.type = type; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + /* Hide entries that would overflow */ + if (size != (__typeof__(gentry.size))size) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.size = size; + gentry.type = type; + } break; } @@ -353,8 +359,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) char factor; int i, max, len; mtrr_type type; - unsigned long base; - unsigned int size; + unsigned long base, size; len = 0; max = num_var_ranges; @@ -373,7 +378,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) } /* RED-PEN: base can be > 32bit */ len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n", + "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, mtrr_attrib_to_str(type), usage_table[i]); } diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index fff90bda473..16bb7ea8714 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -59,7 +59,11 @@ struct mtrr_ops * mtrr_if = NULL; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); +#ifndef CONFIG_X86_64 extern int arr3_protected; +#else +#define arr3_protected 0 +#endif void set_mtrr_ops(struct mtrr_ops * ops) { @@ -168,6 +172,13 @@ static void ipi_handler(void *info) #endif +static inline int types_compatible(mtrr_type type1, mtrr_type type2) { + return type1 == MTRR_TYPE_UNCACHABLE || + type2 == MTRR_TYPE_UNCACHABLE || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || + (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); +} + /** * set_mtrr - update mtrrs on all processors * @reg: mtrr in question @@ -263,8 +274,8 @@ static void set_mtrr(unsigned int reg, unsigned long base, /** * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (4 KB) - * @size: Physical size of region in pages (4 KB) + * @base: Physical base address of region in pages (in units of 4 kB!) + * @size: Physical size of region in pages (4 kB) * @type: Type of MTRR desired * @increment: If this is true do usage counting on the region * @@ -300,11 +311,9 @@ static void set_mtrr(unsigned int reg, unsigned long base, int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, char increment) { - int i; + int i, replace, error; mtrr_type ltype; - unsigned long lbase; - unsigned int lsize; - int error; + unsigned long lbase, lsize; if (!mtrr_if) return -ENXIO; @@ -324,12 +333,18 @@ int mtrr_add_page(unsigned long base, unsigned long size, return -ENOSYS; } + if (!size) { + printk(KERN_WARNING "mtrr: zero sized request\n"); + return -EINVAL; + } + if (base & size_or_mask || size & size_or_mask) { printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } error = -EINVAL; + replace = -1; /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); @@ -337,21 +352,28 @@ int mtrr_add_page(unsigned long base, unsigned long size, mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); - if (base >= lbase + lsize) - continue; - if ((base < lbase) && (base + size <= lbase)) + if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) continue; /* At this point we know there is some kind of overlap/enclosure */ - if ((base < lbase) || (base + size > lbase + lsize)) { + if (base < lbase || base + size - 1 > lbase + lsize - 1) { + if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + /* New region encloses an existing region */ + if (type == ltype) { + replace = replace == -1 ? i : -2; + continue; + } + else if (types_compatible(type, ltype)) + continue; + } printk(KERN_WARNING "mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%x000\n", base, size, lbase, + " 0x%lx000,0x%lx000\n", base, size, lbase, lsize); goto out; } /* New region is enclosed by an existing region */ if (ltype != type) { - if (type == MTRR_TYPE_UNCACHABLE) + if (types_compatible(type, ltype)) continue; printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", base, size, mtrr_attrib_to_str(ltype), @@ -364,10 +386,18 @@ int mtrr_add_page(unsigned long base, unsigned long size, goto out; } /* Search for an empty MTRR */ - i = mtrr_if->get_free_region(base, size); + i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { set_mtrr(i, base, size, type); - usage_table[i] = 1; + if (likely(replace < 0)) + usage_table[i] = 1; + else { + usage_table[i] = usage_table[replace] + !!increment; + if (unlikely(replace != i)) { + set_mtrr(replace, 0, 0, 0); + usage_table[replace] = 0; + } + } } else printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; @@ -455,8 +485,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) { int i, max; mtrr_type ltype; - unsigned long lbase; - unsigned int lsize; + unsigned long lbase, lsize; int error = -EINVAL; if (!mtrr_if) @@ -544,9 +573,11 @@ extern void centaur_init_mtrr(void); static void __init init_ifs(void) { +#ifndef CONFIG_X86_64 amd_init_mtrr(); cyrix_init_mtrr(); centaur_init_mtrr(); +#endif } /* The suspend/resume methods are only for CPU without MTRR. CPU using generic @@ -555,7 +586,7 @@ static void __init init_ifs(void) struct mtrr_value { mtrr_type ltype; unsigned long lbase; - unsigned int lsize; + unsigned long lsize; }; static struct mtrr_value * mtrr_state; @@ -565,10 +596,8 @@ static int mtrr_save(struct sys_device * sysdev, pm_message_t state) int i; int size = num_var_ranges * sizeof(struct mtrr_value); - mtrr_state = kmalloc(size,GFP_ATOMIC); - if (mtrr_state) - memset(mtrr_state,0,size); - else + mtrr_state = kzalloc(size,GFP_ATOMIC); + if (!mtrr_state) return -ENOMEM; for (i = 0; i < num_var_ranges; i++) { diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index 99c9f268204..d61ea9db6cf 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h @@ -43,15 +43,16 @@ struct mtrr_ops { void (*set_all)(void); void (*get)(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type); - int (*get_free_region) (unsigned long base, unsigned long size); - + unsigned long *size, mtrr_type * type); + int (*get_free_region)(unsigned long base, unsigned long size, + int replace_reg); int (*validate_add_page)(unsigned long base, unsigned long size, unsigned int type); int (*have_wrcomb)(void); }; -extern int generic_get_free_region(unsigned long base, unsigned long size); +extern int generic_get_free_region(unsigned long base, unsigned long size, + int replace_reg); extern int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type); @@ -62,17 +63,17 @@ extern int positive_have_wrcomb(void); /* library functions for processor-specific routines */ struct set_mtrr_context { unsigned long flags; - unsigned long deftype_lo; - unsigned long deftype_hi; unsigned long cr4val; - unsigned long ccr3; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; }; struct mtrr_var_range { - unsigned long base_lo; - unsigned long base_hi; - unsigned long mask_lo; - unsigned long mask_hi; + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; }; void set_mtrr_done(struct set_mtrr_context *ctxt); @@ -92,6 +93,6 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; void mtrr_state_warn(void); -char *mtrr_attrib_to_str(int x); +const char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 76aac088a32..6624d8583c4 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -152,9 +152,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, " [%d]", i); } - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); return 0; } diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index ab0c327e79d..db6dd20c358 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -34,7 +34,6 @@ #include <linux/major.h> #include <linux/fs.h> #include <linux/smp_lock.h> -#include <linux/fs.h> #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> @@ -167,7 +166,6 @@ static int cpuid_device_create(int i) return err; } -#ifdef CONFIG_HOTPLUG_CPU static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -187,7 +185,6 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; -#endif /* !CONFIG_HOTPLUG_CPU */ static int __init cpuid_init(void) { diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 144b4328896..a5e0e990ea9 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -31,68 +31,6 @@ /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, so there is no need to invent something new. - */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - -static void crash_save_self(struct pt_regs *regs) -{ - int cpu; - - cpu = safe_smp_processor_id(); - crash_save_this_cpu(regs, cpu); -} - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; @@ -121,7 +59,7 @@ static int crash_nmi_callback(struct notifier_block *self, crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -195,5 +133,5 @@ void machine_crash_shutdown(struct pt_regs *regs) #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); #endif - crash_save_self(regs); + crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c new file mode 100644 index 00000000000..2f7d0a92fd7 --- /dev/null +++ b/arch/i386/kernel/e820.c @@ -0,0 +1,894 @@ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> +#include <linux/string.h> +#include <linux/kexec.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/efi.h> +#include <linux/pfn.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/e820.h> + +#ifdef CONFIG_EFI +int efi_enabled = 0; +EXPORT_SYMBOL(efi_enabled); +#endif + +struct e820map e820; +struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ +}; +static struct change_member change_point_list[2*E820MAX] __initdata; +static struct change_member *change_point[2*E820MAX] __initdata; +static struct e820entry *overlap_list[E820MAX] __initdata; +static struct e820entry new_bios[E820MAX] __initdata; +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif +extern int user_defined_memmap; +struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource system_rom_resource = { + .name = "System ROM", + .start = 0xf0000, + .end = 0xfffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource extension_rom_resource = { + .name = "Extension ROM", + .start = 0xe0000, + .end = 0xeffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource adapter_rom_resources[] = { { + .name = "Adapter ROM", + .start = 0xc8000, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +} }; + +static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, + .end = 0xc7fff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource standard_io_resources[] = { { + .name = "dma1", + .start = 0x0000, + .end = 0x001f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic1", + .start = 0x0020, + .end = 0x0021, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer0", + .start = 0x0040, + .end = 0x0043, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer1", + .start = 0x0050, + .end = 0x0053, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "keyboard", + .start = 0x0060, + .end = 0x006f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma page reg", + .start = 0x0080, + .end = 0x008f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic2", + .start = 0x00a0, + .end = 0x00a1, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma2", + .start = 0x00c0, + .end = 0x00df, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "fpu", + .start = 0x00f0, + .end = 0x00ff, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +} }; + +static int romsignature(const unsigned char *x) +{ + unsigned short sig; + int ret = 0; + if (probe_kernel_address((const unsigned short *)x, sig) == 0) + ret = (sig == 0xaa55); + return ret; +} + +static int __init romchecksum(unsigned char *rom, unsigned long length) +{ + unsigned char *p, sum = 0; + + for (p = rom; p < rom + length; p++) + sum += *p; + return sum == 0; +} + +static void __init probe_roms(void) +{ + unsigned long start, length, upper; + unsigned char *rom; + int i; + + /* video rom */ + upper = adapter_rom_resources[0].start; + for (start = video_rom_resource.start; start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + video_rom_resource.start = start; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* if checksum okay, trust length byte */ + if (length && romchecksum(rom, length)) + video_rom_resource.end = start + length - 1; + + request_resource(&iomem_resource, &video_rom_resource); + break; + } + + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; + if (start < upper) + start = upper; + + /* system rom */ + request_resource(&iomem_resource, &system_rom_resource); + upper = system_rom_resource.start; + + /* check for extension rom (ignore length byte!) */ + rom = isa_bus_to_virt(extension_rom_resource.start); + if (romsignature(rom)) { + length = extension_rom_resource.end - extension_rom_resource.start + 1; + if (romchecksum(rom, length)) { + request_resource(&iomem_resource, &extension_rom_resource); + upper = extension_rom_resource.start; + } + } + + /* check for adapter roms on 2k boundaries */ + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* but accept any length that fits if checksum okay */ + if (!length || start + length > upper || !romchecksum(rom, length)) + continue; + + adapter_rom_resources[i].start = start; + adapter_rom_resources[i].end = start + length - 1; + request_resource(&iomem_resource, &adapter_rom_resources[i]); + + start = adapter_rom_resources[i++].end & ~2047UL; + } +} + +/* + * Request address space for all standard RAM and ROM resources + * and also for regions reported as reserved by the e820. + */ +static void __init +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +{ + int i; + + probe_roms(); + for (i = 0; i < e820.nr_map; i++) { + struct resource *res; +#ifndef CONFIG_RESOURCES_64BIT + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) + continue; +#endif + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + switch (e820.map[i].type) { + case E820_RAM: res->name = "System RAM"; break; + case E820_ACPI: res->name = "ACPI Tables"; break; + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; + default: res->name = "reserved"; + } + res->start = e820.map[i].addr; + res->end = res->start + e820.map[i].size - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + if (request_resource(&iomem_resource, res)) { + kfree(res); + continue; + } + if (e820.map[i].type == E820_RAM) { + /* + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. + */ + request_resource(res, code_resource); + request_resource(res, data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif + } + } +} + +/* + * Request address space for all standard resources + * + * This is called just before pcibios_init(), which is also a + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). + */ +static int __init request_standard_resources(void) +{ + int i; + + printk("Setting up standard PCI resources\n"); + if (efi_enabled) + efi_initialize_iomem_resources(&code_resource, &data_resource); + else + legacy_init_iomem_resources(&code_resource, &data_resource); + + /* EFI systems may still have VGA */ + request_resource(&iomem_resource, &video_ram_resource); + + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +subsys_initcall(request_standard_resources); + +void __init add_memory_region(unsigned long long start, + unsigned long long size, int type) +{ + int x; + + if (!efi_enabled) { + x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; + } +} /* add_memory_region */ + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps. + * + */ +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +{ + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* + Visually we're performing the following (1,2,3,4 = memory types)... + + Sample memory map (w/overlaps): + ____22__________________ + ______________________4_ + ____1111________________ + _44_____________________ + 11111111________________ + ____________________33__ + ___________44___________ + __________33333_________ + ______________22________ + ___________________2222_ + _________111111111______ + _____________________11_ + _________________4______ + + Sanitized equivalent (no overlap): + 1_______________________ + _44_____________________ + ___1____________________ + ____22__________________ + ______11________________ + _________1______________ + __________3_____________ + ___________44___________ + _____________33_________ + _______________2________ + ________________1_______ + _________________4______ + ___________________2____ + ____________________33__ + ______________________4_ + */ + printk("sanitize start\n"); + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) { + printk("sanitize bail 0\n"); + return -1; + } + + old_nr = *pnr_map; + + /* bail out if we find any unreasonable addresses in bios map */ + for (i=0; i<old_nr; i++) + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { + printk("sanitize bail 1\n"); + return -1; + } + + /* create pointers for initial change-point information (for sorting) */ + for (i=0; i < 2*old_nr; i++) + change_point[i] = &change_point_list[i]; + + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ + chgidx = 0; + for (i=0; i < old_nr; i++) { + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; /* true number of change-points */ + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i=1; i < chg_nr; i++) { + /* if <current_addr> > <last_addr>, swap */ + /* or, if current=<start_addr> & last=<end_addr>, swap */ + if ((change_point[i]->addr < change_point[i-1]->addr) || + ((change_point[i]->addr == change_point[i-1]->addr) && + (change_point[i]->addr == change_point[i]->pbios->addr) && + (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) + ) + { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing=1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries=0; /* number of entries in the overlap table */ + new_bios_entry=0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ + for (chgidx=0; chgidx < chg_nr; chgidx++) + { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) + { + /* add map entry to overlap list (> 1 entry implies an overlap) */ + overlap_list[overlap_entries++]=change_point[chgidx]->pbios; + } + else + { + /* remove entry from list (order independent, so swap with last) */ + for (i=0; i<overlap_entries; i++) + { + if (overlap_list[i] == change_point[chgidx]->pbios) + overlap_list[i] = overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* if there are overlapping entries, decide which "type" to use */ + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + current_type = 0; + for (i=0; i<overlap_entries; i++) + if (overlap_list[i]->type > current_type) + current_type = overlap_list[i]->type; + /* continue building up new bios map based on this information */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* move forward only if the new size was non-zero */ + if (new_bios[new_bios_entry].size != 0) + if (++new_bios_entry >= E820MAX) + break; /* no more space left for new bios entries */ + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr=change_point[chgidx]->addr; + } + last_type = current_type; + } + } + new_nr = new_bios_entry; /* retain count for new bios entries */ + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + *pnr_map = new_nr; + + printk("sanitize end\n"); + return 0; +} + +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + * + * We check to see that the memory map contains at least 2 elements + * before we'll use it, because the detection code in setup.S may + * not be perfect and most every PC known to man has two memory + * regions: one from 0 to 640k, and one from 1mb up. (The IBM + * thinkpad 560x, for example, does not cooperate with the memory + * detection code.) + */ +int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +{ + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; + + do { + unsigned long long start = biosmap->addr; + unsigned long long size = biosmap->size; + unsigned long long end = start + size; + unsigned long type = biosmap->type; + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); + + /* Overflow in 64 bits? Ignore the memory map. */ + if (start > end) + return -1; + + /* + * Some BIOSes claim RAM in the 640k - 1M region. + * Not right. Fix it up. + */ + if (type == E820_RAM) { + printk("copy_e820_map() type is E820_RAM\n"); + if (start < 0x100000ULL && end > 0xA0000ULL) { + printk("copy_e820_map() lies in range...\n"); + if (start < 0xA0000ULL) { + printk("copy_e820_map() start < 0xA0000ULL\n"); + add_memory_region(start, 0xA0000ULL-start, type); + } + if (end <= 0x100000ULL) { + printk("copy_e820_map() end <= 0x100000ULL\n"); + continue; + } + start = 0x100000ULL; + size = end - start; + } + } + add_memory_region(start, size, type); + } while (biosmap++,--nr_map); + return 0; +} + +/* + * Callback for efi_memory_walk. + */ +static int __init +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + if (start < end) { + pfn = PFN_UP(end -1); + if (pfn > *max_pfn) + *max_pfn = pfn; + } + return 0; +} + +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, PFN_UP(start), PFN_DOWN(end)); + return 0; +} + +/* + * Find the highest page frame number we have available + */ +void __init find_max_pfn(void) +{ + int i; + + max_pfn = 0; + if (efi_enabled) { + efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); + return; + } + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + /* RAM? */ + if (e820.map[i].type != E820_RAM) + continue; + start = PFN_UP(e820.map[i].addr); + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + if (start >= end) + continue; + if (end > max_pfn) + max_pfn = end; + memory_present(0, start, end); + } +} + +/* + * Free all available memory for boot time allocation. Used + * as a callback function by efi_memory_walk() + */ + +static int __init +free_available_memory(unsigned long start, unsigned long end, void *arg) +{ + /* check max_low_pfn */ + if (start >= (max_low_pfn << PAGE_SHIFT)) + return 0; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; + if (start < end) + free_bootmem(start, end - start); + + return 0; +} +/* + * Register fully available low RAM pages with the bootmem allocator. + */ +void __init register_bootmem_low_pages(unsigned long max_low_pfn) +{ + int i; + + if (efi_enabled) { + efi_memmap_walk(free_available_memory, NULL); + return; + } + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn, last_pfn, size; + /* + * Reserve usable low memory + */ + if (e820.map[i].type != E820_RAM) + continue; + /* + * We are rounding up the start address of usable memory: + */ + curr_pfn = PFN_UP(e820.map[i].addr); + if (curr_pfn >= max_low_pfn) + continue; + /* + * ... and at the end of the usable range downwards: + */ + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + + if (last_pfn > max_low_pfn) + last_pfn = max_low_pfn; + + /* + * .. finally, did all the rounding and playing + * around just make the area go away? + */ + if (last_pfn <= curr_pfn) + continue; + + size = last_pfn - curr_pfn; + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); + } +} + +void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; + + /* + * Search for the bigest gap in the low 32 bits of the e820 + * memory space. + */ + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + } + } + if (start < last) + last = start; + } + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", + pci_mem_start, gapstart, gapsize); +} + +void __init print_memory_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(" %s: %016Lx - %016Lx ", who, + e820.map[i].addr, + e820.map[i].addr + e820.map[i].size); + switch (e820.map[i].type) { + case E820_RAM: printk("(usable)\n"); + break; + case E820_RESERVED: + printk("(reserved)\n"); + break; + case E820_ACPI: + printk("(ACPI data)\n"); + break; + case E820_NVS: + printk("(ACPI NVS)\n"); + break; + default: printk("type %lu\n", e820.map[i].type); + break; + } + } +} + +static __init __always_inline void efi_limit_regions(unsigned long long size) +{ + unsigned long long current_addr = 0; + efi_memory_desc_t *md, *next_md; + void *p, *p1; + int i, j; + + j = 0; + p1 = memmap.map; + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; + next_md = p1; + current_addr = md->phys_addr + + PFN_PHYS(md->num_pages); + if (is_available_memory(md)) { + if (md->phys_addr >= size) continue; + memcpy(next_md, md, memmap.desc_size); + if (current_addr >= size) { + next_md->num_pages -= + PFN_UP(current_addr-size); + } + p1 += memmap.desc_size; + next_md = p1; + j++; + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == + EFI_MEMORY_RUNTIME) { + /* In order to make runtime services + * available we have to include runtime + * memory regions in memory map */ + memcpy(next_md, md, memmap.desc_size); + p1 += memmap.desc_size; + next_md = p1; + j++; + } + } + memmap.nr_map = j; + memmap.map_end = memmap.map + + (memmap.nr_map * memmap.desc_size); +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr; + int i; + + print_memory_map("limit_regions start"); + if (efi_enabled) { + efi_limit_regions(size); + return; + } + for (i = 0; i < e820.nr_map; i++) { + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; + } + print_memory_map("limit_regions endfor"); + return; + } + print_memory_map("limit_regions endfunc"); +} + + /* + * This function checks if the entire range <start,end> is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of <start,end> we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. + */ + find_max_pfn(); + saved_max_pfn = max_pfn; +#endif + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; + } + } + return 0; +} +early_param("memmap", parse_memmap); diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 8b40648d0ef..b92c7f0a358 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -194,17 +194,24 @@ inline int efi_set_rtc_mmss(unsigned long nowtime) return 0; } /* - * This should only be used during kernel init and before runtime - * services have been remapped, therefore, we'll need to call in physical - * mode. Note, this call isn't used later, so mark it __init. + * This is used during kernel init before runtime + * services have been remapped and also during suspend, therefore, + * we'll need to call both in physical and virtual modes. */ -inline unsigned long __init efi_get_time(void) +inline unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; efi_time_cap_t cap; - status = phys_efi_get_time(&eft, &cap); + if (efi.get_time) { + /* if we are in virtual mode use remapped function */ + status = efi.get_time(&eft, &cap); + } else { + /* we are in physical mode */ + status = phys_efi_get_time(&eft, &cap); + } + if (status != EFI_SUCCESS) printk("Oops: efitime: can't read time status: 0x%lx\n",status); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5a63d6fdb70..de34b7fed3c 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -30,12 +30,13 @@ * 18(%esp) - %eax * 1C(%esp) - %ds * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss + * 24(%esp) - %gs + * 28(%esp) - orig_eax + * 2C(%esp) - %eip + * 30(%esp) - %cs + * 34(%esp) - %eflags + * 38(%esp) - %oldesp + * 3C(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -48,26 +49,24 @@ #include <asm/smp.h> #include <asm/page.h> #include <asm/desc.h> +#include <asm/percpu.h> #include <asm/dwarf2.h> #include "irq_vectors.h" -#define nr_syscalls ((syscall_table_size)/4) +/* + * We use macros for low-level operations which need to be overridden + * for paravirtualization. The following will never clobber any registers: + * INTERRUPT_RETURN (aka. "iret") + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). + * Allowing a register to be clobbered can shrink the paravirt replacement + * enough to patch inline, increasing performance. + */ -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C -EFLAGS = 0x30 -OLDESP = 0x34 -OLDSS = 0x38 +#define nr_syscalls ((syscall_table_size)/4) CF_MASK = 0x00000001 TF_MASK = 0x00000100 @@ -76,23 +75,16 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 -/* These are replaces for paravirtualization */ -#define DISABLE_INTERRUPTS cli -#define ENABLE_INTERRUPTS sti -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define INTERRUPT_RETURN iret -#define GET_CR0_INTO_EAX movl %cr0, %eax - #ifdef CONFIG_PREEMPT -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else -#define preempt_stop +#define preempt_stop(clobbers) #define resume_kernel restore_nocheck #endif .macro TRACE_IRQS_IRET #ifdef CONFIG_TRACE_IRQFLAGS - testl $IF_MASK,EFLAGS(%esp) # interrupts off? + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? jz 1f TRACE_IRQS_ON 1: @@ -107,6 +99,9 @@ VM_MASK = 0x00020000 #define SAVE_ALL \ cld; \ + pushl %gs; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET gs, 0;*/\ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ /*CFI_REL_OFFSET es, 0;*/\ @@ -136,7 +131,9 @@ VM_MASK = 0x00020000 CFI_REL_OFFSET ebx, 0;\ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ - movl %edx, %es; + movl %edx, %es; \ + movl $(__KERNEL_PDA), %edx; \ + movl %edx, %gs #define RESTORE_INT_REGS \ popl %ebx; \ @@ -169,17 +166,22 @@ VM_MASK = 0x00020000 2: popl %es; \ CFI_ADJUST_CFA_OFFSET -4;\ /*CFI_RESTORE es;*/\ -.section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ +3: popl %gs; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE gs;*/\ +.pushsection .fixup,"ax"; \ 4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ jmp 2b; \ -.previous; \ +6: movl $0,(%esp); \ + jmp 3b; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ -.previous + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.popsection #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ @@ -198,18 +200,18 @@ VM_MASK = 0x00020000 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, OLDESP-EBX;\ - /*CFI_OFFSET cs, CS-OLDESP;*/\ - CFI_OFFSET eip, EIP-OLDESP;\ - /*CFI_OFFSET es, ES-OLDESP;*/\ - /*CFI_OFFSET ds, DS-OLDESP;*/\ - CFI_OFFSET eax, EAX-OLDESP;\ - CFI_OFFSET ebp, EBP-OLDESP;\ - CFI_OFFSET edi, EDI-OLDESP;\ - CFI_OFFSET esi, ESI-OLDESP;\ - CFI_OFFSET edx, EDX-OLDESP;\ - CFI_OFFSET ecx, ECX-OLDESP;\ - CFI_OFFSET ebx, EBX-OLDESP + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ + CFI_OFFSET ebx, PT_EBX-PT_OLDESP ENTRY(ret_from_fork) CFI_STARTPROC @@ -237,17 +239,18 @@ ENTRY(ret_from_fork) ALIGN RING0_PTREGS_FRAME ret_from_exception: - preempt_stop + preempt_stop(CLBR_ANY) ret_from_intr: GET_THREAD_INFO(%ebp) check_userspace: - movl EFLAGS(%esp), %eax # mix EFLAGS and CS - movb CS(%esp), %al + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al andl $(VM_MASK | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax jb resume_kernel # not returning to v8086 or userspace + ENTRY(resume_userspace) - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -258,14 +261,14 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - DISABLE_INTERRUPTS + DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq jmp need_resched @@ -287,7 +290,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - ENABLE_INTERRUPTS + ENABLE_INTERRUPTS(CLBR_NONE) pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -331,20 +334,27 @@ sysenter_past_esp: cmpl $(nr_syscalls), %eax jae syscall_badsys call *sys_call_table(,%eax,4) - movl %eax,EAX(%esp) - DISABLE_INTERRUPTS + movl %eax,PT_EAX(%esp) + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work /* if something modifies registers it must also disable sysexit */ - movl EIP(%esp), %edx - movl OLDESP(%esp), %ecx + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON +1: mov PT_GS(%esp), %gs ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC - +.pushsection .fixup,"ax" +2: movl $0,PT_GS(%esp) + jmp 1b +.section __ex_table,"a" + .align 4 + .long 1b,2b +.popsection # system call handler stub ENTRY(system_call) @@ -353,7 +363,7 @@ ENTRY(system_call) CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) - testl $TF_MASK,EFLAGS(%esp) + testl $TF_MASK,PT_EFLAGS(%esp) jz no_singlestep orl $_TIF_SINGLESTEP,TI_flags(%ebp) no_singlestep: @@ -365,9 +375,9 @@ no_singlestep: jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) - movl %eax,EAX(%esp) # store the return value + movl %eax,PT_EAX(%esp) # store the return value syscall_exit: - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -376,12 +386,12 @@ syscall_exit: jne syscall_exit_work restore_all: - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: OLDSS(%esp) contains the wrong/random values if we + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. # See comments in process.c:copy_thread() for details. - movb OLDSS(%esp), %ah - movb CS(%esp), %al + movb PT_OLDSS(%esp), %ah + movb PT_CS(%esp), %al andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE @@ -390,13 +400,13 @@ restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: RESTORE_REGS - addl $4, %esp + addl $4, %esp # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: TRACE_IRQS_ON - ENABLE_INTERRUPTS + ENABLE_INTERRUPTS(CLBR_NONE) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -408,33 +418,42 @@ iret_exc: CFI_RESTORE_STATE ldt_ss: - larl OLDSS(%esp), %eax + larl PT_OLDSS(%esp), %eax jnz restore_nocheck testl $0x00400000, %eax # returning to 32bit stack? jnz restore_nocheck # allright, normal return + +#ifdef CONFIG_PARAVIRT + /* + * The kernel can't run on a non-flat stack if paravirt mode + * is active. Rather than try to fixup the high bits of + * ESP, bypass this code entirely. This may break DOSemu + * and/or Wine support in a paravirt VM, although the option + * is still available to implement the setting of the high + * 16-bits in the INTERRUPT_RETURN paravirt-op. + */ + cmpl $0, paravirt_ops+PARAVIRT_enabled + jne restore_nocheck +#endif + /* If returning to userspace with 16bit stack, * try to fix the higher word of ESP, as the CPU * won't restore it. * This is an "official" bug of all the x86-compatible * CPUs, which we can try to work around to make * dosemu and wine happy. */ - subl $8, %esp # reserve space for switch16 pointer - CFI_ADJUST_CFA_OFFSET 8 - DISABLE_INTERRUPTS + movl PT_OLDESP(%esp), %eax + movl %esp, %edx + call patch_espfix_desc + pushl $__ESPFIX_SS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + DISABLE_INTERRUPTS(CLBR_EAX) TRACE_IRQS_OFF - movl %esp, %eax - /* Set up the 16bit stack frame with switch32 pointer on top, - * and a switch16 pointer on top of the current frame. */ - call setup_x86_bogus_stack - CFI_ADJUST_CFA_OFFSET -8 # frame has moved - TRACE_IRQS_IRET - RESTORE_REGS - lss 20+4(%esp), %esp # switch to 16bit stack -1: INTERRUPT_RETURN -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck CFI_ENDPROC # perform work that needs to be done immediately before resumption @@ -445,7 +464,7 @@ work_pending: jz work_notifysig work_resched: call schedule - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -458,7 +477,8 @@ work_resched: work_notifysig: # deal with pending signals and # notify-resume requests - testl $VM_MASK, EFLAGS(%esp) +#ifdef CONFIG_VM86 + testl $VM_MASK, PT_EFLAGS(%esp) movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space @@ -468,29 +488,30 @@ work_notifysig: # deal with pending signals and ALIGN work_notifysig_v86: -#ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume CFI_ADJUST_CFA_OFFSET 4 call save_v86_state # %eax contains pt_regs pointer popl %ecx CFI_ADJUST_CFA_OFFSET -4 movl %eax, %esp +#else + movl %esp, %eax +#endif xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig -#endif # perform syscall exit tracing ALIGN syscall_trace_entry: - movl $-ENOSYS,EAX(%esp) + movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax xorl %edx,%edx call do_syscall_trace cmpl $0, %eax jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, # so must skip actual syscall - movl ORIG_EAX(%esp), %eax + movl PT_ORIG_EAX(%esp), %eax cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit @@ -501,7 +522,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS # could let do_syscall_trace() call + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -515,39 +536,38 @@ syscall_fault: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) - movl $-EFAULT,EAX(%esp) + movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace syscall_badsys: - movl $-ENOSYS,EAX(%esp) + movl $-ENOSYS,PT_EAX(%esp) jmp resume_userspace CFI_ENDPROC #define FIXUP_ESPFIX_STACK \ - movl %esp, %eax; \ - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ - /* copy data from 16bit stack to 32bit stack */ \ - call fixup_x86_bogus_stack; \ - /* put ESP to the proper location */ \ - movl %eax, %esp; -#define UNWIND_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ + movl %gs:PDA_cpu, %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ + addl %esp, %eax; \ + pushl $__KERNEL_DS; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl %eax; \ CFI_ADJUST_CFA_OFFSET 4; \ + lss (%esp), %esp; \ + CFI_ADJUST_CFA_OFFSET -8; +#define UNWIND_ESPFIX_STACK \ movl %ss, %eax; \ - /* see if on 16bit stack */ \ + /* see if on espfix stack */ \ cmpw $__ESPFIX_SS, %ax; \ - je 28f; \ -27: popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4; \ -.section .fixup,"ax"; \ -28: movl $__KERNEL_DS, %eax; \ + jne 27f; \ + movl $__KERNEL_DS, %eax; \ movl %eax, %ds; \ movl %eax, %es; \ - /* switch to 32bit stack */ \ + /* switch to normal stack */ \ FIXUP_ESPFIX_STACK; \ - jmp 27b; \ -.previous +27:; /* * Build the entry stubs and pointer table with @@ -608,13 +628,16 @@ KPROBE_ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: + /* the function address is in %gs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ pushl %ds CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ds, 0*/ pushl %eax CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eax, 0 - xorl %eax, %eax pushl %ebp CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebp, 0 @@ -627,7 +650,6 @@ error_code: pushl %edx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET edx, 0 - decl %eax # eax = -1 pushl %ecx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ecx, 0 @@ -635,18 +657,20 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %es + pushl %gs CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ + /*CFI_REL_OFFSET gs, 0*/ + movl $(__KERNEL_PDA), %ecx + movl %ecx, %gs UNWIND_ESPFIX_STACK popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ - movl ES(%esp), %edi # get the function address - movl ORIG_EAX(%esp), %edx # get the error code - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - /*CFI_REL_OFFSET es, ES*/ + movl PT_GS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_GS(%esp) + /*CFI_REL_OFFSET gs, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es @@ -682,7 +706,7 @@ ENTRY(device_not_available) GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate - preempt_stop + preempt_stop(CLBR_ANY) call math_state_restore jmp ret_from_exception device_not_available_emulate: @@ -754,7 +778,7 @@ KPROBE_ENTRY(nmi) cmpw $__ESPFIX_SS, %ax popl %eax CFI_ADJUST_CFA_OFFSET -4 - je nmi_16bit_stack + je nmi_espfix_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax @@ -797,7 +821,7 @@ nmi_debug_stack_check: FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct -nmi_16bit_stack: +nmi_espfix_stack: /* We have a RING0_INT_FRAME here. * * create the pointer to lss back @@ -806,7 +830,6 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - movzwl %sp, %esp addw $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 @@ -817,11 +840,11 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS - lss 12+4(%esp), %esp # back to 16bit stack + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" @@ -830,6 +853,19 @@ nmi_16bit_stack: .previous KPROBE_END(nmi) +#ifdef CONFIG_PARAVIRT +ENTRY(native_iret) +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +ENTRY(native_irq_enable_sysexit) + sti + sysexit +#endif + KPROBE_ENTRY(int3) RING0_INT_FRAME pushl $-1 # mark this as an int @@ -949,26 +985,27 @@ ENTRY(arch_unwind_init_running) movl 4(%esp), %edx movl (%esp), %ecx leal 4(%esp), %eax - movl %ebx, EBX(%edx) + movl %ebx, PT_EBX(%edx) xorl %ebx, %ebx - movl %ebx, ECX(%edx) - movl %ebx, EDX(%edx) - movl %esi, ESI(%edx) - movl %edi, EDI(%edx) - movl %ebp, EBP(%edx) - movl %ebx, EAX(%edx) - movl $__USER_DS, DS(%edx) - movl $__USER_DS, ES(%edx) - movl %ebx, ORIG_EAX(%edx) - movl %ecx, EIP(%edx) + movl %ebx, PT_ECX(%edx) + movl %ebx, PT_EDX(%edx) + movl %esi, PT_ESI(%edx) + movl %edi, PT_EDI(%edx) + movl %ebp, PT_EBP(%edx) + movl %ebx, PT_EAX(%edx) + movl $__USER_DS, PT_DS(%edx) + movl $__USER_DS, PT_ES(%edx) + movl $0, PT_GS(%edx) + movl %ebx, PT_ORIG_EAX(%edx) + movl %ecx, PT_EIP(%edx) movl 12(%esp), %ecx - movl $__KERNEL_CS, CS(%edx) - movl %ebx, EFLAGS(%edx) - movl %eax, OLDESP(%edx) + movl $__KERNEL_CS, PT_CS(%edx) + movl %ebx, PT_EFLAGS(%edx) + movl %eax, PT_OLDESP(%edx) movl 8(%esp), %eax movl %ecx, 8(%esp) - movl EBX(%edx), %ebx - movl $__KERNEL_DS, OLDSS(%edx) + movl PT_EBX(%edx), %ebx + movl $__KERNEL_DS, PT_OLDSS(%edx) jmpl *%eax CFI_ENDPROC ENDPROC(arch_unwind_init_running) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index ca31f18d277..edef5084ce1 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -55,6 +55,12 @@ */ ENTRY(startup_32) +#ifdef CONFIG_PARAVIRT + movl %cs, %eax + testl $0x3, %eax + jnz startup_paravirt +#endif + /* * Set segments to known values. */ @@ -302,6 +308,7 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 + call setup_pda lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -312,10 +319,13 @@ is386: movl $2,%ecx # set MP movl %eax,%ds movl %eax,%es - xorl %eax,%eax # Clear FS/GS and LDT + xorl %eax,%eax # Clear FS and LDT movl %eax,%fs - movl %eax,%gs lldt %ax + + movl $(__KERNEL_PDA),%eax + mov %eax,%gs + cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder #ifdef CONFIG_SMP @@ -346,6 +356,23 @@ check_x87: ret /* + * Point the GDT at this CPU's PDA. On boot this will be + * cpu_gdt_table and boot_pda; for secondary CPUs, these will be + * that CPU's GDT and PDA. + */ +setup_pda: + /* get the PDA pointer */ + movl start_pda, %eax + + /* slot the PDA address into the GDT */ + mov cpu_gdt_descr+2, %ecx + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ + shr $16, %eax + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ + ret + +/* * setup_idt * * sets up a idt with 256 entries pointing to @@ -465,6 +492,33 @@ ignore_int: #endif iret +#ifdef CONFIG_PARAVIRT +startup_paravirt: + cld + movl $(init_thread_union+THREAD_SIZE),%esp + + /* We take pains to preserve all the regs. */ + pushl %edx + pushl %ecx + pushl %eax + + /* paravirt.o is last in link, and that probe fn never returns */ + pushl $__start_paravirtprobe +1: + movl 0(%esp), %eax + pushl (%eax) + movl 8(%esp), %eax + call *(%esp) + popl %eax + + movl 4(%esp), %eax + movl 8(%esp), %ecx + movl 12(%esp), %edx + + addl $4, (%esp) + jmp 1b +#endif + /* * Real beginning of normal "text" segment */ @@ -484,6 +538,8 @@ ENTRY(empty_zero_page) * This starts the data section. */ .data +ENTRY(start_pda) + .long boot_pda ENTRY(stack_start) .long init_thread_union+THREAD_SIZE @@ -525,7 +581,7 @@ idt_descr: # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -cpu_gdt_descr: +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table @@ -584,8 +640,8 @@ ENTRY(cpu_gdt_table) .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ .quad 0x004092000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ - .quad 0x0000000000000000 /* 0xd8 - unused */ + .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ + .quad 0x00cf92000000ffff /* 0xd8 - PDA */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index 17647a530b2..45a8685bb60 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -34,6 +34,7 @@ static int __init init_hpet_clocksource(void) unsigned long hpet_period; void __iomem* hpet_base; u64 tmp; + int err; if (!is_hpet_enabled()) return -ENODEV; @@ -61,7 +62,11 @@ static int __init init_hpet_clocksource(void) do_div(tmp, FSEC_PER_NSEC); clocksource_hpet.mult = (u32)tmp; - return clocksource_register(&clocksource_hpet); + err = clocksource_register(&clocksource_hpet); + if (err) + iounmap(hpet_base); + + return err; } module_init(init_hpet_clocksource); diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 62996cd1708..c8d45821c78 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -381,7 +381,10 @@ void __init init_ISA_irqs (void) } } -void __init init_IRQ(void) +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) { int i; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 3b7a63e0ed1..e21dcde0790 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -34,6 +34,7 @@ #include <linux/pci.h> #include <linux/msi.h> #include <linux/htirq.h> +#include <linux/freezer.h> #include <asm/io.h> #include <asm/smp.h> @@ -153,14 +154,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; union entry_union eu; eu.entry = e; - spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -836,8 +843,7 @@ static int __init find_isa_irq_pin(int irq, int type) if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA || - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + mp_bus_id_to_type[lbus] == MP_BUS_MCA ) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) @@ -856,8 +862,7 @@ static int __init find_isa_irq_apic(int irq, int type) if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA || - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + mp_bus_id_to_type[lbus] == MP_BUS_MCA ) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) @@ -987,12 +992,6 @@ static int EISA_ELCR(unsigned int irq) #define default_MCA_trigger(idx) (1) #define default_MCA_polarity(idx) (0) -/* NEC98 interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_NEC98_trigger(idx) (0) -#define default_NEC98_polarity(idx) (0) - static int __init MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mpc_srcbus; @@ -1027,11 +1026,6 @@ static int __init MPBIOS_polarity(int idx) polarity = default_MCA_polarity(idx); break; } - case MP_BUS_NEC98: /* NEC 98 pin */ - { - polarity = default_NEC98_polarity(idx); - break; - } default: { printk(KERN_WARNING "broken BIOS!!\n"); @@ -1101,11 +1095,6 @@ static int MPBIOS_trigger(int idx) trigger = default_MCA_trigger(idx); break; } - case MP_BUS_NEC98: /* NEC 98 pin */ - { - trigger = default_NEC98_trigger(idx); - break; - } default: { printk(KERN_WARNING "broken BIOS!!\n"); @@ -1167,7 +1156,6 @@ static int pin_2_irq(int idx, int apic, int pin) case MP_BUS_ISA: /* ISA pin */ case MP_BUS_EISA: case MP_BUS_MCA: - case MP_BUS_NEC98: { irq = mp_irqs[idx].mpc_srcbusirq; break; @@ -1235,7 +1223,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; static int __assign_irq_vector(int irq) { @@ -1360,8 +1348,8 @@ static void __init setup_IO_APIC_irqs(void) if (!apic && (irq < 16)) disable_8259A_irq(irq); } - ioapic_write_entry(apic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, entry); set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1926,6 +1914,15 @@ static void __init setup_ioapic_ids_from_mpc(void) static void __init setup_ioapic_ids_from_mpc(void) { } #endif +static int no_timer_check __initdata; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1934,10 +1931,13 @@ static void __init setup_ioapic_ids_from_mpc(void) { } * - if this function detects that timer IRQs are defunct, then we fall * back to ISA timer IRQs */ -static int __init timer_irq_works(void) +int __init timer_irq_works(void) { unsigned long t1 = jiffies; + if (no_timer_check) + return 1; + local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -2161,9 +2161,15 @@ static inline void unlock_ExtINT_logic(void) unsigned char save_control, save_freq_select; pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } apic = find_isa_irq_apic(8, mp_INT); - if (pin == -1) + if (apic == -1) { + WARN_ON_ONCE(1); return; + } entry0 = ioapic_read_entry(apic, pin); clear_IO_APIC_pin(apic, pin); @@ -2208,7 +2214,7 @@ int timer_uses_ioapic_pin_0; * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. */ -static inline void check_timer(void) +static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -2856,8 +2862,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a if (!ioapic && (irq < 16)) disable_8259A_irq(irq); - ioapic_write_entry(ioapic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(ioapic, pin, entry); set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index fc79e1e859c..af1d5334499 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -184,7 +184,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); mutex_unlock(&kprobe_mutex); } @@ -333,7 +333,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 1; ss_probe: -#ifndef CONFIG_PREEMPT +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) if (p->ainsn.boostable == 1 && !p->post_handler){ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index 445211eb2d5..b410e5fb034 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -160,16 +160,14 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) { int err; unsigned long size; - void *address; err = 0; - address = &default_ldt[0]; size = 5*sizeof(struct desc_struct); if (size > bytecount) size = bytecount; err = size; - if (copy_to_user(ptr, address, size)) + if (clear_user(ptr, size)) err = -EFAULT; return err; diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index eb57a851789..b83672b8952 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -283,10 +283,9 @@ static int __init mca_init(void) bus->f.mca_transform_memory = mca_dummy_transform_memory; /* get the motherboard device */ - mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); if(unlikely(!mca_dev)) goto out_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); /* * We do not expect many MCA interrupts during initialization, @@ -310,11 +309,9 @@ static int __init mca_init(void) mca_dev->slot = MCA_MOTHERBOARD; mca_register_device(MCA_PRIMARY_BUS, mca_dev); - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); - /* Put motherboard into video setup mode, read integrated video * POS registers, and turn motherboard setup off. @@ -349,10 +346,9 @@ static int __init mca_init(void) } if(which_scsi) { /* found a scsi card */ - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); for(j = 0; j < 8; j++) mca_dev->pos[j] = pos[j]; @@ -378,10 +374,9 @@ static int __init mca_init(void) if(!mca_read_and_store_pos(pos)) continue; - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); for(j=0; j<8; j++) mca_dev->pos[j]=pos[j]; diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 23f5984d065..972346604f9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -703,7 +703,6 @@ static struct sysdev_driver mc_sysdev_driver = { .resume = mc_sysdev_resume, }; -#ifdef CONFIG_HOTPLUG_CPU static __cpuinit int mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -726,7 +725,6 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) static struct notifier_block mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -#endif static int __init microcode_init (void) { diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 470cf97e7cd..d7d9c8b23f7 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -108,7 +108,8 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -118,6 +119,8 @@ int module_finalize(const Elf_Ehdr *hdr, alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) locks= s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; } if (alt) { @@ -132,6 +135,12 @@ int module_finalize(const Elf_Ehdr *hdr, lseg, lseg + locks->sh_size, tseg, tseg + text->sh_size); } + + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } + return 0; } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 442aaf8c77e..2ce67228dff 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -249,8 +249,6 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mp_current_pci_id++; } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; } else { printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index a773f776c9e..1d1a56cae34 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -195,7 +195,6 @@ static ssize_t msr_write(struct file *file, const char __user *buf, { const u32 __user *tmp = (const u32 __user *)buf; u32 data[2]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); int err; @@ -203,7 +202,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, if (count % 8) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 8) { + for (; count; count -= 8) { if (copy_from_user(&data, tmp, 8)) return -EFAULT; err = do_wrmsr(cpu, reg, data[0], data[1]); @@ -250,7 +249,6 @@ static int msr_device_create(int i) return err; } -#ifdef CONFIG_HOTPLUG_CPU static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -271,7 +269,6 @@ static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; -#endif static int __init msr_init(void) { diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index eaafe233a5d..f5bc7e1be80 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -22,6 +22,7 @@ #include <linux/percpu.h> #include <linux/dmi.h> #include <linux/kprobes.h> +#include <linux/cpumask.h> #include <asm/smp.h> #include <asm/nmi.h> @@ -42,6 +43,8 @@ int nmi_watchdog_enabled; static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -867,14 +870,16 @@ static unsigned int void touch_nmi_watchdog (void) { - int i; + if (nmi_watchdog > 0) { + unsigned cpu; - /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): - */ - for_each_possible_cpu(i) - alert_counter[i] = 0; + /* + * Just reset the alert counters, (other CPUs might be + * spinning on locks we hold): + */ + for_each_present_cpu (cpu) + alert_counter[cpu] = 0; + } /* * Tickle the softlockup detector too: @@ -907,6 +912,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + sum = per_cpu(irq_stat, cpu).apic_timer_irqs; /* if the apic timer isn't firing, this cpu isn't doing much */ @@ -1033,6 +1048,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c new file mode 100644 index 00000000000..3dceab5828f --- /dev/null +++ b/arch/i386/kernel/paravirt.c @@ -0,0 +1,569 @@ +/* Paravirtualization interfaces + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/efi.h> +#include <linux/bcd.h> +#include <linux/start_kernel.h> + +#include <asm/bug.h> +#include <asm/paravirt.h> +#include <asm/desc.h> +#include <asm/setup.h> +#include <asm/arch_hooks.h> +#include <asm/time.h> +#include <asm/irq.h> +#include <asm/delay.h> +#include <asm/fixmap.h> +#include <asm/apic.h> +#include <asm/tlbflush.h> + +/* nop stub */ +static void native_nop(void) +{ +} + +static void __init default_banner(void) +{ + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + paravirt_ops.name); +} + +char *memory_setup(void) +{ + return paravirt_ops.memory_setup(); +} + +/* Simple instruction patching code. */ +#define DEF_NATIVE(name, code) \ + extern const char start_##name[], end_##name[]; \ + asm("start_" #name ": " code "; end_" #name ":") +DEF_NATIVE(cli, "cli"); +DEF_NATIVE(sti, "sti"); +DEF_NATIVE(popf, "push %eax; popf"); +DEF_NATIVE(pushf, "pushf; pop %eax"); +DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); +DEF_NATIVE(iret, "iret"); +DEF_NATIVE(sti_sysexit, "sti; sysexit"); + +static const struct native_insns +{ + const char *start, *end; +} native_insns[] = { + [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli }, + [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti }, + [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf }, + [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf }, + [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli }, + [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret }, + [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit }, +}; + +static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) +{ + unsigned int insn_len; + + /* Don't touch it if we don't have a replacement */ + if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) + return len; + + insn_len = native_insns[type].end - native_insns[type].start; + + /* Similarly if we can't fit replacement. */ + if (len < insn_len) + return len; + + memcpy(insns, native_insns[type].start, insn_len); + return insn_len; +} + +static fastcall unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("movl %%db0, %0" :"=r" (val)); break; + case 1: + asm("movl %%db1, %0" :"=r" (val)); break; + case 2: + asm("movl %%db2, %0" :"=r" (val)); break; + case 3: + asm("movl %%db3, %0" :"=r" (val)); break; + case 6: + asm("movl %%db6, %0" :"=r" (val)); break; + case 7: + asm("movl %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static fastcall void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("movl %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movl %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movl %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movl %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movl %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movl %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} + +void init_IRQ(void) +{ + paravirt_ops.init_IRQ(); +} + +static fastcall void native_clts(void) +{ + asm volatile ("clts"); +} + +static fastcall unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr0(unsigned long val) +{ + asm volatile("movl %0,%%cr0": :"r" (val)); +} + +static fastcall unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr2(unsigned long val) +{ + asm volatile("movl %0,%%cr2": :"r" (val)); +} + +static fastcall unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr3(unsigned long val) +{ + asm volatile("movl %0,%%cr3": :"r" (val)); +} + +static fastcall unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist */ + asm("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val): "0" (0)); + return val; +} + +static fastcall void native_write_cr4(unsigned long val) +{ + asm volatile("movl %0,%%cr4": :"r" (val)); +} + +static fastcall unsigned long native_save_fl(void) +{ + unsigned long f; + asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); + return f; +} + +static fastcall void native_restore_fl(unsigned long f) +{ + asm volatile("pushl %0 ; popfl": /* no output */ + :"g" (f) + :"memory", "cc"); +} + +static fastcall void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static fastcall void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static fastcall void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static fastcall void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +static fastcall void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) +{ + unsigned long long val; + + asm volatile("2: rdmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=r" (*err), "=A" (val) + : "c" (msr), "i" (-EFAULT)); + + return val; +} + +static fastcall int native_write_msr(unsigned int msr, unsigned long long val) +{ + int err; + asm volatile("2: wrmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %4,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=a" (err) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + "i" (-EFAULT)); + return err; +} + +static fastcall unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +static fastcall unsigned long long native_read_pmc(void) +{ + unsigned long long val; + asm volatile("rdpmc" : "=A" (val)); + return val; +} + +static fastcall void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) +{ + asm ("sgdt %0":"=m" (*dtr)); +} + +static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) +{ + asm ("sidt %0":"=m" (*dtr)); +} + +static fastcall unsigned long native_store_tr(void) +{ + unsigned long tr; + asm ("str %0":"=r" (tr)); + return tr; +} + +static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ +#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); +#undef C +} + +static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) +{ + u32 *lp = (u32 *)((char *)dt + entry*8); + lp[0] = entry_low; + lp[1] = entry_high; +} + +static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->esp0 = thread->esp0; + + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->ss1 != thread->sysenter_cs)) { + tss->ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +} + +static fastcall void native_io_delay(void) +{ + asm volatile("outb %al,$0x80"); +} + +static fastcall void native_flush_tlb(void) +{ + __native_flush_tlb(); +} + +/* + * Global pages have to be flushed a bit differently. Not a real + * performance problem because this does not happen often. + */ +static fastcall void native_flush_tlb_global(void) +{ + __native_flush_tlb_global(); +} + +static fastcall void native_flush_tlb_single(u32 addr) +{ + __native_flush_tlb_single(addr); +} + +#ifndef CONFIG_X86_PAE +static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; +} + +#else /* CONFIG_X86_PAE */ + +static fastcall void native_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + set_64bit((unsigned long long *)ptep,pte_val(pteval)); +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); +} + +static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) +{ + *pudp = pudval; +} + +static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; +} + +static fastcall void native_pmd_clear(pmd_t *pmd) +{ + u32 *tmp = (u32 *)pmd; + *tmp = 0; + smp_wmb(); + *(tmp + 1) = 0; +} +#endif /* CONFIG_X86_PAE */ + +/* These are in entry.S */ +extern fastcall void native_iret(void); +extern fastcall void native_irq_enable_sysexit(void); + +static int __init print_banner(void) +{ + paravirt_ops.banner(); + return 0; +} +core_initcall(print_banner); + +/* We simply declare start_kernel to be the paravirt probe of last resort. */ +paravirt_probe(start_kernel); + +struct paravirt_ops paravirt_ops = { + .name = "bare hardware", + .paravirt_enabled = 0, + .kernel_rpl = 0, + + .patch = native_patch, + .banner = default_banner, + .arch_setup = native_nop, + .memory_setup = machine_specific_memory_setup, + .get_wallclock = native_get_wallclock, + .set_wallclock = native_set_wallclock, + .time_init = time_init_hook, + .init_IRQ = native_init_IRQ, + + .cpuid = native_cpuid, + .get_debugreg = native_get_debugreg, + .set_debugreg = native_set_debugreg, + .clts = native_clts, + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + .read_cr2 = native_read_cr2, + .write_cr2 = native_write_cr2, + .read_cr3 = native_read_cr3, + .write_cr3 = native_write_cr3, + .read_cr4 = native_read_cr4, + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = native_write_cr4, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, + .irq_disable = native_irq_disable, + .irq_enable = native_irq_enable, + .safe_halt = native_safe_halt, + .halt = native_halt, + .wbinvd = native_wbinvd, + .read_msr = native_read_msr, + .write_msr = native_write_msr, + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + .load_tr_desc = native_load_tr_desc, + .set_ldt = native_set_ldt, + .load_gdt = native_load_gdt, + .load_idt = native_load_idt, + .store_gdt = native_store_gdt, + .store_idt = native_store_idt, + .store_tr = native_store_tr, + .load_tls = native_load_tls, + .write_ldt_entry = native_write_ldt_entry, + .write_gdt_entry = native_write_gdt_entry, + .write_idt_entry = native_write_idt_entry, + .load_esp0 = native_load_esp0, + + .set_iopl_mask = native_set_iopl_mask, + .io_delay = native_io_delay, + .const_udelay = __const_udelay, + +#ifdef CONFIG_X86_LOCAL_APIC + .apic_write = native_apic_write, + .apic_write_atomic = native_apic_write_atomic, + .apic_read = native_apic_read, +#endif + + .flush_tlb_user = native_flush_tlb, + .flush_tlb_kernel = native_flush_tlb_global, + .flush_tlb_single = native_flush_tlb_single, + + .set_pte = native_set_pte, + .set_pte_at = native_set_pte_at, + .set_pmd = native_set_pmd, + .pte_update = (void *)native_nop, + .pte_update_defer = (void *)native_nop, +#ifdef CONFIG_X86_PAE + .set_pte_atomic = native_set_pte_atomic, + .set_pte_present = native_set_pte_present, + .set_pud = native_set_pud, + .pte_clear = native_pte_clear, + .pmd_clear = native_pmd_clear, +#endif + + .irq_enable_sysexit = native_irq_enable_sysexit, + .iret = native_iret, +}; +EXPORT_SYMBOL(paravirt_ops); diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 5c8c6ef1fc5..41af692c158 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -92,14 +92,12 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, if (!mem_base) goto out; - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dev->dma_mem) goto out; - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dev->dma_mem->bitmap) goto free1_out; - memset(dev->dma_mem->bitmap, 0, bitmap_size); dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index dd53c58f64f..99308510a17 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -56,6 +56,7 @@ #include <asm/tlbflush.h> #include <asm/cpu.h> +#include <asm/pda.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -99,22 +100,18 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + local_irq_disable(); + if (!need_resched()) + safe_halt(); /* enables interrupts racelessly */ + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { - while (!need_resched()) - cpu_relax(); + /* loop is done by the caller */ + cpu_relax(); } } #ifdef CONFIG_APM_MODULE @@ -128,14 +125,7 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle (void) { - local_irq_enable(); - - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + cpu_relax(); } #ifdef CONFIG_HOTPLUG_CPU @@ -256,8 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) static void mwait_idle(void) { local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0, 0); + mwait_idle_with_hints(0, 0); } void __devinit select_idle_routine(const struct cpuinfo_x86 *c) @@ -314,8 +303,8 @@ void show_regs(struct pt_regs * regs) regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes); + printk(" DS: %04x ES: %04x GS: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); cr0 = read_cr0(); cr2 = read_cr2(); @@ -346,6 +335,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; + regs.xgs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -431,7 +421,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->thread.eip = (unsigned long) ret_from_fork; savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -508,7 +497,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs.ds = regs->xds; dump->regs.es = regs->xes; savesegment(fs,dump->regs.fs); - savesegment(gs,dump->regs.gs); + dump->regs.gs = regs->xgs; dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; @@ -648,22 +637,27 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas __unlazy_fpu(prev_p); + + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter > 5) + prefetch(&next->i387.fxsave); + /* * Reload esp0. */ load_esp0(tss, next); /* - * Save away %fs and %gs. No need to save %es and %ds, as - * those are always kernel segments while inside the kernel. - * Doing this before setting the new TLS descriptors avoids - * the situation where we temporarily have non-reloadable - * segments in %fs and %gs. This could be an issue if the - * NMI handler ever used %fs or %gs (it does not today), or - * if the kernel is running inside of a hypervisor layer. + * Save away %fs. No need to save %gs, as it was saved on the + * stack on entry. No need to save %es and %ds, as those are + * always kernel segments while inside the kernel. Doing this + * before setting the new TLS descriptors avoids the situation + * where we temporarily have non-reloadable segments in %fs + * and %gs. This could be an issue if the NMI handler ever + * used %fs or %gs (it does not today), or if the kernel is + * running inside of a hypervisor layer. */ savesegment(fs, prev->fs); - savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -671,22 +665,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_TLS(next, cpu); /* - * Restore %fs and %gs if needed. + * Restore %fs if needed. * - * Glibc normally makes %fs be zero, and %gs is one of - * the TLS segments. + * Glibc normally makes %fs be zero. */ if (unlikely(prev->fs | next->fs)) loadsegment(fs, next->fs); - if (prev->gs | next->gs) - loadsegment(gs, next->gs); - - /* - * Restore IOPL if needed. - */ - if (unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); + write_pda(pcurrent, next_p); /* * Now maybe handle debug registers and/or IO bitmaps @@ -697,6 +683,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas disable_tsc(prev_p, next_p); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter > 5) + math_state_restore(); + return prev_p; } diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 775f50e9395..f3f94ac5736 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -94,13 +94,9 @@ static int putreg(struct task_struct *child, return -EIO; child->thread.fs = value; return 0; - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; case DS: case ES: + case GS: if (value && (value & 3) != 3) return -EIO; value &= 0xffff; @@ -116,8 +112,8 @@ static int putreg(struct task_struct *child, value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; break; } - if (regno > GS*4) - regno -= 2*4; + if (regno > ES*4) + regno -= 1*4; put_stack_long(child, regno - sizeof(struct pt_regs), value); return 0; } @@ -131,18 +127,16 @@ static unsigned long getreg(struct task_struct *child, case FS: retval = child->thread.fs; break; - case GS: - retval = child->thread.gs; - break; case DS: case ES: + case GS: case SS: case CS: retval = 0xffff; /* fall through */ default: - if (regno > GS*4) - regno -= 2*4; + if (regno > ES*4) + regno -= 1*4; regno = regno - sizeof(struct pt_regs); retval &= get_stack_long(child, regno); } diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 9f6ab1789bb..a01320a7b63 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -3,10 +3,23 @@ */ #include <linux/pci.h> #include <linux/irq.h> +#include <asm/pci-direct.h> +#include <asm/genapic.h> +#include <asm/cpu.h> #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) +{ +#ifdef CONFIG_X86_64 + if (genapic != &apic_flat) + panic("APIC mode must be flat on this system\n"); +#elif defined(CONFIG_X86_GENERICARCH) + if (genapic != &apic_default) + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); +#endif +} -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) +void __init quirk_intel_irqbalance(void) { u8 config, rev; u32 word; @@ -16,18 +29,18 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) * based platforms. * Disable SW irqbalance/affinity on those platforms. */ - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); if (rev > 0x9) return; printk(KERN_INFO "Intel E7520/7320/7525 detected."); - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); + /* enable access to config space */ + config = read_pci_config_byte(0, 0, 0, 0xf4); + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); /* read xTPR register */ - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); + word = read_pci_config_16(0, 0, 0x40, 0x4c); if (!(word & (1 << 13))) { printk(KERN_INFO "Disabling irq balancing and affinity\n"); @@ -38,13 +51,24 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) #ifdef CONFIG_PROC_FS no_irq_affinity = 1; #endif +#ifdef CONFIG_HOTPLUG_CPU + printk(KERN_INFO "Disabling cpu hotplug control\n"); + enable_cpu_hotplug = 0; +#endif +#ifdef CONFIG_X86_64 + /* force the genapic selection to flat mode so that + * interrupts can be redirected to more than one CPU. + */ + genapic_force = &apic_flat; +#endif } - /* put back the original value for config space*/ + /* put back the original value for config space */ if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); + write_pci_config_byte(0, 0, 0, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); + #endif diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 84278e0093a..3514b4153f7 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -12,6 +12,7 @@ #include <linux/dmi.h> #include <linux/ctype.h> #include <linux/pm.h> +#include <linux/reboot.h> #include <asm/uaccess.h> #include <asm/apic.h> #include <asm/desc.h> diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 141041dde74..79df6e612db 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -63,9 +63,6 @@ #include <setup_arch.h> #include <bios_ebda.h> -/* Forward Declaration. */ -void __init find_max_pfn(void); - /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ @@ -76,11 +73,8 @@ int disable_pse __devinitdata = 0; /* * Machine setup.. */ - -#ifdef CONFIG_EFI -int efi_enabled = 0; -EXPORT_SYMBOL(efi_enabled); -#endif +extern struct resource code_resource; +extern struct resource data_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -99,12 +93,6 @@ unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -134,7 +122,6 @@ struct ist_info ist_info; defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) EXPORT_SYMBOL(ist_info); #endif -struct e820map e820; extern void early_cpu_init(void); extern int root_mountflags; @@ -149,516 +136,6 @@ static char command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) - -static int __init romchecksum(unsigned char *rom, unsigned long length) -{ - unsigned char *p, sum = 0; - - for (p = rom; p < rom + length; p++) - sum += *p; - return sum == 0; -} - -static void __init probe_roms(void) -{ - unsigned long start, length, upper; - unsigned char *rom; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -static void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr = 0; - int i; - - if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } - } - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - return; - } -} - -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - if (!efi_enabled) { - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; - } -} /* add_memory_region */ - -#define E820_DEBUG 1 - -static void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %lu\n", e820.map[i].type); - break; - } - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; - -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; i<old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i=0; i < 2*old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i=0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if <current_addr> > <last_addr>, swap */ - /* or, if current=<start_addr> & last=<end_addr>, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; i<overlap_entries; i++) - { - if (overlap_list[i] == change_point[chgidx]->pbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; i<overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - unsigned long long start = biosmap->addr; - unsigned long long size = biosmap->size; - unsigned long long end = start + size; - unsigned long type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. - */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } - add_memory_region(start, size, type); - } while (biosmap++,--nr_map); - return 0; -} - #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; #ifdef CONFIG_EDD_MODULE @@ -682,7 +159,7 @@ static inline void copy_edd(void) } #endif -static int __initdata user_defined_memmap = 0; +int __initdata user_defined_memmap = 0; /* * "mem=nopentium" disables the 4MB page tables. @@ -719,51 +196,6 @@ static int __init parse_mem(char *arg) } early_param("mem", parse_mem); -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -828,90 +260,6 @@ static int __init parse_reservetop(char *arg) early_param("reservetop", parse_reservetop); /* - * Callback for efi_memory_walk. - */ -static int __init -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfn = arg, pfn; - - if (start < end) { - pfn = PFN_UP(end -1); - if (pfn > *max_pfn) - *max_pfn = pfn; - } - return 0; -} - -static int __init -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) -{ - memory_present(0, PFN_UP(start), PFN_DOWN(end)); - return 0; -} - - /* - * This function checks if the entire range <start,end> is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - -/* - * Find the highest page frame number we have available - */ -void __init find_max_pfn(void) -{ - int i; - - max_pfn = 0; - if (efi_enabled) { - efi_memmap_walk(efi_find_max_pfn, &max_pfn); - efi_memmap_walk(efi_memory_present_wrapper, NULL); - return; - } - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - -/* * Determine low and high memory ranges: */ unsigned long __init find_max_low_pfn(void) @@ -971,68 +319,6 @@ unsigned long __init find_max_low_pfn(void) } /* - * Free all available memory for boot time allocation. Used - * as a callback function by efi_memory_walk() - */ - -static int __init -free_available_memory(unsigned long start, unsigned long end, void *arg) -{ - /* check max_low_pfn */ - if (start >= (max_low_pfn << PAGE_SHIFT)) - return 0; - if (end >= (max_low_pfn << PAGE_SHIFT)) - end = max_low_pfn << PAGE_SHIFT; - if (start < end) - free_bootmem(start, end - start); - - return 0; -} -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - if (efi_enabled) { - efi_memmap_walk(free_available_memory, NULL); - return; - } - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - -/* * workaround for Dell systems that neglect to reserve EBDA */ static void __init reserve_ebda_region(void) @@ -1118,8 +404,8 @@ void __init setup_bootmem_allocator(void) * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */ - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); /* * reserve physical page 0 - it's a special BIOS page on many boxes, @@ -1162,8 +448,7 @@ void __init setup_bootmem_allocator(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { @@ -1200,126 +485,6 @@ void __init remapped_pgdat_init(void) } } -/* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. - */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); - else - legacy_init_iomem_resources(&code_resource, &data_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - -static void __init register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the bigest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - #ifdef CONFIG_MCA static void set_mca_bus(int x) { @@ -1329,6 +494,12 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1381,7 +552,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); else { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(machine_specific_memory_setup()); + print_memory_map(memory_setup()); } copy_edd(); diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 43002cfb40c..65d7620eaa0 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -128,7 +128,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) - GET_SEG(gs); + COPY_SEG(gs); GET_SEG(fs); COPY_SEG(es); COPY_SEG(ds); @@ -244,9 +244,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, { int tmp, err = 0; - tmp = 0; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); + err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); savesegment(fs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->fs); diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 31e5c6573aa..5285aff8367 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -321,7 +321,6 @@ static inline void leave_mm (unsigned long cpu) fastcall void smp_invalidate_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); unsigned long cpu; cpu = get_cpu(); @@ -352,7 +351,6 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs) smp_mb__after_clear_bit(); out: put_cpu_no_resched(); - set_irq_regs(old_regs); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -607,14 +605,11 @@ void smp_send_stop(void) */ fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); ack_APIC_irq(); - set_irq_regs(old_regs); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); void (*func) (void *info) = call_data->func; void *info = call_data->info; int wait = call_data->wait; @@ -637,7 +632,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) mb(); atomic_inc(&call_data->finished); } - set_irq_regs(old_regs); } /* @@ -699,6 +693,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, put_cpu(); return -EBUSY; } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + spin_lock_bh(&call_lock); __smp_call_function_single(cpu, func, info, nonatomic, wait); spin_unlock_bh(&call_lock); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 02a9b66b6ac..4bf0e3c83b8 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -33,6 +33,11 @@ * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ + +/* SMP boot always wants to use real time delay to allow sufficient time for + * the APs to come online */ +#define USE_REAL_TIME_DELAY + #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> @@ -52,6 +57,8 @@ #include <asm/desc.h> #include <asm/arch_hooks.h> #include <asm/nmi.h> +#include <asm/pda.h> +#include <asm/genapic.h> #include <mach_apic.h> #include <mach_wakecpu.h> @@ -536,11 +543,11 @@ set_cpu_sibling_map(int cpu) static void __devinit start_secondary(void *unused) { /* - * Dont put anything before smp_callin(), SMP + * Don't put *anything* before secondary_cpu_init(), SMP * booting is too fragile that we want to limit the * things done here to the most necessary things. */ - cpu_init(); + secondary_cpu_init(); preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) @@ -599,13 +606,16 @@ void __devinit initialize_secondary(void) "movl %0,%%esp\n\t" "jmp *%1" : - :"r" (current->thread.esp),"r" (current->thread.eip)); + :"m" (current->thread.esp),"m" (current->thread.eip)); } +/* Static state in head.S used to set up a CPU */ extern struct { void * esp; unsigned short ss; } stack_start; +extern struct i386_pda *start_pda; +extern struct Xgt_desc_struct cpu_gdt_descr; #ifdef CONFIG_NUMA @@ -936,9 +946,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - ++cpucount; - alternatives_smp_switch(1); - /* * We can't use kernel_thread since we must avoid to * reschedule the child. @@ -946,15 +953,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu) idle = alloc_idle_task(cpu); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); + + /* Pre-allocate and initialize the CPU's GDT and PDA so it + doesn't have to do any memory allocation during the + delicate CPU-bringup phase. */ + if (!init_gdt(cpu, idle)) { + printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); + return -1; /* ? */ + } + idle->thread.eip = (unsigned long) start_secondary; /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); + ++cpucount; + alternatives_smp_switch(1); + /* So we see what's up */ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); /* Stack for startup_32 can be just as for start_secondary onwards */ stack_start.esp = (void *) idle->thread.esp; + start_pda = cpu_pda(cpu); + cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu); + irq_ctx_init(cpu); x86_cpu_to_apicid[cpu] = apicid; @@ -1109,34 +1131,15 @@ exit: } #endif -static void smp_tune_scheduling (void) +static void smp_tune_scheduling(void) { unsigned long cachesize; /* kB */ - unsigned long bandwidth = 350; /* MB/s */ - /* - * Rough estimation for SMP scheduling, this is the number of - * cycles it takes for a fully memory-limited process to flush - * the SMP-local cache. - * - * (For a P5 this pretty much means we will choose another idle - * CPU almost always at wakeup time (this is due to the small - * L1 cache), on PIIs it's around 50-100 usecs, depending on - * the cache size) - */ - if (!cpu_khz) { - /* - * this basically disables processor-affinity - * scheduling on SMP without a TSC. - */ - return; - } else { + if (cpu_khz) { cachesize = boot_cpu_data.x86_cache_size; - if (cachesize == -1) { - cachesize = 16; /* Pentiums, 2x8kB cache */ - bandwidth = 100; - } - max_cache_size = cachesize * 1024; + + if (cachesize > 0) + max_cache_size = cachesize * 1024; } } @@ -1462,6 +1465,12 @@ int __devinit __cpu_up(unsigned int cpu) cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + +#ifdef CONFIG_X86_GENERICARCH + if (num_online_cpus() > 8 && genapic == &apic_default) + panic("Default flat APIC routing can't be used with > 8 cpus\n"); +#endif + return 0; } diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 713ba39d32c..7de9117b5a3 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -27,7 +27,11 @@ * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ +#ifdef CONFIG_PARAVIRT +unsigned int __read_mostly vdso_enabled = 0; +#else unsigned int __read_mostly vdso_enabled = 1; +#endif EXPORT_SYMBOL_GPL(vdso_enabled); @@ -132,7 +136,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { ret = -ENOMEM; goto up_fail; diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 78af572fd17..c505b16c099 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -56,6 +56,7 @@ #include <asm/uaccess.h> #include <asm/processor.h> #include <asm/timer.h> +#include <asm/time.h> #include "mach_time.h" @@ -116,10 +117,7 @@ static int set_rtc_mmss(unsigned long nowtime) /* gets recalled with irq locally disabled */ /* XXX - does irqsave resolve this? -johnstul */ spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_set_rtc_mmss(nowtime); - else - retval = mach_set_rtc_mmss(nowtime); + retval = set_wallclock(nowtime); spin_unlock_irqrestore(&rtc_lock, flags); return retval; @@ -223,10 +221,7 @@ unsigned long get_cmos_time(void) spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_get_time(); - else - retval = mach_get_cmos_time(); + retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); @@ -370,7 +365,7 @@ static void __init hpet_time_init(void) printk("Using HPET for base-timer\n"); } - time_init_hook(); + do_time_init(); } #endif @@ -392,5 +387,5 @@ void __init time_init(void) do_settimeofday(&ts); - time_init_hook(); + do_time_init(); } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 1a2a979cf6a..1e4702dfcd0 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -132,14 +132,20 @@ int __init hpet_enable(void) * the single HPET timer for system time. */ #ifdef CONFIG_HPET_EMULATE_RTC - if (!(id & HPET_ID_NUMBER)) + if (!(id & HPET_ID_NUMBER)) { + iounmap(hpet_virt_address); + hpet_virt_address = NULL; return -1; + } #endif hpet_period = hpet_readl(HPET_PERIOD); - if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) + if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) { + iounmap(hpet_virt_address); + hpet_virt_address = NULL; return -1; + } /* * 64 bit math @@ -156,8 +162,11 @@ int __init hpet_enable(void) hpet_use_timer = id & HPET_ID_LEGSUP; - if (hpet_timer_stop_set_go(hpet_tick)) + if (hpet_timer_stop_set_go(hpet_tick)) { + iounmap(hpet_virt_address); + hpet_virt_address = NULL; return -1; + } use_hpet = 1; diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 07d6da36a82..79cf608e14c 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -40,14 +40,18 @@ int arch_register_cpu(int num) * restrictions and assumptions in kernel. This basically * doesnt add a control file, one cannot attempt to offline * BSP. + * + * Also certain PCI quirks require not to enable hotplug control + * for all CPU's. */ - if (!num) - cpu_devices[num].cpu.no_control = 1; + if (num && enable_cpu_hotplug) + cpu_devices[num].cpu.hotpluggable = 1; return register_cpu(&cpu_devices[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU +int enable_cpu_hotplug = 1; void arch_unregister_cpu(int num) { return unregister_cpu(&cpu_devices[num].cpu); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fe9c5e8e7e6..68de48e498c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -29,6 +29,7 @@ #include <linux/kexec.h> #include <linux/unwind.h> #include <linux/uaccess.h> +#include <linux/nmi.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -61,9 +62,6 @@ int panic_on_unrecovered_nmi; asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; - /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; @@ -94,7 +92,7 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -static int kstack_depth_to_print = 24; +int kstack_depth_to_print = 24; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else @@ -163,16 +161,25 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data) { struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } +#define MSG(msg) ops->warning(data, msg) + void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, struct stacktrace_ops *ops, void *data) @@ -191,29 +198,31 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (unwind_init_frame_info(&info, task, regs) == 0) unw_ret = dump_trace_unwind(&info, &oad); } else if (task == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); else { if (unwind_init_blocked(&info, task) == 0) unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - ops->warning(data, "Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (void *)UNW_SP(&info); if (!stack) return; ebp = UNW_FP(&info); } else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - ops->warning(data, "Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; @@ -247,6 +256,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long*)context->previous_esp; if (!stack) break; + touch_nmi_watchdog(); } } EXPORT_SYMBOL(dump_trace); @@ -379,7 +389,7 @@ void show_registers(struct pt_regs *regs) * time of the fault.. */ if (in_kernel) { - u8 __user *eip; + u8 *eip; int code_bytes = 64; unsigned char c; @@ -388,18 +398,20 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + eip = (u8 *)regs->eip - 43; + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { /* try starting at EIP */ - eip = (u8 __user *)regs->eip; + eip = (u8 *)regs->eip; code_bytes = 32; } for (i = 0; i < code_bytes; i++, eip++) { - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 *)regs->eip) printk("<%02x> ", c); else printk("%02x ", c); @@ -415,7 +427,7 @@ static void handle_BUG(struct pt_regs *regs) if (eip < PAGE_OFFSET) return; - if (probe_kernel_address((unsigned short __user *)eip, ud2)) + if (probe_kernel_address((unsigned short *)eip, ud2)) return; if (ud2 != 0x0b0f) return; @@ -428,11 +440,11 @@ static void handle_BUG(struct pt_regs *regs) char *file; char c; - if (probe_kernel_address((unsigned short __user *)(eip + 2), - line)) + if (probe_kernel_address((unsigned short *)(eip + 2), line)) break; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (probe_kernel_address((char **)(eip + 4), file) || + (unsigned long)file < PAGE_OFFSET || + probe_kernel_address(file, c)) file = "<bad filename>"; printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); @@ -452,7 +464,7 @@ void die(const char * str, struct pt_regs * regs, long err) u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -707,8 +719,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); - printk(KERN_EMERG "You probably have a hardware problem with your RAM " - "chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); @@ -773,7 +784,6 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) printk(" on CPU%d, eip %08lx, registers:\n", smp_processor_id(), regs->eip); show_registers(regs); - printk(KERN_EMERG "console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); @@ -1088,49 +1098,24 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, #endif } -fastcall void setup_x86_bogus_stack(unsigned char * stk) -{ - unsigned long *switch16_ptr, *switch32_ptr; - struct pt_regs *regs; - unsigned long stack_top, stack_bot; - unsigned short iret_frame16_off; - int cpu = smp_processor_id(); - /* reserve the space on 32bit stack for the magic switch16 pointer */ - memmove(stk, stk + 8, sizeof(struct pt_regs)); - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); - regs = (struct pt_regs *)stk; - /* now the switch32 on 16bit stack */ - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; - /* copy iret frame on 16bit stack */ - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); - /* fill in the switch pointers */ - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; - switch16_ptr[1] = __ESPFIX_SS; - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + - 8 - CPU_16BIT_STACK_SIZE; - switch32_ptr[1] = __KERNEL_DS; -} - -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) +fastcall unsigned long patch_espfix_desc(unsigned long uesp, + unsigned long kesp) { - unsigned long *switch32_ptr; - unsigned char *stack16, *stack32; - unsigned long stack_top, stack_bot; - int len; int cpu = smp_processor_id(); - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - /* copy the data from 16bit stack to 32bit stack */ - len = CPU_16BIT_STACK_SIZE - 8 - sp; - stack16 = (unsigned char *)(stack_bot + sp); - stack32 = (unsigned char *) - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); - memcpy(stack32, stack16, len); - return stack32; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + /* Set up base for espfix segment */ + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | + (lim_pages & 0xffff); + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + return new_kesp; } /* @@ -1143,7 +1128,7 @@ fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) * Must be called with kernel preemption disabled (in this case, * local interrupts are disabled at the call-site in entry.S). */ -asmlinkage void math_state_restore(struct pt_regs regs) +asmlinkage void math_state_restore(void) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; @@ -1153,6 +1138,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; } #ifndef CONFIG_MATH_EMULATION diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 9810c8c9075..1bbe45dca7a 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -13,7 +13,6 @@ #include <asm/delay.h> #include <asm/tsc.h> -#include <asm/delay.h> #include <asm/io.h> #include "mach_timer.h" diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index cbcd61d6120..be2f96e67f7 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -43,6 +43,7 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/audit.h> +#include <linux/stddef.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -72,10 +73,10 @@ /* * 8- and 16-bit register defines.. */ -#define AL(regs) (((unsigned char *)&((regs)->eax))[0]) -#define AH(regs) (((unsigned char *)&((regs)->eax))[1]) -#define IP(regs) (*(unsigned short *)&((regs)->eip)) -#define SP(regs) (*(unsigned short *)&((regs)->esp)) +#define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0]) +#define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1]) +#define IP(regs) (*(unsigned short *)&((regs)->pt.eip)) +#define SP(regs) (*(unsigned short *)&((regs)->pt.esp)) /* * virtual flags (16 and 32-bit versions) @@ -89,10 +90,37 @@ #define SAFE_MASK (0xDD5) #define RETURN_MASK (0xDFF) -#define VM86_REGS_PART2 orig_eax -#define VM86_REGS_SIZE1 \ - ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) -#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) +/* convert kernel_vm86_regs to vm86_regs */ +static int copy_vm86_regs_to_user(struct vm86_regs __user *user, + const struct kernel_vm86_regs *regs) +{ + int ret = 0; + + /* kernel_vm86_regs is missing xfs, so copy everything up to + (but not including) xgs, and then rest after xgs. */ + ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); + ret += copy_to_user(&user->__null_gs, ®s->pt.xgs, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.xgs)); + + return ret; +} + +/* convert vm86_regs to kernel_vm86_regs */ +static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, + const struct vm86_regs __user *user, + unsigned extra) +{ + int ret = 0; + + ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); + ret += copy_from_user(®s->pt.xgs, &user->__null_gs, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.xgs) + + extra); + + return ret; +} struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) @@ -112,10 +140,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); - tmp = copy_to_user(¤t->thread.vm86_info->regs,regs, VM86_REGS_SIZE1); - tmp += copy_to_user(¤t->thread.vm86_info->regs.VM86_REGS_PART2, - ®s->VM86_REGS_PART2, VM86_REGS_SIZE2); + set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); + tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); if (tmp) { printk("vm86: could not access userspace vm86_info\n"); @@ -129,9 +155,11 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) current->thread.saved_esp0 = 0; put_cpu(); - loadsegment(fs, current->thread.saved_fs); - loadsegment(gs, current->thread.saved_gs); ret = KVM86->regs32; + + loadsegment(fs, current->thread.saved_fs); + ret->xgs = current->thread.saved_gs; + return ret; } @@ -183,9 +211,9 @@ asmlinkage int sys_vm86old(struct pt_regs regs) tsk = current; if (tsk->thread.saved_esp0) goto out; - tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); - tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, - (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2); + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, vm86plus) - + sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; @@ -233,9 +261,9 @@ asmlinkage int sys_vm86(struct pt_regs regs) if (tsk->thread.saved_esp0) goto out; v86 = (struct vm86plus_struct __user *)regs.ecx; - tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); - tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, - (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2); + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, regs32) - + sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; @@ -252,15 +280,15 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { struct tss_struct *tss; - long eax; /* * make sure the vm86() system call doesn't try to do anything silly */ - info->regs.__null_ds = 0; - info->regs.__null_es = 0; + info->regs.pt.xds = 0; + info->regs.pt.xes = 0; + info->regs.pt.xgs = 0; -/* we are clearing fs,gs later just before "jmp resume_userspace", - * because starting with Linux 2.1.x they aren't no longer saved/restored +/* we are clearing fs later just before "jmp resume_userspace", + * because it is not saved/restored. */ /* @@ -268,10 +296,10 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = info->regs.eflags; - info->regs.eflags &= SAFE_MASK; - info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK; - info->regs.eflags |= VM_MASK; + VEFLAGS = info->regs.pt.eflags; + info->regs.pt.eflags &= SAFE_MASK; + info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK; + info->regs.pt.eflags |= VM_MASK; switch (info->cpu_type) { case CPU_286: @@ -294,7 +322,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; savesegment(fs, tsk->thread.saved_fs); - savesegment(gs, tsk->thread.saved_gs); + tsk->thread.saved_gs = info->regs32->xgs; tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -306,19 +334,18 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t"); - __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax)); /*call audit_syscall_exit since we do not exit via the normal paths */ if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(eax), eax); + audit_syscall_exit(AUDITSC_RESULT(0), 0); __asm__ __volatile__( "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" + "mov %2, %%fs\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk))); + :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); /* we never return here */ } @@ -348,12 +375,12 @@ static inline void clear_IF(struct kernel_vm86_regs * regs) static inline void clear_TF(struct kernel_vm86_regs * regs) { - regs->eflags &= ~TF_MASK; + regs->pt.eflags &= ~TF_MASK; } static inline void clear_AC(struct kernel_vm86_regs * regs) { - regs->eflags &= ~AC_MASK; + regs->pt.eflags &= ~AC_MASK; } /* It is correct to call set_IF(regs) from the set_vflags_* @@ -370,7 +397,7 @@ static inline void clear_AC(struct kernel_vm86_regs * regs) static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) { set_flags(VEFLAGS, eflags, current->thread.v86mask); - set_flags(regs->eflags, eflags, SAFE_MASK); + set_flags(regs->pt.eflags, eflags, SAFE_MASK); if (eflags & IF_MASK) set_IF(regs); else @@ -380,7 +407,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) { set_flags(VFLAGS, flags, current->thread.v86mask); - set_flags(regs->eflags, flags, SAFE_MASK); + set_flags(regs->pt.eflags, flags, SAFE_MASK); if (flags & IF_MASK) set_IF(regs); else @@ -389,7 +416,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) { - unsigned long flags = regs->eflags & RETURN_MASK; + unsigned long flags = regs->pt.eflags & RETURN_MASK; if (VEFLAGS & VIF_MASK) flags |= IF_MASK; @@ -493,7 +520,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, unsigned long __user *intr_ptr; unsigned long segoffs; - if (regs->cs == BIOSSEG) + if (regs->pt.xcs == BIOSSEG) goto cannot_handle; if (is_revectored(i, &KVM86->int_revectored)) goto cannot_handle; @@ -505,9 +532,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i, if ((segoffs >> 16) == BIOSSEG) goto cannot_handle; pushw(ssp, sp, get_vflags(regs), cannot_handle); - pushw(ssp, sp, regs->cs, cannot_handle); + pushw(ssp, sp, regs->pt.xcs, cannot_handle); pushw(ssp, sp, IP(regs), cannot_handle); - regs->cs = segoffs >> 16; + regs->pt.xcs = segoffs >> 16; SP(regs) -= 6; IP(regs) = segoffs & 0xffff; clear_TF(regs); @@ -524,7 +551,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno if (VMPI.is_vm86pus) { if ( (trapno==3) || (trapno==1) ) return_to_32bit(regs, VM86_TRAP + (trapno << 8)); - do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs)); + do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs)); return 0; } if (trapno !=1) @@ -560,10 +587,10 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) handle_vm86_trap(regs, 0, 1); \ return; } while (0) - orig_flags = *(unsigned short *)®s->eflags; + orig_flags = *(unsigned short *)®s->pt.eflags; - csp = (unsigned char __user *) (regs->cs << 4); - ssp = (unsigned char __user *) (regs->ss << 4); + csp = (unsigned char __user *) (regs->pt.xcs << 4); + ssp = (unsigned char __user *) (regs->pt.xss << 4); sp = SP(regs); ip = IP(regs); @@ -650,7 +677,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) SP(regs) += 6; } IP(regs) = newip; - regs->cs = newcs; + regs->pt.xcs = newcs; CHECK_IF_IN_TRAP; if (data32) { set_vflags_long(newflags, regs); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index c6f84a0322b..56e6ad5cb04 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -1,13 +1,26 @@ /* ld script to make i386 Linux kernel * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; + * + * Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. */ +/* Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. + */ #define LOAD_OFFSET __PAGE_OFFSET #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> #include <asm/page.h> #include <asm/cache.h> +#include <asm/boot.h> OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -21,34 +34,35 @@ PHDRS { } SECTIONS { - . = __KERNEL_START; + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; phys_startup_32 = startup_32 - LOAD_OFFSET; /* read-only */ - _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + _text = .; /* Text and read-only data */ *(.text) SCHED_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) *(.gnu.warning) - } :text = 0x9090 - - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ + } :text = 0x9090 . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } RODATA . = ALIGN(4); - __tracedata_start = .; .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { + __tracedata_start = .; *(.tracedata) + __tracedata_end = .; } - __tracedata_end = .; /* writeable */ . = ALIGN(4096); @@ -57,11 +71,19 @@ SECTIONS CONSTRUCTORS } :data + .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) { + __start_paravirtprobe = .; + *(.paravirtprobe) + __stop_paravirtprobe = .; + } + . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(4096); + __nosave_end = .; + } . = ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { @@ -75,17 +97,10 @@ SECTIONS /* rarely changed data like cpu maps */ . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } - _edata = .; /* End of data section */ - -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(4); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + _edata = .; /* End of data section */ } -#endif . = ALIGN(THREAD_SIZE); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { @@ -94,88 +109,102 @@ SECTIONS /* might get freed after init */ . = ALIGN(4096); - __smp_alt_begin = .; - __smp_alt_instructions = .; .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + __smp_alt_begin = .; + __smp_alt_instructions = .; *(.smp_altinstructions) + __smp_alt_instructions_end = .; } - __smp_alt_instructions_end = .; . = ALIGN(4); - __smp_locks = .; .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; *(.smp_locks) + __smp_locks_end = .; } - __smp_locks_end = .; .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { *(.smp_altinstr_replacement) + __smp_alt_end = .; } + /* will be freed after init + * Following ALIGN() is required to make sure no other data falls on the + * same page where __smp_alt_end is pointing as that page might be freed + * after boot. Always make sure that ALIGN() directive is present after + * the section which contains __smp_alt_end. + */ . = ALIGN(4096); - __smp_alt_end = .; /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ - __init_begin = .; .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; _sinittext = .; *(.init.text) _einittext = .; } .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } . = ALIGN(16); - __setup_start = .; - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } - __setup_end = .; - __initcall_start = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; INITCALLS + __initcall_end = .; } - __initcall_end = .; - __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; *(.con_initcall.init) + __con_initcall_end = .; } - __con_initcall_end = .; SECURITY_INIT . = ALIGN(4); - __alt_instructions = .; .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; *(.altinstructions) + __alt_instructions_end = .; } - __alt_instructions_end = .; .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } + . = ALIGN(4); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __start_parainstructions = .; + *(.parainstructions) + __stop_parainstructions = .; + } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } - __initramfs_end = .; + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } . = ALIGN(L1_CACHE_BYTES); - __per_cpu_start = .; - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } - __per_cpu_end = .; + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } . = ALIGN(4096); - __init_end = .; /* freed after init ends here */ - __bss_start = .; /* BSS */ - .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { - *(.bss.page_aligned) - } .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __init_end = .; + __bss_start = .; /* BSS */ + *(.bss.page_aligned) *(.bss) + . = ALIGN(4); + __bss_stop = .; + _end = . ; + /* This is where the kernel creates the early boot page tables */ + . = ALIGN(4096); + pg0 = . ; } - . = ALIGN(4); - __bss_stop = .; - - _end = . ; - - /* This is where the kernel creates the early boot page tables */ - . = ALIGN(4096); - pg0 = .; /* Sections to be discarded */ /DISCARD/ : { diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index 94b1fd9cbe3..a7b3999bb37 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -45,7 +45,9 @@ static int __init parse_apic(char *arg) return 0; } } - return -ENOENT; + + /* Parsed again by __setup for debug/verbose */ + return 0; } early_param("apic", parse_apic); diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c index f50c6c6ad68..943a9473b13 100644 --- a/arch/i386/mach-voyager/voyager_cat.c +++ b/arch/i386/mach-voyager/voyager_cat.c @@ -776,7 +776,7 @@ voyager_cat_init(void) for(asic=0; asic < (*modpp)->num_asics; asic++) { int j; voyager_asic_t *asicp = *asicpp - = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ + = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ voyager_sp_table_t *sp_table; voyager_at_t *asic_table; voyager_jtt_t *jtag_table; @@ -785,7 +785,6 @@ voyager_cat_init(void) printk("**WARNING** kmalloc failure in cat_init\n"); continue; } - memset(asicp, 0, sizeof(voyager_asic_t)); asicpp = &(asicp->next); asicp->asic_location = asic; sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset); @@ -851,8 +850,7 @@ voyager_cat_init(void) #endif { - struct resource *res = kmalloc(sizeof(struct resource),GFP_KERNEL); - memset(res, 0, sizeof(struct resource)); + struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL); res->name = kmalloc(128, GFP_KERNEL); sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i)); res->start = qic_addr; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index f3fea2ad50f..55428e656a3 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -28,6 +28,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/arch_hooks.h> +#include <asm/pda.h> /* TLB state -- visible externally, indexed physically */ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; @@ -422,6 +423,7 @@ find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; + write_pda(cpu_number, boot_cpu_id); } /* @@ -458,7 +460,7 @@ start_secondary(void *unused) /* external functions not defined in the headers */ extern void calibrate_delay(void); - cpu_init(); + secondary_cpu_init(); /* OK, we're in the routine */ ack_CPI(VIC_CPU_BOOT_CPI); @@ -578,6 +580,15 @@ do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.esp = (void *) idle->thread.esp; + /* Pre-allocate and initialize the CPU's GDT and PDA so it + doesn't have to do any memory allocation during the + delicate CPU-bringup phase. */ + if (!init_gdt(cpu, idle)) { + printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); + cpucount--; + return; + } + irq_ctx_init(cpu); /* Note: Don't modify initial ss override */ @@ -1963,4 +1974,5 @@ void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); + write_pda(cpu_number, hard_smp_processor_id()); } diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h index d62b20a3e66..65120f52385 100644 --- a/arch/i386/math-emu/fpu_emu.h +++ b/arch/i386/math-emu/fpu_emu.h @@ -57,6 +57,7 @@ #define TAG_Special Const(2) /* De-normal, + or - infinity, or Not a Number */ #define TAG_Empty Const(3) /* empty */ +#define TAG_Error Const(0x80) /* probably need to abort */ #define LOADED_DATA Const(10101) /* Special st() number to identify loaded data (not on stack). */ diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c index d93f16ef828..ddf8fa3bbd0 100644 --- a/arch/i386/math-emu/fpu_entry.c +++ b/arch/i386/math-emu/fpu_entry.c @@ -742,7 +742,8 @@ int save_i387_soft(void *s387, struct _fpstate __user * buf) S387->fcs &= ~0xf8000000; S387->fos |= 0xffff0000; #endif /* PECULIAR_486 */ - __copy_to_user(d, &S387->cwd, 7*4); + if (__copy_to_user(d, &S387->cwd, 7*4)) + return -1; RE_ENTRANT_CHECK_ON; d += 7*4; diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h index bf26341c8bd..a3ae28c49dd 100644 --- a/arch/i386/math-emu/fpu_system.h +++ b/arch/i386/math-emu/fpu_system.h @@ -68,6 +68,7 @@ #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ math_abort(FPU_info,SIGSEGV) +#define FPU_abort math_abort(FPU_info, SIGSEGV) #undef FPU_IGNORE_CODE_SEGV #ifdef FPU_IGNORE_CODE_SEGV diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c index 85314be2fef..eebd6fb1c8a 100644 --- a/arch/i386/math-emu/load_store.c +++ b/arch/i386/math-emu/load_store.c @@ -227,6 +227,8 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, case 027: /* fild m64int */ clear_C1(); loaded_tag = FPU_load_int64((long long __user *)data_address); + if (loaded_tag == TAG_Error) + return 0; FPU_settag0(loaded_tag); break; case 030: /* fstenv m14/28byte */ diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c index f06ed41d191..e976caef649 100644 --- a/arch/i386/math-emu/reg_ld_str.c +++ b/arch/i386/math-emu/reg_ld_str.c @@ -244,7 +244,8 @@ int FPU_load_int64(long long __user *_s) RE_ENTRANT_CHECK_OFF; FPU_access_ok(VERIFY_READ, _s, 8); - copy_from_user(&s,_s,8); + if (copy_from_user(&s,_s,8)) + FPU_abort; RE_ENTRANT_CHECK_ON; if (s == 0) @@ -907,7 +908,8 @@ int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) RE_ENTRANT_CHECK_OFF; FPU_access_ok(VERIFY_WRITE,d,8); - copy_to_user(d, &tll, 8); + if (copy_to_user(d, &tll, 8)) + FPU_abort; RE_ENTRANT_CHECK_ON; return 1; @@ -1336,7 +1338,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) I387.soft.fcs &= ~0xf8000000; I387.soft.fos |= 0xffff0000; #endif /* PECULIAR_486 */ - __copy_to_user(d, &control_word, 7*4); + if (__copy_to_user(d, &control_word, 7*4)) + FPU_abort; RE_ENTRANT_CHECK_ON; d += 0x1c; } @@ -1359,9 +1362,11 @@ void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) FPU_access_ok(VERIFY_WRITE,d,80); /* Copy all registers in stack order. */ - __copy_to_user(d, register_base+offset, other); + if (__copy_to_user(d, register_base+offset, other)) + FPU_abort; if ( offset ) - __copy_to_user(d+other, register_base, offset); + if (__copy_to_user(d+other, register_base, offset)) + FPU_abort; RE_ENTRANT_CHECK_ON; finit(); diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 4de11f508c3..4de95a17a7d 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -16,6 +16,7 @@ */ #undef CONFIG_X86_PAE +#undef CONFIG_PARAVIRT #include <asm/page.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index ddbdb0336f2..103b76e56a9 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -168,7 +168,7 @@ static void __init allocate_pgdat(int nid) if (nid && node_has_online_mem(nid)) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); min_low_pfn += PFN_UP(sizeof(pg_data_t)); } } diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 2581575786c..aaaa4d225f7 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -22,9 +22,9 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/desc.h> #include <asm/kdebug.h> #include <asm/segment.h> @@ -167,7 +167,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, static int __is_prefetch(struct pt_regs *regs, unsigned long addr) { unsigned long limit; - unsigned long instr = get_segment_eip (regs, &limit); + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); int scan_more = 1; int prefetch = 0; int i; @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) unsigned char instr_hi; unsigned char instr_lo; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index f9f647cdbc7..e0fa6cb655a 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -32,7 +32,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -50,26 +50,22 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); -#ifdef CONFIG_DEBUG_HIGHMEM - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { - dec_preempt_count(); - preempt_check_resched(); - return; - } - - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) - BUG(); -#endif /* * Force other mappings to Oops if they'll try to access this pte * without first remap it. Keeping stale mappings around is a bad idea * also, in case the page changes cacheability attributes or becomes * a protected page in a hypervisor. */ - kpte_clear_flush(kmap_pte-idx, vaddr); + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + kpte_clear_flush(kmap_pte-idx, vaddr); + else { +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr < PAGE_OFFSET); + BUG_ON(vaddr >= (unsigned long)high_memory); +#endif + } - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } /* This is the same as kmap_atomic() but can map memory that doesn't @@ -80,7 +76,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - inc_preempt_count(); + pagefault_disable(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 1719a8141f8..34728e4afe4 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -17,6 +17,113 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vma->vm_flags != svma->vm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * search for a shareable pmd page for hugetlb. + */ +static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct prio_tree_iter iter; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + + if (!vma_shareable(vma, addr)) + return; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + spin_unlock(&mapping->i_mmap_lock); +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -25,8 +132,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) + if (pud) { + if (pud_none(*pud)) + huge_pmd_share(mm, addr, pud); pte = (pte_t *) pmd_alloc(mm, pud, addr); + } BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); return pte; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 167416155ee..84697dfc734 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -192,8 +192,6 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } -extern int is_available_memory(efi_memory_desc_t *); - int page_is_ram(unsigned long pagenr) { int i; @@ -699,8 +697,8 @@ int remove_memory(u64 start, u64 size) #endif #endif -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +struct kmem_cache *pgd_cache; +struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 8564b6ae17e..ad91528bdc1 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -67,11 +67,17 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } -static void flush_kernel_map(void *dummy) +static void flush_kernel_map(void *arg) { - /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ - if (boot_cpu_data.x86_model >= 4) + unsigned long adr = (unsigned long)arg; + + if (adr && cpu_has_clflush) { + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); + } else if (boot_cpu_data.x86_model >= 4) wbinvd(); + /* Flush all to work around Errata in early athlons regarding * large page flushing. */ @@ -173,9 +179,9 @@ __change_page_attr(struct page *page, pgprot_t prot) return 0; } -static inline void flush_map(void) +static inline void flush_map(void *adr) { - on_each_cpu(flush_kernel_map, NULL, 1, 1); + on_each_cpu(flush_kernel_map, adr, 1, 1); } /* @@ -217,9 +223,13 @@ void global_flush_tlb(void) spin_lock_irq(&cpa_lock); list_replace_init(&df_list, &l); spin_unlock_irq(&cpa_lock); - flush_map(); - list_for_each_entry_safe(pg, next, &l, lru) + if (!cpu_has_clflush) + flush_map(0); + list_for_each_entry_safe(pg, next, &l, lru) { + if (cpu_has_clflush) + flush_map(page_address(pg)); __free_page(pg); + } } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 10126e3f817..f349eaf450b 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -95,8 +95,11 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) return; } pte = pte_offset_kernel(pmd, vaddr); - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); + if (pgprot_val(flags)) + /* <pfn,flags> stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte(pfn, flags)); + else + pte_clear(&init_mm, vaddr, pte); /* * It's enough to flush this one mapping. @@ -193,7 +196,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) return pte; } -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } @@ -233,7 +236,7 @@ static inline void pgd_list_del(pgd_t *pgd) set_page_private(next, (unsigned long)pprev); } -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; @@ -253,7 +256,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) } /* never called when PTRS_PER_PMD > 1 */ -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c index 713d6c866ca..42df4b6606d 100644 --- a/arch/i386/pci/early.c +++ b/arch/i386/pci/early.c @@ -45,6 +45,13 @@ void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, outl(val, 0xcfc); } +void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outb(val, 0xcfc); +} + int early_pci_allowed(void) { return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index e65551cd821..f2cb942f828 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -764,7 +764,7 @@ static void __init pirq_find_router(struct irq_router *r) DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); if (!pirq_router_dev) { DBG(KERN_DEBUG "PCI: Interrupt router not found at " "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); @@ -784,6 +784,8 @@ static void __init pirq_find_router(struct irq_router *r) pirq_router_dev->vendor, pirq_router_dev->device, pci_name(pirq_router_dev)); + + /* The device remains referenced for the kernel lifetime */ } static struct irq_info *pirq_get_info(struct pci_dev *dev) diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index ed1512a175a..5f5193401be 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/uaccess.h> #include "pci.h" #include "pci-functions.h" @@ -314,6 +315,10 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) for (check = (union bios32 *) __va(0xe0000); check <= (union bios32 *) __va(0xffff0); ++check) { + long sig; + if (probe_kernel_address(&check->fields.signature, sig)) + continue; + if (check->fields.signature != BIOS32_SIGNATURE) continue; length = check->fields.length * 16; @@ -331,11 +336,13 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) } DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); if (check->fields.entry >= 0x100000) { - printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check); + printk("PCI: BIOS32 entry (0x%p) in high memory, " + "cannot use.\n", check); return NULL; } else { unsigned long bios32_entry = check->fields.entry; - DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", + bios32_entry); bios32_indirect.address = bios32_entry + PAGE_OFFSET; if (check_pcibios()) return &pci_bios_access; diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile index 8cfa4e8a719..2de7bbf03cd 100644 --- a/arch/i386/power/Makefile +++ b/arch/i386/power/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_PM) += cpu.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o suspend.o diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 5a1abeff033..2c15500f871 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -26,8 +26,8 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - store_gdt(&ctxt->gdt_limit); - store_idt(&ctxt->idt_limit); + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); store_tr(ctxt->tr); /* @@ -99,8 +99,8 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - load_gdt(&ctxt->gdt_limit); - load_idt(&ctxt->idt_limit); + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); /* * segment registers diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c new file mode 100644 index 00000000000..db5e98d2eb7 --- /dev/null +++ b/arch/i386/power/suspend.c @@ -0,0 +1,158 @@ +/* + * Suspend support specific for i386 - temporary page tables + * + * Distribute under GPLv2 + * + * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl> + */ + +#include <linux/suspend.h> +#include <linux/bootmem.h> + +#include <asm/system.h> +#include <asm/page.h> +#include <asm/pgtable.h> + +/* Defined in arch/i386/power/swsusp.S */ +extern int restore_image(void); + +/* Pointer to the temporary resume page tables */ +pgd_t *resume_pg_dir; + +/* The following three functions are based on the analogous code in + * arch/i386/mm/init.c + */ + +/* + * Create a middle page table on a resume-safe page and put a pointer to it in + * the given global directory entry. This only returns the gd entry + * in non-PAE compilation mode, since the middle layer is folded. + */ +static pmd_t *resume_one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + +#ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd_table) + return NULL; + + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + + BUG_ON(pmd_table != pmd_offset(pud, 0)); +#else + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); +#endif + + return pmd_table; +} + +/* + * Create a page table on a resume-safe page and place a pointer to it in + * a middle page directory entry. + */ +static pte_t *resume_one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!page_table) + return NULL; + + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + + return page_table; + } + + return pte_offset_kernel(pmd, 0); +} + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. The page tables are allocated out of resume-safe pages. + */ +static int resume_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int pgd_idx, pmd_idx; + + pgd_idx = pgd_index(PAGE_OFFSET); + pgd = pgd_base + pgd_idx; + pfn = 0; + + for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (pfn >= max_low_pfn) + continue; + + for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { + if (pfn >= max_low_pfn) + break; + + /* Map with big pages if possible, otherwise create + * normal page tables. + * NOTE: We can mark everything as executable here + */ + if (cpu_has_pse) { + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + pfn += PTRS_PER_PTE; + } else { + pte_t *max_pte; + + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + + max_pte = pte + PTRS_PER_PTE; + for (; pte < max_pte; pte++, pfn++) { + if (pfn >= max_low_pfn) + break; + + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + } + } + } + } + return 0; +} + +static inline void resume_init_first_level_page_table(pgd_t *pg_dir) +{ +#ifdef CONFIG_X86_PAE + int i; + + /* Init entries of the first-level page table to the zero page */ + for (i = 0; i < PTRS_PER_PGD; i++) + set_pgd(pg_dir + i, + __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); +#endif +} + +int swsusp_arch_resume(void) +{ + int error; + + resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!resume_pg_dir) + return -ENOMEM; + + resume_init_first_level_page_table(resume_pg_dir); + error = resume_physical_mapping_init(resume_pg_dir); + if (error) + return error; + + /* We have got enough memory and from now on we cannot recover */ + restore_image(); + return 0; +} diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index 8a2b50a0aaa..53662e05b39 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -28,8 +28,9 @@ ENTRY(swsusp_arch_suspend) call swsusp_save ret -ENTRY(swsusp_arch_resume) - movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx +ENTRY(restore_image) + movl resume_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx movl %ecx, %cr3 movl restore_pblist, %edx @@ -51,6 +52,10 @@ copy_loop: .p2align 4,,7 done: + /* go back to the original page tables */ + movl $swapper_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx + movl %ecx, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movl mmu_cr4_features, %eax movl %eax, %edx diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index daa6b91bc92..578737ec762 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -91,7 +91,7 @@ ia64_elf32_init (struct pt_regs *regs) * it with privilege level 3 because the IVE uses non-privileged accesses to these * tables. IA-32 segmentation is used to protect against IA-32 accesses to them. */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -117,7 +117,7 @@ ia64_elf32_init (struct pt_regs *regs) * code is locked in specific gate page, which is pointed by pretcode * when setup_frame_ia32 */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -142,7 +142,7 @@ ia64_elf32_init (struct pt_regs *regs) * Install LDT as anonymous memory. This gives us all-zero segment descriptors * until a task modifies them via modify_ldt(). */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -214,7 +214,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index c187743965a..6af400a12ca 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -249,7 +249,7 @@ ia32_init (void) #if PAGE_SHIFT > IA32_PAGE_SHIFT { - extern kmem_cache_t *partial_page_cachep; + extern struct kmem_cache *partial_page_cachep; partial_page_cachep = kmem_cache_create("partial_page_cache", sizeof(struct partial_page), 0, 0, diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index 703a67c934f..cfa0bc0026b 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -330,8 +330,6 @@ struct old_linux32_dirent { void ia64_elf32_init(struct pt_regs *regs); #define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r) -#define elf_addr_t u32 - /* This macro yields a bitmask that programs can use to figure out what instruction set this CPU supports. */ #define ELF_HWCAP 0 diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 9d6a3f21014..a4a6e1463af 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -254,7 +254,7 @@ mmap_subpage (struct file *file, unsigned long start, unsigned long end, int pro } /* SLAB cache for partial_page structures */ -kmem_cache_t *partial_page_cachep; +struct kmem_cache *partial_page_cachep; /* * init partial_page_list. diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 51217d63285..4d592ee9300 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -481,7 +481,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } /* diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 0b546e2b36a..c4c10a0b99d 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -952,7 +952,6 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -#ifdef CONFIG_HOTPLUG_CPU static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -974,7 +973,6 @@ static struct notifier_block palinfo_cpu_notifier = .notifier_call = palinfo_cpu_callback, .priority = 0, }; -#endif static int __init palinfo_init(void) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3aaede0d698..e2321536ee4 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2302,7 +2302,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e63b8ca5344..fd607ca51a8 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -575,7 +575,6 @@ static struct file_operations salinfo_data_fops = { .write = salinfo_log_write, }; -#ifdef CONFIG_HOTPLUG_CPU static int __devinit salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -620,7 +619,6 @@ static struct notifier_block salinfo_cpu_notifier = .notifier_call = salinfo_cpu_callback, .priority = 0, }; -#endif /* CONFIG_HOTPLUG_CPU */ static int __init salinfo_init(void) diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5629b45e89c..687500ddb4b 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -31,11 +31,11 @@ int arch_register_cpu(int num) { #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* - * If CPEI cannot be re-targetted, and this is - * CPEI target, then dont create the control file + * If CPEI can be re-targetted or if this is not + * CPEI target, then it is hotpluggable */ - if (!can_cpei_retarget() && is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.no_control = 1; + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index f3a9585e98a..0c7e94edc20 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -64,6 +64,11 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } /* diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff87a5cba39..56dc2024220 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -156,7 +156,7 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -175,7 +175,7 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 0e7778be33c..936205f7aba 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -196,9 +196,7 @@ static unsigned long __init setup_memory(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? - INITRD_START + PAGE_OFFSET : 0; - + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; printk("initrd:start[%08lx],size[%08lx]\n", initrd_start, INITRD_SIZE); diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index b60cea4aeba..092ea86bb07 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -21,7 +21,7 @@ #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/personality.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/uaccess.h> diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index abb34ccd598..c7efdb0aefc 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -105,9 +105,7 @@ unsigned long __init setup_memory(void) if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? - INITRD_START + PAGE_OFFSET : 0; - + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; printk("initrd:start[%08lx],size[%08lx]\n", initrd_start, INITRD_SIZE); diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c index de1304c9111..fa015d80161 100644 --- a/arch/m68k/amiga/chipram.c +++ b/arch/m68k/amiga/chipram.c @@ -52,10 +52,9 @@ void *amiga_chip_alloc(unsigned long size, const char *name) #ifdef DEBUG printk("amiga_chip_alloc: allocate %ld bytes\n", size); #endif - res = kmalloc(sizeof(struct resource), GFP_KERNEL); + res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (!res) return NULL; - memset(res, 0, sizeof(struct resource)); res->name = name; if (allocate_resource(&chipram_res, res, size, 0, UINT_MAX, PAGE_SIZE, NULL, NULL) < 0) { diff --git a/arch/m68k/atari/hades-pci.c b/arch/m68k/atari/hades-pci.c index 6ca57b6564d..bee2b1443e3 100644 --- a/arch/m68k/atari/hades-pci.c +++ b/arch/m68k/atari/hades-pci.c @@ -375,10 +375,9 @@ struct pci_bus_info * __init init_hades_pci(void) * Allocate memory for bus info structure. */ - bus = kmalloc(sizeof(struct pci_bus_info), GFP_KERNEL); + bus = kzalloc(sizeof(struct pci_bus_info), GFP_KERNEL); if (!bus) return NULL; - memset(bus, 0, sizeof(struct pci_bus_info)); /* * Claim resources. The m68k has no separate I/O space, both diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 911f2ce3f53..2adbeb16e1b 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -99,7 +99,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 58afa8be604..2b2a10da64a 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -60,6 +60,7 @@ SECTIONS { #endif .text : { + _text = .; _stext = . ; *(.text) SCHED_TEXT diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 4a9f1ecefaf..9b34238d41c 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -90,7 +90,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t u32 #define elf_caddr_t u32 #define init_elf_binfmt init_elfn32_binfmt diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index e3181377989..993f7ec70f3 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -92,7 +92,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t u32 #define elf_caddr_t u32 #define init_elf_binfmt init_elf32_binfmt diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index ab12c8f0151..1bbefbf4337 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -52,10 +52,6 @@ static struct linux_binfmt irix_format = { irix_core_dump, PAGE_SIZE }; -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif - #ifdef DEBUG /* Debugging routines. */ static char *get_elf_p_type(Elf32_Word p_type) @@ -1013,7 +1009,7 @@ static int notesize(struct memelfnote *en) int sz; sz = sizeof(struct elf_note); - sz += roundup(strlen(en->name), 4); + sz += roundup(strlen(en->name) + 1, 4); sz += roundup(en->datasz, 4); return sz; @@ -1032,7 +1028,7 @@ static int writenote(struct memelfnote *men, struct file *file) { struct elf_note en; - en.n_namesz = strlen(men->name); + en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c index 7fa5fd16e46..5697c6e250a 100644 --- a/arch/mips/mm/dma-coherent.c +++ b/arch/mips/mm/dma-coherent.c @@ -190,14 +190,14 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c index 8da19fd22ac..f088344db46 100644 --- a/arch/mips/mm/dma-ip27.c +++ b/arch/mips/mm/dma-ip27.c @@ -197,14 +197,14 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c index ec54ed0d26f..b42b6f7456e 100644 --- a/arch/mips/mm/dma-ip32.c +++ b/arch/mips/mm/dma-ip32.c @@ -363,14 +363,15 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) { if (direction == DMA_NONE) return; diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 2eeffe5c2a3..8cecef0957c 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -299,14 +299,15 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) { if (direction == DMA_NONE) return; diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 99ebf3ccc22..675502ada5a 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -39,7 +39,7 @@ void *__kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -62,8 +62,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < FIXADDR_START) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -78,8 +77,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) local_flush_tlb_one(vaddr); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } #ifndef CONFIG_LIMITED_DMA @@ -92,7 +90,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - inc_preempt_count(); + pagefault_disable(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c index 1e64e7b8811..ecb10a4f63c 100644 --- a/arch/parisc/kernel/binfmt_elf32.c +++ b/arch/parisc/kernel/binfmt_elf32.c @@ -75,7 +75,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t unsigned int #define init_elf_binfmt init_elf32_binfmt #define ELF_PLATFORM ("PARISC32\0") diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 64785e46f93..641f9c920ee 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -152,7 +152,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, const struct exception_table_entry *fix; unsigned long acc_type; - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 89b03c8da9d..d3f2080d2ee 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,61 +46,6 @@ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; cpumask_t cpus_in_sr = CPU_MASK_NONE; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that that no need to invent something new. - */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - #ifdef CONFIG_SMP static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); @@ -113,7 +58,7 @@ void crash_ipi_callback(struct pt_regs *regs) hard_irq_disable(); if (!cpu_isset(cpu, cpus_in_crash)) - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); cpu_set(cpu, cpus_in_crash); /* @@ -306,7 +251,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_this_cpu(regs, crashing_cpu); + crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); if (ppc_md.kexec_cpu_down) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7b8d12b9026..4657563f881 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -85,7 +85,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index b9561d30051..7d0f13fecc0 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -101,7 +101,7 @@ struct flash_block_list_header { /* just the header of flash_block_list */ static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; /* Use slab cache to guarantee 4k alignment */ -static kmem_cache_t *flash_block_cache = NULL; +static struct kmem_cache *flash_block_cache = NULL; #define FLASH_BLOCK_LIST_VERSION (1UL) @@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, } /* constructor for flash_block_cache */ -void rtas_block_ctor(void *ptr, kmem_cache_t *cache, unsigned long flags) +void rtas_block_ctor(void *ptr, struct kmem_cache *cache, unsigned long flags) { memset(ptr, 0, RTAS_BLK_SIZE); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 320353f0926..e4ebe1a6228 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -36,7 +36,7 @@ #include <linux/stddef.h> #include <linux/tty.h> #include <linux/binfmts.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #endif #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 22123a0d541..63ed265b7f0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -239,7 +239,7 @@ static void unregister_cpu_online(unsigned int cpu) struct cpu *c = &per_cpu(cpu_devices, cpu); struct sys_device *s = &c->sysdev; - BUG_ON(c->no_control); + BUG_ON(!c->hotpluggable); if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) @@ -424,10 +424,10 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (!ppc_md.cpu_die) - c->no_control = 1; + if (ppc_md.cpu_die) + c->hotpluggable = 1; - if (cpu_online(cpu) || (c->no_control == 0)) { + if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); sysdev_create_file(&c->sysdev, &attr_physical_id); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index c913ad5cad2..a4b28c73bba 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -264,7 +264,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, /* Allocate a VMA structure and fill it up */ - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (vma == NULL) { rc = -ENOMEM; goto fail_mmapsem; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e8342d86753..04b98671a06 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -33,6 +33,7 @@ SECTIONS /* Text and gots */ .text : { + _text = .; *(.text .text.*) SCHED_TEXT LOCK_TEXT diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 506d89768d4..89c836d5480 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -146,6 +146,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return hugepte_offset(hpdp, addr); } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) { pte_t *hugepte = hugepd_page(*hpdp); @@ -1042,7 +1047,7 @@ repeat: return err; } -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { memset(addr, 0, kmem_cache_size(cache)); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9a178549cbc..d12a87ec5ae 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -141,7 +141,7 @@ static int __init setup_kcore(void) } module_init(setup_kcore); -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { memset(addr, 0, kmem_cache_size(cache)); } @@ -166,9 +166,9 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { /* Hugepages need one extra cache, initialized in hugetlbpage.c. We * can't put into the tables above, because HPAGE_SHIFT is not compile * time constant. */ -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; #else -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif void pgtable_cache_init(void) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index c7d010749a1..e3af9112c02 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -40,7 +40,7 @@ #include "spufs.h" -static kmem_cache_t *spufs_inode_cache; +static struct kmem_cache *spufs_inode_cache; char *isolated_loader; static struct inode * @@ -48,7 +48,7 @@ spufs_alloc_inode(struct super_block *sb) { struct spufs_inode_info *ei; - ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL); + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); if (!ei) return NULL; @@ -65,7 +65,7 @@ spufs_destroy_inode(struct inode *inode) } static void -spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags) +spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags) { struct spufs_inode_info *ei = p; diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 16e8661e1fe..61921268a0d 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -31,6 +31,7 @@ SECTIONS .plt : { *(.plt) } .text : { + _text = .; *(.text) SCHED_TEXT LOCK_TEXT diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 67d5cf9cba8..b8c23729026 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -561,7 +561,6 @@ appldata_offline_cpu(int cpu) spin_unlock(&appldata_timer_lock); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit appldata_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -582,7 +581,6 @@ appldata_cpu_notify(struct notifier_block *self, static struct notifier_block appldata_nb = { .notifier_call = appldata_cpu_notify, }; -#endif /* * appldata_init() diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c index 9565a2dcfad..5c46054195c 100644 --- a/arch/s390/kernel/binfmt_elf32.c +++ b/arch/s390/kernel/binfmt_elf32.c @@ -176,7 +176,6 @@ struct elf_prpsinfo32 #include <linux/highuid.h> -#define elf_addr_t u32 /* #define init_elf_binfmt init_elf32_binfmt */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 67914fe7f31..576368c4f60 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -200,7 +200,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index 2d549ed2e11..bbaca66fa29 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -11,7 +11,7 @@ #include <linux/errno.h> #include <linux/mm.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/futex.h> #ifndef __s390x__ @@ -258,7 +258,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -284,7 +284,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) default: ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); *old = oldval; return ret; } diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 55f43506995..0c9ea38d2ca 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -38,7 +38,7 @@ struct sq_mapping { static struct sq_mapping *sq_mapping_list; static DEFINE_SPINLOCK(sq_mapping_lock); -static kmem_cache_t *sq_cache; +static struct kmem_cache *sq_cache; static unsigned long *sq_bitmap; #define store_queue_barrier() \ diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 696ca75752d..f8dd6b7bfab 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -332,8 +332,7 @@ void __init setup_arch(char **cmdline_p) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START+__MEMORY_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET + __MEMORY_START : 0; + initrd_start = INITRD_START + PAGE_OFFSET + __MEMORY_START; initrd_end = initrd_start + INITRD_SIZE; } else { printk("initrd extends beyond end of memory " diff --git a/arch/sh/kernel/sh_ksyms.c b/arch/sh/kernel/sh_ksyms.c index c706f3bfd89..ceee7914340 100644 --- a/arch/sh/kernel/sh_ksyms.c +++ b/arch/sh/kernel/sh_ksyms.c @@ -99,10 +99,6 @@ EXPORT_SYMBOL(__down_trylock); EXPORT_SYMBOL(synchronize_irq); #endif -#ifdef CONFIG_PM -EXPORT_SYMBOL(pm_suspend); -#endif - EXPORT_SYMBOL(csum_partial); #ifdef CONFIG_IPV6 EXPORT_SYMBOL(csum_ipv6_magic); diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c index 50d7c4993be..bb1c480a59c 100644 --- a/arch/sh/kernel/signal.c +++ b/arch/sh/kernel/signal.c @@ -23,6 +23,7 @@ #include <linux/elf.h> #include <linux/personality.h> #include <linux/binfmts.h> +#include <linux/freezer.h> #include <asm/ucontext.h> #include <asm/uaccess.h> diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 075d6cc1a2d..deb46941f31 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -97,7 +97,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { ret = -ENOMEM; goto up_fail; diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 329059d6b54..cf2c2ee35a3 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -63,6 +63,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 92e745341e4..b60ad83a763 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -30,7 +30,7 @@ #define NR_PMB_ENTRIES 16 -static kmem_cache_t *pmb_cache; +static struct kmem_cache *pmb_cache; static unsigned long pmb_map; static struct pmb_entry pmb_init_map[] = { @@ -283,7 +283,7 @@ void pmb_unmap(unsigned long addr) } while (pmbe); } -static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags) +static void pmb_cache_ctor(void *pmb, struct kmem_cache *cachep, unsigned long flags) { struct pmb_entry *pmbe = pmb; @@ -297,7 +297,7 @@ static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags) spin_unlock_irq(&pmb_list_lock); } -static void pmb_cache_dtor(void *pmb, kmem_cache_t *cachep, unsigned long flags) +static void pmb_cache_dtor(void *pmb, struct kmem_cache *cachep, unsigned long flags) { spin_lock_irq(&pmb_list_lock); pmb_list_del(pmb); diff --git a/arch/sh64/kernel/setup.c b/arch/sh64/kernel/setup.c index ffb310e33ce..b9e7d54d7b8 100644 --- a/arch/sh64/kernel/setup.c +++ b/arch/sh64/kernel/setup.c @@ -243,9 +243,7 @@ void __init setup_arch(char **cmdline_p) if (INITRD_START + INITRD_SIZE <= (PFN_PHYS(last_pfn))) { reserve_bootmem_node(NODE_DATA(0), INITRD_START + __MEMORY_START, INITRD_SIZE); - initrd_start = - (long) INITRD_START ? INITRD_START + PAGE_OFFSET + __MEMORY_START : 0; - + initrd_start = (long) INITRD_START + PAGE_OFFSET + __MEMORY_START; initrd_end = initrd_start + INITRD_SIZE; } else { printk("initrd extends beyond end of memory " diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c index 9e2ffc45c0e..1666d3efb52 100644 --- a/arch/sh64/kernel/signal.c +++ b/arch/sh64/kernel/signal.c @@ -22,7 +22,7 @@ #include <linux/errno.h> #include <linux/wait.h> #include <linux/personality.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/stddef.h> diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c index 8e2f6c28b73..4f72ab33bb2 100644 --- a/arch/sh64/mm/fault.c +++ b/arch/sh64/mm/fault.c @@ -154,7 +154,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; /* TLB misses upon some cache flushes get done under cli() */ diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index 187cf01750b..4b455f61114 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c @@ -53,6 +53,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5cc5ff7f882..b73e6b9067e 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -11,6 +11,7 @@ SECTIONS . = 0x10000 + SIZEOF_HEADERS; .text 0xf0004000 : { + _text = .; *(.text) SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 4d8ed9c6518..01fc6c25429 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -35,7 +35,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -70,8 +70,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < FIXADDR_START) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -97,8 +96,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } /* We may be fed a pagetable here by ptep_to_xxx and others. */ diff --git a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c index a98f3ae175a..9ad84ff10a1 100644 --- a/arch/sparc64/kernel/binfmt_elf32.c +++ b/arch/sparc64/kernel/binfmt_elf32.c @@ -141,7 +141,6 @@ cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) value->tv_sec = jiffies / HZ; } -#define elf_addr_t u32 #undef start_thread #define start_thread start_thread32 #define init_elf_binfmt init_elf32_binfmt diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index bd9de8c2a2a..4a6063f33e7 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -13,6 +13,7 @@ SECTIONS . = 0x4000; .text 0x0000000000404000 : { + _text = .; *(.text) SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 53b9b1f528e..33fd0b265e7 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -235,6 +235,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 09cb7fccc03..a8e8802eed4 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -176,9 +176,9 @@ unsigned long sparc64_kern_sec_context __read_mostly; int bigkernel = 0; -kmem_cache_t *pgtable_cache __read_mostly; +struct kmem_cache *pgtable_cache __read_mostly; -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { clear_page(addr); } diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index beaa02810f0..236d02f41a0 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -239,7 +239,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign } } -static kmem_cache_t *tsb_caches[8] __read_mostly; +static struct kmem_cache *tsb_caches[8] __read_mostly; static const char *tsb_cache_names[8] = { "tsb_8KB", diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c index 824386974f8..9c2e7a758f2 100644 --- a/arch/um/drivers/daemon_kern.c +++ b/arch/um/drivers/daemon_kern.c @@ -98,4 +98,4 @@ static int register_daemon(void) return 0; } -__initcall(register_daemon); +late_initcall(register_daemon); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 426633e5d6e..aa3090d05a8 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -31,9 +31,9 @@ static irqreturn_t line_interrupt(int irq, void *data) return IRQ_HANDLED; } -static void line_timer_cb(void *arg) +static void line_timer_cb(struct work_struct *work) { - struct line *line = arg; + struct line *line = container_of(work, struct line, task.work); if(!line->throttled) chan_interrupt(&line->chan_list, &line->task, line->tty, @@ -443,7 +443,7 @@ int line_open(struct line *lines, struct tty_struct *tty) * is registered. */ enable_chan(line); - INIT_WORK(&line->task, line_timer_cb, line); + INIT_DELAYED_WORK(&line->task, line_timer_cb); if(!line->sigio){ chan_enable_winch(&line->chan_list, tty); diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c index c090fbd464e..52ccb7b53cd 100644 --- a/arch/um/drivers/mcast_kern.c +++ b/arch/um/drivers/mcast_kern.c @@ -127,4 +127,4 @@ static int register_mcast(void) return 0; } -__initcall(register_mcast); +late_initcall(register_mcast); diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c index 6e1ef855828..e67362acf0e 100644 --- a/arch/um/drivers/pcap_kern.c +++ b/arch/um/drivers/pcap_kern.c @@ -109,4 +109,4 @@ static int register_pcap(void) return 0; } -__initcall(register_pcap); +late_initcall(register_pcap); diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c index 788da5439a2..25634bd1f58 100644 --- a/arch/um/drivers/slip_kern.c +++ b/arch/um/drivers/slip_kern.c @@ -95,4 +95,4 @@ static int register_slip(void) return 0; } -__initcall(register_slip); +late_initcall(register_slip); diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c index ae322e1c8a8..b3ed8fb874a 100644 --- a/arch/um/drivers/slirp_kern.c +++ b/arch/um/drivers/slirp_kern.c @@ -119,4 +119,4 @@ static int register_slirp(void) return 0; } -__initcall(register_slirp); +late_initcall(register_slirp); diff --git a/arch/um/include/chan_kern.h b/arch/um/include/chan_kern.h index 572d286ed2c..9003a343e14 100644 --- a/arch/um/include/chan_kern.h +++ b/arch/um/include/chan_kern.h @@ -27,7 +27,7 @@ struct chan { void *data; }; -extern void chan_interrupt(struct list_head *chans, struct work_struct *task, +extern void chan_interrupt(struct list_head *chans, struct delayed_work *task, struct tty_struct *tty, int irq); extern int parse_chan_pair(char *str, struct line *line, int device, const struct chan_opts *opts); diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 7be24811bb3..214ee76c40d 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -51,7 +51,7 @@ struct line { char *tail; int sigio; - struct work_struct task; + struct delayed_work task; const struct line_driver *driver; int have_irq; }; diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h index 6670cc992ec..52b398bcafc 100644 --- a/arch/um/include/sysdep-i386/ptrace.h +++ b/arch/um/include/sysdep-i386/ptrace.h @@ -75,7 +75,7 @@ union uml_pt_regs { #endif #ifdef UML_CONFIG_MODE_SKAS struct skas_regs { - unsigned long regs[HOST_FRAME_SIZE]; + unsigned long regs[MAX_REG_NR]; unsigned long fp[HOST_FP_SIZE]; unsigned long xfp[HOST_XFP_SIZE]; struct faultinfo faultinfo; diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h index b492b12b4a1..4fffae75ba5 100644 --- a/arch/um/include/sysdep-i386/stub.h +++ b/arch/um/include/sysdep-i386/stub.h @@ -9,6 +9,7 @@ #include <sys/mman.h> #include <asm/ptrace.h> #include <asm/unistd.h> +#include <asm/page.h> #include "stub-data.h" #include "kern_constants.h" #include "uml-config.h" diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 617bb9efc93..66cb400c2c9 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -108,7 +108,7 @@ union uml_pt_regs { * file size, while i386 uses FRAME_SIZE. Therefore, we need * to use UM_FRAME_SIZE here instead of HOST_FRAME_SIZE. */ - unsigned long regs[UM_FRAME_SIZE]; + unsigned long regs[MAX_REG_NR]; unsigned long fp[HOST_FP_SIZE]; struct faultinfo faultinfo; long syscall; diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c index 16385e2ada8..70541821775 100644 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -105,4 +105,4 @@ static int register_ethertap(void) return 0; } -__initcall(register_ethertap); +late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c index 0edbac63c52..76570a2c25c 100644 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -90,4 +90,4 @@ static int register_tuntap(void) return 0; } -__initcall(register_tuntap); +late_initcall(register_tuntap); diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index e299ee5a753..49057d8bc66 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -3,7 +3,6 @@ * Licensed under the GPL */ -#include "linux/stddef.h" #include "linux/sched.h" #include "linux/slab.h" #include "linux/types.h" diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c index 5f3cc668582..01212c88fcc 100644 --- a/arch/um/sys-i386/ptrace_user.c +++ b/arch/um/sys-i386/ptrace_user.c @@ -4,9 +4,9 @@ */ #include <stdio.h> +#include <stddef.h> #include <errno.h> #include <unistd.h> -#include <linux/stddef.h> #include "ptrace_user.h" /* Grr, asm/user.h includes asm/ptrace.h, so has to follow ptrace_user.h */ #include <asm/user.h> diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c index 6f4ef2b7fa4..447306b20ae 100644 --- a/arch/um/sys-i386/user-offsets.c +++ b/arch/um/sys-i386/user-offsets.c @@ -2,7 +2,7 @@ #include <signal.h> #include <asm/ptrace.h> #include <asm/user.h> -#include <linux/stddef.h> +#include <stddef.h> #include <sys/poll.h> #define DEFINE(sym, val) \ diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S index 88d087f527c..3a5fd07fe06 100644 --- a/arch/v850/kernel/vmlinux.lds.S +++ b/arch/v850/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ /* Kernel text segment, and some constant data areas. */ #define TEXT_CONTENTS \ + _text = .; \ __stext = . ; \ *(.text) \ SCHED_TEXT \ diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d2265f1c..bfbb9bcae12 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -122,7 +122,7 @@ endchoice choice prompt "Processor family" - default MK8 + default GENERIC_CPU config MK8 bool "AMD-Opteron/Athlon64" @@ -130,16 +130,31 @@ config MK8 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. config MPSC - bool "Intel EM64T" + bool "Intel P4 / older Netburst based Xeon" help - Optimize for Intel Pentium 4 and Xeon CPUs with Intel - Extended Memory 64 Technology(EM64T). For details see + Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs + with Intel Extended Memory 64 Technology(EM64T). For details see <http://www.intel.com/technology/64bitextensions/>. + Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the + Netburst core and shouldn't use this option. You can distingush them + using the cpu family field + in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one + (this rule only applies to system that support EM64T) + +config MCORE2 + bool "Intel Core2 / newer Xeon" + help + Optimize for Intel Core2 and newer Xeons (51xx) + You can distingush the newer Xeons from the older ones using + the cpu family field in /proc/cpuinfo. 15 is a older Xeon + (use CONFIG_MPSC then), 6 is a newer one. This rule only + applies to CPUs that support EM64T. config GENERIC_CPU bool "Generic-x86-64" help Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. endchoice @@ -149,12 +164,12 @@ endchoice config X86_L1_CACHE_BYTES int default "128" if GENERIC_CPU || MPSC - default "64" if MK8 + default "64" if MK8 || MCORE2 config X86_L1_CACHE_SHIFT int default "7" if GENERIC_CPU || MPSC - default "6" if MK8 + default "6" if MK8 || MCORE2 config X86_INTERNODE_CACHE_BYTES int @@ -344,11 +359,6 @@ config ARCH_DISCONTIGMEM_ENABLE depends on NUMA default y - -config ARCH_DISCONTIGMEM_ENABLE - def_bool y - depends on NUMA - config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on NUMA @@ -455,6 +465,17 @@ config CALGARY_IOMMU Normally the kernel will make the right choice by itself. If unsure, say Y. +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 6e38d4daeed..b471b8550d0 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -30,6 +30,10 @@ cflags-y := cflags-kernel-y := cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) +# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it +# will eventually. Use -mtune=generic as fallback +cflags-$(CONFIG_MCORE2) += \ + $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) cflags-y += -m64 diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 0f5d44e86be..96f226cfb33 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.19-rc2-git4 -# Sat Oct 21 03:38:52 2006 +# Linux kernel version: 2.6.19-git7 +# Wed Dec 6 23:50:47 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -47,13 +47,14 @@ CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y -# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -87,9 +88,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_BLOCK=y -CONFIG_LBD=y # CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set # # IO Schedulers @@ -111,10 +110,11 @@ CONFIG_X86_PC=y # CONFIG_X86_VSMP is not set # CONFIG_MK8 is not set # CONFIG_MPSC is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_TSC=y CONFIG_X86_GOOD_APIC=y # CONFIG_MICROCODE is not set @@ -322,6 +322,7 @@ CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -624,6 +625,7 @@ CONFIG_SATA_INTEL_COMBINED=y # CONFIG_PATA_IT821X is not set # CONFIG_PATA_JMICRON is not set # CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set @@ -795,6 +797,7 @@ CONFIG_BNX2=y CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set # CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set # # Token Ring devices @@ -927,10 +930,6 @@ CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -1135,6 +1134,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set +# CONFIG_USB_MULTITHREAD_PROBE is not set # CONFIG_USB_OTG is not set # @@ -1212,6 +1212,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set CONFIG_USB_MON=y diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 82ef182de6a..543ef4f405e 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -305,8 +305,6 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); #undef MODULE_DESCRIPTION #undef MODULE_AUTHOR -#define elf_addr_t __u32 - static void elf32_init(struct pt_regs *); #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 @@ -351,7 +349,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index 0e0a266d976..ff499ef2a1b 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -584,6 +584,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->rdx = (unsigned long) &frame->info; regs->rcx = (unsigned long) &frame->uc; + /* Make -mregparm=3 work */ + regs->rax = sig; + regs->rdx = (unsigned long) &frame->info; + regs->rcx = (unsigned long) &frame->uc; + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 3a01329473a..3e5ed20cba4 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) return -ENOMEM; diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 4d9d5ed942b..124b2d27b4a 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/module.h> +#include <linux/ioport.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; +static struct resource *ioapic_resources; +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -133,7 +140,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -452,23 +458,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); @@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; @@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); @@ -585,6 +605,64 @@ static int __init detect_init_APIC (void) return 0; } +#ifdef CONFIG_X86_IO_APIC +static struct resource * __init ioapic_setup_resources(void) +{ +#define IOAPIC_RESOURCE_NAME_SIZE 11 + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk("IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif + void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -604,6 +682,11 @@ void __init init_apic_mappings(void) apic_mapped = 1; apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). @@ -613,7 +696,9 @@ void __init init_apic_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; @@ -625,6 +710,12 @@ void __init init_apic_mappings(void) apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } } } } @@ -644,10 +735,9 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { - unsigned int lvtt_value, tmp_value, ver; + unsigned int lvtt_value, tmp_value; int cpu = smp_processor_id(); - ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 3525f884af8..95a7a2c1313 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -28,71 +28,6 @@ /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, - void *data, size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, no need to invent something new. - */ - - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - -static void crash_save_self(struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - crash_save_this_cpu(regs, cpu); -} - #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; @@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self, return NOTIFY_STOP; local_irq_disable(); - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs) disable_IO_APIC(); - crash_save_self(regs); + crash_save_cpu(regs, smp_processor_id()); } diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 68273bff58c..829698f6d04 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -69,11 +69,18 @@ static void nvidia_bugs(void) static void ati_bugs(void) { - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); - } +} + +static void intel_bugs(void) +{ + u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); + +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif } struct chipset { @@ -85,6 +92,7 @@ static struct chipset early_qrk[] = { { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, + { PCI_VENDOR_ID_INTEL, intel_bugs}, {} }; diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7d401b00d82..601d332c4b7 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -230,7 +230,6 @@ ENTRY(system_call) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) - CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -241,7 +240,6 @@ ENTRY(system_call) * Syscall return path ending with SYSRET (fast path) * Has incomplete stack frame and undefined top of stack. */ - .globl ret_from_sys_call ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ @@ -251,8 +249,8 @@ sysret_check: TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz sysret_careful + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: */ @@ -265,10 +263,10 @@ sysret_check: swapgs sysretq + CFI_RESTORE_STATE /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON @@ -306,7 +304,6 @@ badsys: /* Do syscall tracing */ tracesys: - CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -322,32 +319,13 @@ tracesys: call *sys_call_table(,%rax,8) 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ - jmp int_ret_from_sys_call - CFI_ENDPROC -END(system_call) /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. - */ -ENTRY(int_ret_from_sys_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-ARGOFFSET - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - CFI_REL_OFFSET rsp,RSP-ARGOFFSET - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ - CFI_REL_OFFSET rip,RIP-ARGOFFSET - CFI_REL_OFFSET rdx,RDX-ARGOFFSET - CFI_REL_OFFSET rcx,RCX-ARGOFFSET - CFI_REL_OFFSET rax,RAX-ARGOFFSET - CFI_REL_OFFSET rdi,RDI-ARGOFFSET - CFI_REL_OFFSET rsi,RSI-ARGOFFSET - CFI_REL_OFFSET r8,R8-ARGOFFSET - CFI_REL_OFFSET r9,R9-ARGOFFSET - CFI_REL_OFFSET r10,R10-ARGOFFSET - CFI_REL_OFFSET r11,R11-ARGOFFSET + */ + .globl int_ret_from_sys_call +int_ret_from_sys_call: cli TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) @@ -394,8 +372,6 @@ int_very_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi - cli - TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -411,7 +387,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(int_ret_from_sys_call) +END(system_call) /* * Certain special system calls that need to save a complete full stack frame. diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 8e78a75d186..b007433f96b 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -33,7 +33,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; - +struct genapic *genapic_force; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,6 +46,13 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int max_apic = 0; + /* genapic selection can be forced because of certain quirks. + */ + if (genapic_force) { + genapic = genapic_force; + goto print; + } + #if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 9561eb3c5b5..cc230b93cd1 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data) { int i; - for (i = 0; i < 256; i++) + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); + + for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); asm volatile("lidt %0" :: "m" (idt_descr)); - clear_bss(); early_printk("Kernel alive\n"); diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index 3aa1e9bb781..1d58c13bc6b 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf) struct task_struct *tsk = current; int err = 0; - { - extern void bad_user_i387_struct(void); - if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave)) - bad_user_i387_struct(); - } + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.i387.fxsave)); if ((unsigned long)buf % 16) printk("save_i387: bad fpstate %p\n",buf); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index c4ef801b765..d73c79e821f 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -void (*interrupt[NR_IRQS])(void) = { +/* for the irq vectors */ +static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c80081a6ba4..2a1dcd5f69c 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; -static int disable_timer_pin_1 __initdata; - -int timer_over_8254 __initdata = 1; - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -178,14 +174,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; union entry_union eu; eu.entry = e; - spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -348,29 +350,6 @@ static int __init disable_ioapic_setup(char *str) } early_param("noapic", disable_ioapic_setup); -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - - /* * Find the IRQ entry number of a certain pin. */ @@ -750,6 +729,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) return vector; } +static void __clear_irq_vector(int irq) +{ + cpumask_t mask; + int cpu, vector; + + BUG_ON(!irq_vector[irq]); + + vector = irq_vector[irq]; + cpus_and(mask, irq_domain[irq], cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + irq_vector[irq] = 0; + irq_domain[irq] = CPU_MASK_NONE; +} + void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ @@ -794,27 +789,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) handle_edge_irq, "edge"); } } - -static void __init setup_IO_APIC_irqs(void) +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + int vector; unsigned long flags; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + } + + if (!apic && !IO_APIC_IRQ(irq)) + return; + + if (IO_APIC_IRQ(irq)) { + cpumask_t mask; + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); + if (vector < 0) + return; + + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + + ioapic_write_entry(apic, pin, entry); + + spin_lock_irqsave(&ioapic_lock, flags); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -826,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - } - irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; - - if (IO_APIC_IRQ(irq)) { - cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - continue; - - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector; - - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); + setup_IO_APIC_irq(apic, pin, idx, irq); - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1563,10 +1568,33 @@ static inline void unlock_ExtINT_logic(void) * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for modern platforms only. */ -static inline void check_timer(void) + +static int try_apic_pin(int apic, int pin, char *msg) +{ + apic_printk(APIC_VERBOSE, KERN_INFO + "..TIMER: trying IO-APIC=%d PIN=%d %s", + apic, pin, msg); + + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (!no_timer_check && timer_irq_works()) { + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + return 1; + } + clear_IO_APIC_pin(apic, pin); + apic_printk(APIC_QUIET, KERN_ERR " .. failed\n"); + return 0; +} + +/* The function from hell */ +static void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -1587,61 +1615,43 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + /* Do this first, otherwise we get double interrupts on ATI boards */ + if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled")) + return; - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + /* Now try again with IRQ0 8259A enabled. + Assumes timer is on IO-APIC 0 ?!? */ + enable_8259A_irq(0); + unmask_IO_APIC_irq(0); + if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled")) + return; + disable_8259A_irq(0); + + /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides + on Nvidia boards */ + if (!(apic1 == 0 && pin1 == 0) && + try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled")) + return; + if (!(apic1 == 0 && pin1 == 2) && + try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled")) + return; - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); + /* Then try pure 8259A routing on the 8259 as reported by BIOS*/ + enable_8259A_irq(0); if (pin2 != -1) { - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", - apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } + if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS")) return; - } - /* - * Cleanup, just in case ... - */ - clear_IO_APIC_pin(apic2, pin2); } - apic_printk(APIC_VERBOSE," failed.\n"); + + /* Tried all possibilities to go through the IO-APIC. Now come the + really cheesy fallbacks. */ if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); @@ -1837,7 +1847,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -2139,7 +2149,15 @@ void __init setup_ioapic_dest(void) if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + if(!irq_vector[irq]) + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); } } diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index e46c55856d4..0c06af6c13b 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) if (likely(irq < NR_IRQS)) generic_handle_irq(irq); - else + else if (printk_ratelimit()) printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index ac241567e68..209c8c0bec7 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index c7587fc3901..ac085038af2 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -#ifdef CONFIG_HOTPLUG_CPU static void mce_remove_device(unsigned int cpu) { int i; @@ -652,6 +651,7 @@ static void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -674,7 +674,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) static struct notifier_block mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; -#endif static __init int mce_init_device(void) { diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index 883fe747f64..fa09debad4b 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -551,7 +551,6 @@ out: return err; } -#ifdef CONFIG_HOTPLUG_CPU /* * let's be hotplug friendly. * in case of multiple core processors, the first core always takes ownership @@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank) sprintf(name, "threshold_bank%i", bank); +#ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; } +#endif /* remove all sibling symlinks before unregistering */ for_each_cpu_mask(i, b->cpus) { @@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb, static struct notifier_block threshold_cpu_notifier = { .notifier_call = threshold_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ static __init int threshold_init_device(void) { diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b147ab19fbd..08072568847 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -35,8 +35,6 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; -int acpi_found_madt; - /* * Various Linux-internal data structures created from the * MP-table. diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d9..27e95e7922c 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -12,14 +12,15 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include <linux/nmi.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/sysdev.h> -#include <linux/nmi.h> #include <linux/sysctl.h> #include <linux/kprobes.h> +#include <linux/cpumask.h> #include <asm/smp.h> #include <asm/nmi.h> @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; int rc=0; @@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + #ifdef CONFIG_X86_MCE /* Could check oops_in_progress here too, but it's safer not too */ @@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 37a770859e7..3215675ab12 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -41,6 +41,13 @@ #include <asm/pci-direct.h> #include <asm/system.h> #include <asm/dma.h> +#include <asm/rio.h> + +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ @@ -115,14 +122,35 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; +/* PHB debug registers */ + +static const unsigned long phb_debug_offsets[] = { + 0x4000 /* PHB 0 DEBUG */, + 0x5000 /* PHB 1 DEBUG */, + 0x6000 /* PHB 2 DEBUG */, + 0x7000 /* PHB 3 DEBUG */ +}; + +/* + * STUFF register for each debug PHB, + * byte 1 = start bus number, byte 2 = end bus number + */ + +#define PHB_DEBUG_STUFF_OFFSET 0x0020 + unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; +static struct rio_table_hdr *rio_table_hdr __initdata; +static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; + struct calgary_bus_info { void *tce_space; unsigned char translation_disabled; signed char phbid; + void __iomem *bbar; }; static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; @@ -475,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, }; +static inline void __iomem * busno_to_bbar(unsigned char num) +{ + return bus_info[num].bbar; +} + static inline int busno_to_phbid(unsigned char num) { return bus_info[num].phbid; @@ -620,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) static void __init calgary_reserve_regions(struct pci_dev *dev) { unsigned int npages; - void __iomem *bbar; - unsigned char busnum; u64 start; struct iommu_table *tbl = dev->sysdata; - bbar = tbl->bbar; - busnum = dev->bus->number; - /* reserve bad_dma_address in case it's a legal address */ iommu_range_reserve(tbl, bad_dma_address, 1); @@ -740,7 +768,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar, { u64 val64; void __iomem *target; - unsigned long phb_shift = -1; + unsigned int phb_shift = ~0; /* silence gcc */ u64 mask; switch (busno_to_phbid(busnum)) { @@ -828,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline unsigned int __init locate_register_space(struct pci_dev *dev) -{ - int rionodeid; - u32 address; - - /* - * Each Calgary has four busses. The first four busses (first Calgary) - * have RIO node ID 2, then the next four (second Calgary) have RIO - * node ID 3, the next four (third Calgary) have node ID 2 again, etc. - * We use a gross hack - relying on the dev->bus->number ordering, - * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1, - * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the - * second (id 3), and then it repeats modulo 14. - */ - rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2; - /* - * register space address calculation as follows: - * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) - * ChassisBase is always zero for x366/x260/x460 - * RioNodeId is 2 for first Calgary, 3 for second Calgary - */ - address = START_ADDRESS - - (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) + - (0x100000) * (rionodeid - CHASSIS_BASE); - return address; -} - static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); @@ -864,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev) static int __init calgary_init_one(struct pci_dev *dev) { - u32 address; void __iomem *bbar; int ret; BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - address = locate_register_space(dev); - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(address, 1024 * 1024); - if (!bbar) { - ret = -ENODATA; - goto done; - } - + bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) - goto iounmap; + goto done; pci_dev_get(dev); dev->bus->self = dev; @@ -888,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev) return 0; -iounmap: - iounmap(bbar); done: return ret; } +static int __init calgary_locate_bbars(void) +{ + int ret; + int rioidx, phb, bus; + void __iomem *bbar; + void __iomem *target; + unsigned long offset; + u8 start_bus, end_bus; + u32 val; + + ret = -ENODATA; + for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { + struct rio_detail *rio = rio_devs[rioidx]; + + if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) + continue; + + /* map entire 1MB of Calgary config space */ + bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); + if (!bbar) + goto error; + + for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; + target = calgary_reg(bbar, offset); + + val = be32_to_cpu(readl(target)); + start_bus = (u8)((val & 0x00FF0000) >> 16); + end_bus = (u8)((val & 0x0000FF00) >> 8); + for (bus = start_bus; bus <= end_bus; bus++) { + bus_info[bus].bbar = bbar; + bus_info[bus].phbid = phb; + } + } + } + + return 0; + +error: + /* scan bus_info and iounmap any bbars we previously ioremap'd */ + for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) + if (bus_info[bus].bbar) + iounmap(bus_info[bus].bbar); + + return ret; +} + static int __init calgary_init(void) { - int ret = -ENODEV; + int ret; struct pci_dev *dev = NULL; + ret = calgary_locate_bbars(); + if (ret) + return ret; + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, @@ -921,7 +963,7 @@ static int __init calgary_init(void) error: do { - dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, + dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, dev); if (!dev) @@ -962,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram) return ret; } +static int __init build_detail_arrays(void) +{ + unsigned long ptr; + int i, scal_detail_size, rio_detail_size; + + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + printk(KERN_WARNING + "Calgary: MAX_NUMNODES too low! Defined as %d, " + "but system has %d nodes.\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); + return -ENODEV; + } + + switch (rio_table_hdr->version){ + case 2: + scal_detail_size = 11; + rio_detail_size = 13; + break; + case 3: + scal_detail_size = 12; + rio_detail_size = 15; + break; + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -EPROTO; + } + + ptr = ((unsigned long)rio_table_hdr) + 3; + for (i = 0; i < rio_table_hdr->num_scal_dev; + i++, ptr += scal_detail_size) + scal_devs[i] = (struct scal_detail *)ptr; + + for (i = 0; i < rio_table_hdr->num_rio_dev; + i++, ptr += rio_detail_size) + rio_devs[i] = (struct rio_detail *)ptr; + + return 0; +} + void __init detect_calgary(void) { u32 val; int bus; void *tbl; int calgary_found = 0; - int phb = -1; + unsigned long ptr; + int offset; + int ret; /* * if the user specified iommu=off or iommu=soft or we found @@ -977,25 +1062,47 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; + ptr = (unsigned long)phys_to_virt(get_bios_ebda()); + + rio_table_hdr = NULL; + offset = 0x180; + while (offset) { + /* The block id is stored in the 2nd word */ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + /* set the pointer past the offset & block id */ + rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); + break; + } + /* The next offset is stored in the 1st word. 0 means no more */ + offset = *((unsigned short *)(ptr + offset)); + } + if (!rio_table_hdr) { + printk(KERN_ERR "Calgary: Unable to locate " + "Rio Grande Table in EBDA - bailing!\n"); + return; + } + + ret = build_detail_arrays(); + if (ret) { + printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret); + return; + } + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { int dev; struct calgary_bus_info *info = &bus_info[bus]; - info->phbid = -1; if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; - /* - * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) - * it is connected to releative to the clagary chip. - */ - phb = (phb + 1) % PHBS_PER_CALGARY; - if (info->translation_disabled) continue; @@ -1010,7 +1117,6 @@ void __init detect_calgary(void) if (!tbl) goto cleanup; info->tce_space = tbl; - info->phbid = phb; calgary_found = 1; break; } diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8..683b7a5c1ab 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 16261a8a330..fc1960f1f24 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -601,10 +601,9 @@ void __init gart_iommu_init(void) (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not available.\n" + "but GART IOMMU not available.\n" KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7451a4c43c1..a418ee4c8c6 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -108,17 +108,15 @@ void exit_idle(void) */ static void default_idle(void) { - local_irq_enable(); - current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + local_irq_disable(); + if (!need_resched()) { + /* Enables interrupts one instruction before HLT. + x86 special cases this so there is no race. */ + safe_halt(); + } else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } @@ -130,15 +128,7 @@ static void default_idle(void) static void poll_idle (void) { local_irq_enable(); - - asm volatile( - "2:" - "testl %0,%1;" - "rep; nop;" - "je 2b;" - : : - "i" (_TIF_NEED_RESCHED), - "m" (current_thread_info()->flags)); + cpu_relax(); } void cpu_idle_wait(void) @@ -219,6 +209,12 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_irq_disable(); enter_idle(); idle(); /* In many cases the interrupt that ended idle @@ -256,9 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0,0); + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4..af425a8049f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { @@ -732,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - /* When there is only one core no need to synchronize RDTSC */ - if (num_possible_cpus() == 1) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); - else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -835,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); @@ -854,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9f74c883568..af1ec4d23cf 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -379,12 +379,17 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, put_cpu(); return 0; } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + spin_lock_bh(&call_lock); __smp_call_function_single(cpu, func, info, nonatomic, wait); spin_unlock_bh(&call_lock); put_cpu(); return 0; } +EXPORT_SYMBOL(smp_call_function_single); /* * this function sends a 'generic call function' IPI to all other CPUs diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 9800147c4c6..daf19332f0d 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -60,6 +60,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/numa.h> +#include <asm/genapic.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1169,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + + if (num_online_cpus() > 8 && genapic == &apic_flat) { + printk(KERN_WARNING + "flat APIC routing can't be used with > 8 cpus\n"); + BUG(); + } + err = 0; return err; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0d65b22f229..a1641ffdffc 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,9 +30,9 @@ #include <linux/kprobes.h> #include <linux/kexec.h> #include <linux/unwind.h> +#include <linux/uaccess.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/atomic.h> #include <asm/debugreg.h> @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else @@ -225,16 +225,25 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) { struct ops_and_data *oad = (struct ops_and_data *)context; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } +#define MSG(txt) ops->warning(data, txt) + /* * x86-64 can have upto three kernel stacks: * process stack @@ -248,11 +257,12 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) return p > t && p < t + THREAD_SIZE - 3; } -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, struct stacktrace_ops *ops, void *data) { - const unsigned cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + const unsigned cpu = get_cpu(); + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -268,28 +278,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s if (unwind_init_frame_info(&info, tsk, regs) == 0) unw_ret = dump_trace_unwind(&info, &oad); } else if (tsk == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); else { if (unwind_init_blocked(&info, tsk) == 0) unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { - ops->warning(data, "Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (unsigned long *)UNW_SP(&info); if (!stack) - return; + goto out; } else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) - return; + goto out; else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - ops->warning(data, "Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; @@ -297,12 +309,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s if (tsk && tsk != current) stack = (unsigned long *)tsk->thread.rsp; } - /* - * Align the stack pointer on word boundary, later loops - * rely on that (and corruption / debug info bugs can cause - * unaligned values here): - */ - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); /* * Print function call entries within a stack. 'cond' is the @@ -312,9 +318,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s #define HANDLE_STACK(cond) \ do while (cond) { \ unsigned long addr = *stack++; \ - if (oops_in_progress ? \ - __kernel_text_address(addr) : \ - kernel_text_address(addr)) { \ + /* Use unlocked access here because except for NMIs \ + we should be already protected against module unloads */ \ + if (__kernel_text_address(addr)) { \ /* \ * If the address is either in the text segment of the \ * kernel, or in the region which contains vmalloc'ed \ @@ -380,6 +386,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s tinfo = current_thread_info(); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK +out: + put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -786,8 +794,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); - printk(KERN_EMERG "You probably have a hardware problem with your " - "RAM chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d9534e750d4..6a1f8f491e5 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -51,15 +51,6 @@ SECTIONS RODATA -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(8); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f..4a673f5397a 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -42,6 +42,7 @@ #include <asm/topology.h> #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __syscall_clobber "r11","rcx","memory" int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; @@ -274,7 +275,6 @@ static void __cpuinit cpu_vsyscall_init(void *arg) vsyscall_set_cpu(raw_smp_processor_id()); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { @@ -283,13 +283,13 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } -#endif static void __init map_vsyscall(void) { extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c index 06ae630de82..bc503f50690 100644 --- a/arch/x86_64/lib/csum-partial.c +++ b/arch/x86_64/lib/csum-partial.c @@ -9,8 +9,6 @@ #include <linux/module.h> #include <asm/checksum.h> -#define __force_inline inline __attribute__((always_inline)) - static inline unsigned short from32to16(unsigned a) { unsigned short b = a >> 16; @@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a) * Unrolling to an 128 bytes inner loop. * Using interleaving with more registers to break the carry chains. */ -static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) +static unsigned do_csum(const unsigned char *buff, unsigned len) { unsigned odd, count; unsigned long result = 0; diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 50be90975d0..2dbebd30834 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); + __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } EXPORT_SYMBOL(__udelay); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 3751b4788e2..a65fc6f1dca 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,9 +23,9 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/smp.h> #include <asm/tlbflush.h> @@ -96,7 +96,7 @@ void bust_spinlocks(int yes) static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code) { - unsigned char __user *instr; + unsigned char *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char instr_hi; unsigned char instr_lo; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, static int bad_address(void *p) { unsigned long dummy; - return __get_user(dummy, (unsigned long __user *)p); + return probe_kernel_address((unsigned long *)p, dummy); } void dump_pagetable(unsigned long address) diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 4c0c00ef3ca..2968b90ef8a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void) __initcall(x8664_sysctl_init); #endif -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. */ static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_END, - .vm_page_prot = PAGE_READONLY + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC }; struct vm_area_struct *get_gate_vma(struct task_struct *tsk) diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 3e231d762aa..ccb91dd996a 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } - -static void flush_kernel_map(void *address) +static void cache_flush_page(void *adr) { - if (0 && address && cpu_has_clflush) { - /* is this worth it? */ - int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (address + i)); - } else - asm volatile("wbinvd":::"memory"); - if (address) - __flush_tlb_one(address); - else - __flush_tlb_all(); + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); } +static void flush_kernel_map(void *arg) +{ + struct list_head *l = (struct list_head *)arg; + struct page *pg; + + /* When clflush is available always use it because it is + much cheaper than WBINVD */ + if (!cpu_has_clflush) + asm volatile("wbinvd" ::: "memory"); + list_for_each_entry(pg, l, lru) { + void *adr = page_address(pg); + if (cpu_has_clflush) + cache_flush_page(adr); + __flush_tlb_one(adr); + } +} -static inline void flush_map(unsigned long address) +static inline void flush_map(struct list_head *l) { - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); + on_each_cpu(flush_kernel_map, l, 1, 1); } -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ static inline void save_page(struct page *fpage) { - fpage->lru.next = (struct list_head *)deferred_pages; - deferred_pages = fpage; + list_add(&fpage->lru, &deferred_pages); } /* @@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) void global_flush_tlb(void) { - struct page *dpage; + struct page *pg, *next; + struct list_head l; down_read(&init_mm.mmap_sem); - dpage = xchg(&deferred_pages, NULL); + list_replace_init(&deferred_pages, &l); up_read(&init_mm.mmap_sem); - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); - while (dpage) { - struct page *tmp = dpage; - dpage = (struct page *)dpage->lru.next; - ClearPagePrivate(tmp); - __free_page(tmp); + flush_map(&l); + + list_for_each_entry_safe(pg, next, &l, lru) { + ClearPagePrivate(pg); + __free_page(pg); } } diff --git a/block/blktrace.c b/block/blktrace.c index 74e02c04b2d..d3679dd1d22 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -394,8 +394,7 @@ err: if (bt) { if (bt->dropped_file) debugfs_remove(bt->dropped_file); - if (bt->sequence) - free_percpu(bt->sequence); + free_percpu(bt->sequence); if (bt->rchan) relay_close(bt->rchan); kfree(bt); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 84e9be07318..78c6b312bd3 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -43,8 +43,8 @@ static int cfq_slice_idle = HZ / 125; #define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) #define RQ_CFQQ(rq) ((rq)->elevator_private2) -static kmem_cache_t *cfq_pool; -static kmem_cache_t *cfq_ioc_pool; +static struct kmem_cache *cfq_pool; +static struct kmem_cache *cfq_ioc_pool; static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index cc6e95f8e5d..31512cd9f3a 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -44,17 +44,17 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* * For the allocated request tables */ -static kmem_cache_t *request_cachep; +static struct kmem_cache *request_cachep; /* * For queue allocation */ -static kmem_cache_t *requestq_cachep; +static struct kmem_cache *requestq_cachep; /* * For io context allocations */ -static kmem_cache_t *iocontext_cachep; +static struct kmem_cache *iocontext_cachep; /* * Controlling structure to kblockd @@ -3459,8 +3459,6 @@ static void blk_done_softirq(struct softirq_action *h) } } -#ifdef CONFIG_HOTPLUG_CPU - static int blk_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -3486,8 +3484,6 @@ static struct notifier_block __devinitdata blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; -#endif /* CONFIG_HOTPLUG_CPU */ - /** * blk_complete_request - end I/O on a request * @req: the request being processed diff --git a/crypto/Kconfig b/crypto/Kconfig index cbae8392ce1..92ba249f3a5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -39,6 +39,17 @@ config CRYPTO_HMAC HMAC: Keyed-Hashing for Message Authentication (RFC2104). This is required for IPSec. +config CRYPTO_XCBC + tristate "XCBC support" + depends on EXPERIMENTAL + select CRYPTO_HASH + select CRYPTO_MANAGER + help + XCBC: Keyed-Hashing with encryption algorithm + http://www.ietf.org/rfc/rfc3566.txt + http://csrc.nist.gov/encryption/modes/proposedmodes/ + xcbc-mac/xcbc-mac-spec.pdf + config CRYPTO_NULL tristate "Null algorithms" select CRYPTO_ALGAPI @@ -128,6 +139,16 @@ config CRYPTO_TGR192 See also: <http://www.cs.technion.ac.il/~biham/Reports/Tiger/>. +config CRYPTO_GF128MUL + tristate "GF(2^128) multiplication functions (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Efficient table driven implementation of multiplications in the + field GF(2^128). This is needed by some cypher modes. This + option will be selected automatically if you select such a + cipher mode. Only select this option by hand if you expect to load + an external module that requires these functions. + config CRYPTO_ECB tristate "ECB support" select CRYPTO_BLKCIPHER @@ -147,6 +168,19 @@ config CRYPTO_CBC CBC: Cipher Block Chaining mode This block cipher algorithm is required for IPSec. +config CRYPTO_LRW + tristate "LRW support (EXPERIMENTAL)" + depends on EXPERIMENTAL + select CRYPTO_BLKCIPHER + select CRYPTO_MANAGER + select CRYPTO_GF128MUL + help + LRW: Liskov Rivest Wagner, a tweakable, non malleable, non movable + narrow block cipher mode for dm-crypt. Use it with cipher + specification string aes-lrw-benbi, the key must be 256, 320 or 384. + The first 128, 192 or 256 bits in the key are used for AES and the + rest is used to tie each cipher block to its logical position. + config CRYPTO_DES tristate "DES and Triple DES EDE cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 72366208e29..60e3d24f61f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o +obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o @@ -23,8 +24,10 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256.o obj-$(CONFIG_CRYPTO_SHA512) += sha512.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o +obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o +obj-$(CONFIG_CRYPTO_LRW) += lrw.o obj-$(CONFIG_CRYPTO_DES) += des.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o diff --git a/crypto/api.c b/crypto/api.c index 4fb7fa45cb0..8c446871cd5 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -466,23 +466,8 @@ void crypto_free_tfm(struct crypto_tfm *tfm) kfree(tfm); } -int crypto_alg_available(const char *name, u32 flags) -{ - int ret = 0; - struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, - CRYPTO_ALG_ASYNC); - - if (!IS_ERR(alg)) { - crypto_mod_put(alg); - ret = 1; - } - - return ret; -} - EXPORT_SYMBOL_GPL(crypto_alloc_tfm); EXPORT_SYMBOL_GPL(crypto_free_tfm); -EXPORT_SYMBOL_GPL(crypto_alg_available); int crypto_has_alg(const char *name, u32 type, u32 mask) { diff --git a/crypto/digest.c b/crypto/digest.c index 0155a94e4b1..8f4593268ce 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -21,54 +21,6 @@ #include "internal.h" #include "scatterwalk.h" -void crypto_digest_init(struct crypto_tfm *tfm) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - - crypto_hash_init(&desc); -} -EXPORT_SYMBOL_GPL(crypto_digest_init); - -void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - unsigned int nbytes = 0; - unsigned int i; - - for (i = 0; i < nsg; i++) - nbytes += sg[i].length; - - crypto_hash_update(&desc, sg, nbytes); -} -EXPORT_SYMBOL_GPL(crypto_digest_update); - -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - - crypto_hash_final(&desc, out); -} -EXPORT_SYMBOL_GPL(crypto_digest_final); - -void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - unsigned int nbytes = 0; - unsigned int i; - - for (i = 0; i < nsg; i++) - nbytes += sg[i].length; - - crypto_hash_digest(&desc, sg, nbytes, out); -} -EXPORT_SYMBOL_GPL(crypto_digest_digest); - static int init(struct hash_desc *desc) { struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c new file mode 100644 index 00000000000..0a2aadfa1d8 --- /dev/null +++ b/crypto/gf128mul.c @@ -0,0 +1,466 @@ +/* gf128mul.c - GF(2^128) multiplication functions + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org> + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 31/01/2006 + + This file provides fast multiplication in GF(128) as required by several + cryptographic authentication modes +*/ + +#include <crypto/gf128mul.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define gf128mul_dat(q) { \ + q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\ + q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\ + q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\ + q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\ + q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\ + q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\ + q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\ + q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\ + q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\ + q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\ + q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\ + q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\ + q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\ + q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\ + q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\ + q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\ + q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\ + q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\ + q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\ + q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\ + q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\ + q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\ + q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\ + q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\ + q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\ + q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\ + q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\ + q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\ + q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\ + q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\ + q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\ + q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ +} + +/* Given the value i in 0..255 as the byte overflow when a field element + in GHASH is multipled by x^8, this function will return the values that + are generated in the lo 16-bit word of the field value by applying the + modular polynomial. The values lo_byte and hi_byte are returned via the + macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into + memory as required by a suitable definition of this macro operating on + the table above +*/ + +#define xx(p, q) 0x##p##q + +#define xda_bbe(i) ( \ + (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ + (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \ + (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ + (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ +) + +#define xda_lle(i) ( \ + (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ + (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ + (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ + (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ +) + +static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); +static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); + +/* These functions multiply a field element by x, by x^4 and by x^8 + * in the polynomial field representation. It uses 32-bit word operations + * to gain speed but compensates for machine endianess and hence works + * correctly on both styles of machine. + */ + +static void gf128mul_x_lle(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; + + r->b = cpu_to_be64((b >> 1) | (a << 63)); + r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); +} + +static void gf128mul_x_bbe(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_bbe[a >> 63]; + + r->a = cpu_to_be64((a << 1) | (b >> 63)); + r->b = cpu_to_be64((b << 1) ^ _tt); +} + +static void gf128mul_x8_lle(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_lle[b & 0xff]; + + x->b = cpu_to_be64((b >> 8) | (a << 56)); + x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); +} + +static void gf128mul_x8_bbe(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_bbe[a >> 56]; + + x->a = cpu_to_be64((a << 8) | (b >> 56)); + x->b = cpu_to_be64((b << 8) ^ _tt); +} + +void gf128mul_lle(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_lle(&p[i + 1], &p[i]); + + memset(r, 0, sizeof(r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[15 - i]; + + if (ch & 0x80) + be128_xor(r, r, &p[0]); + if (ch & 0x40) + be128_xor(r, r, &p[1]); + if (ch & 0x20) + be128_xor(r, r, &p[2]); + if (ch & 0x10) + be128_xor(r, r, &p[3]); + if (ch & 0x08) + be128_xor(r, r, &p[4]); + if (ch & 0x04) + be128_xor(r, r, &p[5]); + if (ch & 0x02) + be128_xor(r, r, &p[6]); + if (ch & 0x01) + be128_xor(r, r, &p[7]); + + if (++i >= 16) + break; + + gf128mul_x8_lle(r); + } +} +EXPORT_SYMBOL(gf128mul_lle); + +void gf128mul_bbe(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_bbe(&p[i + 1], &p[i]); + + memset(r, 0, sizeof(r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[i]; + + if (ch & 0x80) + be128_xor(r, r, &p[7]); + if (ch & 0x40) + be128_xor(r, r, &p[6]); + if (ch & 0x20) + be128_xor(r, r, &p[5]); + if (ch & 0x10) + be128_xor(r, r, &p[4]); + if (ch & 0x08) + be128_xor(r, r, &p[3]); + if (ch & 0x04) + be128_xor(r, r, &p[2]); + if (ch & 0x02) + be128_xor(r, r, &p[1]); + if (ch & 0x01) + be128_xor(r, r, &p[0]); + + if (++i >= 16) + break; + + gf128mul_x8_bbe(r); + } +} +EXPORT_SYMBOL(gf128mul_bbe); + +/* This version uses 64k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(128). If we consider a GF(128) value in + the buffer's lowest byte, we can construct a table of + the 256 16 byte values that result from the 256 values + of this byte. This requires 4096 bytes. But we also + need tables for each of the 16 higher bytes in the + buffer as well, which makes 64 kbytes in total. +*/ +/* additional explanation + * t[0][BYTE] contains g*BYTE + * t[1][BYTE] contains g*x^8*BYTE + * .. + * t[15][BYTE] contains g*x^120*BYTE */ +struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g) +{ + struct gf128mul_64k *t; + int i, j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + for (i = 0; i < 16; i++) { + t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); + if (!t->t[i]) { + gf128mul_free_64k(t); + t = NULL; + goto out; + } + } + + t->t[0]->t[128] = *g; + for (j = 64; j > 0; j >>= 1) + gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]); + + for (i = 0;;) { + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[i]->t[j + k], + &t->t[i]->t[j], &t->t[i]->t[k]); + + if (++i >= 16) + break; + + for (j = 128; j > 0; j >>= 1) { + t->t[i]->t[j] = t->t[i - 1]->t[j]; + gf128mul_x8_lle(&t->t[i]->t[j]); + } + } + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_64k_lle); + +struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) +{ + struct gf128mul_64k *t; + int i, j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + for (i = 0; i < 16; i++) { + t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); + if (!t->t[i]) { + gf128mul_free_64k(t); + t = NULL; + goto out; + } + } + + t->t[0]->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]); + + for (i = 0;;) { + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[i]->t[j + k], + &t->t[i]->t[j], &t->t[i]->t[k]); + + if (++i >= 16) + break; + + for (j = 128; j > 0; j >>= 1) { + t->t[i]->t[j] = t->t[i - 1]->t[j]; + gf128mul_x8_bbe(&t->t[i]->t[j]); + } + } + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_64k_bbe); + +void gf128mul_free_64k(struct gf128mul_64k *t) +{ + int i; + + for (i = 0; i < 16; i++) + kfree(t->t[i]); + kfree(t); +} +EXPORT_SYMBOL(gf128mul_free_64k); + +void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i; + + *r = t->t[0]->t[ap[0]]; + for (i = 1; i < 16; ++i) + be128_xor(r, r, &t->t[i]->t[ap[i]]); + *a = *r; +} +EXPORT_SYMBOL(gf128mul_64k_lle); + +void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i; + + *r = t->t[0]->t[ap[15]]; + for (i = 1; i < 16; ++i) + be128_xor(r, r, &t->t[i]->t[ap[15 - i]]); + *a = *r; +} +EXPORT_SYMBOL(gf128mul_64k_bbe); + +/* This version uses 4k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(128). If we consider a GF(128) value in a + single byte, we can construct a table of the 256 16 byte + values that result from the 256 values of this byte. + This requires 4096 bytes. If we take the highest byte in + the buffer and use this table to get the result, we then + have to multiply by x^120 to get the final value. For the + next highest byte the result has to be multiplied by x^112 + and so on. But we can do this by accumulating the result + in an accumulator starting with the result for the top + byte. We repeatedly multiply the accumulator value by + x^8 and then add in (i.e. xor) the 16 bytes of the next + lower byte in the buffer, stopping when we reach the + lowest byte. This requires a 4096 byte table. +*/ +struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[128] = *g; + for (j = 64; j > 0; j >>= 1) + gf128mul_x_lle(&t->t[j], &t->t[j+j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_lle); + +struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[j + j], &t->t[j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_bbe); + +void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 15; + + *r = t->t[ap[15]]; + while (i--) { + gf128mul_x8_lle(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_lle); + +void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 0; + + *r = t->t[ap[0]]; + while (++i < 16) { + gf128mul_x8_bbe(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_bbe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); diff --git a/crypto/lrw.c b/crypto/lrw.c new file mode 100644 index 00000000000..56642586d84 --- /dev/null +++ b/crypto/lrw.c @@ -0,0 +1,301 @@ +/* LRW: as defined by Cyril Guyot in + * http://grouper.ieee.org/groups/1619/email/pdf00017.pdf + * + * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> + * + * Based om ecb.c + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ +/* This implementation is checked against the test vectors in the above + * document and by a test vector provided by Ken Buchanan at + * http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html + * + * The test vectors are included in the testing module tcrypt.[ch] */ +#include <crypto/algapi.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> + +#include <crypto/b128ops.h> +#include <crypto/gf128mul.h> + +struct priv { + struct crypto_cipher *child; + /* optimizes multiplying a random (non incrementing, as at the + * start of a new sector) value with key2, we could also have + * used 4k optimization tables or no optimization at all. In the + * latter case we would have to store key2 here */ + struct gf128mul_64k *table; + /* stores: + * key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 }, + * key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 } + * key2*{ 0,0,...1,1,1,1,1 }, etc + * needed for optimized multiplication of incrementing values + * with key2 */ + be128 mulinc[128]; +}; + +static inline void setbit128_bbe(void *b, int bit) +{ + __set_bit(bit ^ 0x78, b); +} + +static int setkey(struct crypto_tfm *parent, const u8 *key, + unsigned int keylen) +{ + struct priv *ctx = crypto_tfm_ctx(parent); + struct crypto_cipher *child = ctx->child; + int err, i; + be128 tmp = { 0 }; + int bsize = crypto_cipher_blocksize(child); + + crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + if ((err = crypto_cipher_setkey(child, key, keylen - bsize))) + return err; + crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + + if (ctx->table) + gf128mul_free_64k(ctx->table); + + /* initialize multiplication table for Key2 */ + ctx->table = gf128mul_init_64k_bbe((be128 *)(key + keylen - bsize)); + if (!ctx->table) + return -ENOMEM; + + /* initialize optimization table */ + for (i = 0; i < 128; i++) { + setbit128_bbe(&tmp, i); + ctx->mulinc[i] = tmp; + gf128mul_64k_bbe(&ctx->mulinc[i], ctx->table); + } + + return 0; +} + +struct sinfo { + be128 t; + struct crypto_tfm *tfm; + void (*fn)(struct crypto_tfm *, u8 *, const u8 *); +}; + +static inline void inc(be128 *iv) +{ + if (!(iv->b = cpu_to_be64(be64_to_cpu(iv->b) + 1))) + iv->a = cpu_to_be64(be64_to_cpu(iv->a) + 1); +} + +static inline void lrw_round(struct sinfo *s, void *dst, const void *src) +{ + be128_xor(dst, &s->t, src); /* PP <- T xor P */ + s->fn(s->tfm, dst, dst); /* CC <- E(Key2,PP) */ + be128_xor(dst, dst, &s->t); /* C <- T xor CC */ +} + +/* this returns the number of consequative 1 bits starting + * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */ +static inline int get_index128(be128 *block) +{ + int x; + __be32 *p = (__be32 *) block; + + for (p += 3, x = 0; x < 128; p--, x += 32) { + u32 val = be32_to_cpup(p); + + if (!~val) + continue; + + return x + ffz(val); + } + + return x; +} + +static int crypt(struct blkcipher_desc *d, + struct blkcipher_walk *w, struct priv *ctx, + void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +{ + int err; + unsigned int avail; + const int bs = crypto_cipher_blocksize(ctx->child); + struct sinfo s = { + .tfm = crypto_cipher_tfm(ctx->child), + .fn = fn + }; + be128 *iv; + u8 *wsrc; + u8 *wdst; + + err = blkcipher_walk_virt(d, w); + if (!(avail = w->nbytes)) + return err; + + wsrc = w->src.virt.addr; + wdst = w->dst.virt.addr; + + /* calculate first value of T */ + iv = (be128 *)w->iv; + s.t = *iv; + + /* T <- I*Key2 */ + gf128mul_64k_bbe(&s.t, ctx->table); + + goto first; + + for (;;) { + do { + /* T <- I*Key2, using the optimization + * discussed in the specification */ + be128_xor(&s.t, &s.t, &ctx->mulinc[get_index128(iv)]); + inc(iv); + +first: + lrw_round(&s, wdst, wsrc); + + wsrc += bs; + wdst += bs; + } while ((avail -= bs) >= bs); + + err = blkcipher_walk_done(d, w, avail); + if (!(avail = w->nbytes)) + break; + + wsrc = w->src.virt.addr; + wdst = w->dst.virt.addr; + } + + return err; +} + +static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk w; + + blkcipher_walk_init(&w, dst, src, nbytes); + return crypt(desc, &w, ctx, + crypto_cipher_alg(ctx->child)->cia_encrypt); +} + +static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk w; + + blkcipher_walk_init(&w, dst, src, nbytes); + return crypt(desc, &w, ctx, + crypto_cipher_alg(ctx->child)->cia_decrypt); +} + +static int init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct priv *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + tfm = crypto_spawn_tfm(spawn); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + if (crypto_tfm_alg_blocksize(tfm) != 16) { + *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; + return -EINVAL; + } + + ctx->child = crypto_cipher_cast(tfm); + return 0; +} + +static void exit_tfm(struct crypto_tfm *tfm) +{ + struct priv *ctx = crypto_tfm_ctx(tfm); + if (ctx->table) + gf128mul_free_64k(ctx->table); + crypto_free_cipher(ctx->child); +} + +static struct crypto_instance *alloc(void *param, unsigned int len) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + + alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER, + CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC); + if (IS_ERR(alg)) + return ERR_PTR(PTR_ERR(alg)); + + inst = crypto_alloc_instance("lrw", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + + if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7; + else inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_blkcipher_type; + + if (!(alg->cra_blocksize % 4)) + inst->alg.cra_alignmask |= 3; + inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; + inst->alg.cra_blkcipher.min_keysize = + alg->cra_cipher.cia_min_keysize + alg->cra_blocksize; + inst->alg.cra_blkcipher.max_keysize = + alg->cra_cipher.cia_max_keysize + alg->cra_blocksize; + + inst->alg.cra_ctxsize = sizeof(struct priv); + + inst->alg.cra_init = init_tfm; + inst->alg.cra_exit = exit_tfm; + + inst->alg.cra_blkcipher.setkey = setkey; + inst->alg.cra_blkcipher.encrypt = encrypt; + inst->alg.cra_blkcipher.decrypt = decrypt; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_tmpl = { + .name = "lrw", + .alloc = alloc, + .free = free, + .module = THIS_MODULE, +}; + +static int __init crypto_module_init(void) +{ + return crypto_register_template(&crypto_tmpl); +} + +static void __exit crypto_module_exit(void) +{ + crypto_unregister_template(&crypto_tmpl); +} + +module_init(crypto_module_init); +module_exit(crypto_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LRW block cipher mode"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 83307420d31..d671e8942b1 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -906,6 +906,10 @@ static void do_test(void) AES_CBC_ENC_TEST_VECTORS); test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS); + test_cipher("lrw(aes)", ENCRYPT, aes_lrw_enc_tv_template, + AES_LRW_ENC_TEST_VECTORS); + test_cipher("lrw(aes)", DECRYPT, aes_lrw_dec_tv_template, + AES_LRW_DEC_TEST_VECTORS); //CAST5 test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template, @@ -977,6 +981,9 @@ static void do_test(void) test_hash("hmac(sha256)", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS); + test_hash("xcbc(aes)", aes_xcbc128_tv_template, + XCBC_AES_TEST_VECTORS); + test_hash("michael_mic", michael_mic_tv_template, MICHAEL_MIC_TEST_VECTORS); break; @@ -1052,6 +1059,10 @@ static void do_test(void) AES_CBC_ENC_TEST_VECTORS); test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS); + test_cipher("lrw(aes)", ENCRYPT, aes_lrw_enc_tv_template, + AES_LRW_ENC_TEST_VECTORS); + test_cipher("lrw(aes)", DECRYPT, aes_lrw_dec_tv_template, + AES_LRW_DEC_TEST_VECTORS); break; case 11: @@ -1191,6 +1202,10 @@ static void do_test(void) aes_speed_template); test_cipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0, aes_speed_template); + test_cipher_speed("lrw(aes)", ENCRYPT, sec, NULL, 0, + aes_lrw_speed_template); + test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0, + aes_lrw_speed_template); break; case 201: diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index a40c4411729..48a81362cb8 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -39,15 +39,15 @@ struct hash_testvec { struct cipher_testvec { char key[MAX_KEYLEN] __attribute__ ((__aligned__(4))); char iv[MAX_IVLEN]; - char input[48]; - char result[48]; + char input[512]; + char result[512]; unsigned char tap[MAX_TAP]; int np; unsigned char fail; unsigned char wk; /* weak key flag */ unsigned char klen; - unsigned char ilen; - unsigned char rlen; + unsigned short ilen; + unsigned short rlen; }; struct cipher_speed { @@ -933,6 +933,74 @@ static struct hash_testvec hmac_sha256_tv_template[] = { }, }; +#define XCBC_AES_TEST_VECTORS 6 + +static struct hash_testvec aes_xcbc128_tv_template[] = { + { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { [0 ... 15] = 0 }, + .digest = { 0x75, 0xf0, 0x25, 0x1d, 0x52, 0x8a, 0xc0, 0x1c, + 0x45, 0x73, 0xdf, 0xd5, 0x84, 0xd7, 0x9f, 0x29 }, + .psize = 0, + .ksize = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { 0x00, 0x01, 0x02 }, + .digest = { 0x5b, 0x37, 0x65, 0x80, 0xae, 0x2f, 0x19, 0xaf, + 0xe7, 0x21, 0x9c, 0xee, 0xf1, 0x72, 0x75, 0x6f }, + .psize = 3, + .ksize = 16, + } , { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .digest = { 0xd2, 0xa2, 0x46, 0xfa, 0x34, 0x9b, 0x68, 0xa7, + 0x99, 0x98, 0xa4, 0x39, 0x4f, 0xf7, 0xa2, 0x63 }, + .psize = 16, + .ksize = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13 }, + .digest = { 0x47, 0xf5, 0x1b, 0x45, 0x64, 0x96, 0x62, 0x15, + 0xb8, 0x98, 0x5c, 0x63, 0x05, 0x5e, 0xd3, 0x08 }, + .tap = { 10, 10 }, + .psize = 20, + .np = 2, + .ksize = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, + .digest = { 0xf5, 0x4f, 0x0e, 0xc8, 0xd2, 0xb9, 0xf3, 0xd3, + 0x68, 0x07, 0x73, 0x4b, 0xd5, 0x28, 0x3f, 0xd4 }, + .psize = 32, + .ksize = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21 }, + .digest = { 0xbe, 0xcb, 0xb3, 0xbc, 0xcd, 0xb5, 0x18, 0xa3, + 0x06, 0x77, 0xd5, 0x48, 0x1f, 0xb6, 0xb4, 0xd8 }, + .tap = { 17, 17 }, + .psize = 34, + .np = 2, + .ksize = 16, + } +}; + /* * DES test vectors. */ @@ -1831,6 +1899,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_DEC_TEST_VECTORS 3 #define AES_CBC_ENC_TEST_VECTORS 2 #define AES_CBC_DEC_TEST_VECTORS 2 +#define AES_LRW_ENC_TEST_VECTORS 8 +#define AES_LRW_DEC_TEST_VECTORS 8 static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ @@ -1968,6 +2038,509 @@ static struct cipher_testvec aes_cbc_dec_tv_template[] = { }, }; +static struct cipher_testvec aes_lrw_enc_tv_template[] = { + /* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */ + { /* LRW-32-AES 1 */ + .key = { 0x45, 0x62, 0xac, 0x25, 0xf8, 0x28, 0x17, 0x6d, + 0x4c, 0x26, 0x84, 0x14, 0xb5, 0x68, 0x01, 0x85, + 0x25, 0x8e, 0x2a, 0x05, 0xe7, 0x3e, 0x9d, 0x03, + 0xee, 0x5a, 0x83, 0x0c, 0xcc, 0x09, 0x4c, 0x87 }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0xf1, 0xb2, 0x73, 0xcd, 0x65, 0xa3, 0xdf, 0x5f, + 0xe9, 0x5d, 0x48, 0x92, 0x54, 0x63, 0x4e, 0xb8 }, + .rlen = 16, + }, { /* LRW-32-AES 2 */ + .key = { 0x59, 0x70, 0x47, 0x14, 0xf5, 0x57, 0x47, 0x8c, + 0xd7, 0x79, 0xe8, 0x0f, 0x54, 0x88, 0x79, 0x44, + 0x0d, 0x48, 0xf0, 0xb7, 0xb1, 0x5a, 0x53, 0xea, + 0x1c, 0xaa, 0x6b, 0x29, 0xc2, 0xca, 0xfb, 0xaf + }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0x00, 0xc8, 0x2b, 0xae, 0x95, 0xbb, 0xcd, 0xe5, + 0x27, 0x4f, 0x07, 0x69, 0xb2, 0x60, 0xe1, 0x36 }, + .rlen = 16, + }, { /* LRW-32-AES 3 */ + .key = { 0xd8, 0x2a, 0x91, 0x34, 0xb2, 0x6a, 0x56, 0x50, + 0x30, 0xfe, 0x69, 0xe2, 0x37, 0x7f, 0x98, 0x47, + 0xcd, 0xf9, 0x0b, 0x16, 0x0c, 0x64, 0x8f, 0xb6, + 0xb0, 0x0d, 0x0d, 0x1b, 0xae, 0x85, 0x87, 0x1f }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0x76, 0x32, 0x21, 0x83, 0xed, 0x8f, 0xf1, 0x82, + 0xf9, 0x59, 0x62, 0x03, 0x69, 0x0e, 0x5e, 0x01 }, + .rlen = 16, + }, { /* LRW-32-AES 4 */ + .key = { 0x0f, 0x6a, 0xef, 0xf8, 0xd3, 0xd2, 0xbb, 0x15, + 0x25, 0x83, 0xf7, 0x3c, 0x1f, 0x01, 0x28, 0x74, + 0xca, 0xc6, 0xbc, 0x35, 0x4d, 0x4a, 0x65, 0x54, + 0x90, 0xae, 0x61, 0xcf, 0x7b, 0xae, 0xbd, 0xcc, + 0xad, 0xe4, 0x94, 0xc5, 0x4a, 0x29, 0xae, 0x70 }, + .klen = 40, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0x9c, 0x0f, 0x15, 0x2f, 0x55, 0xa2, 0xd8, 0xf0, + 0xd6, 0x7b, 0x8f, 0x9e, 0x28, 0x22, 0xbc, 0x41 }, + .rlen = 16, + }, { /* LRW-32-AES 5 */ + .key = { 0x8a, 0xd4, 0xee, 0x10, 0x2f, 0xbd, 0x81, 0xff, + 0xf8, 0x86, 0xce, 0xac, 0x93, 0xc5, 0xad, 0xc6, + 0xa0, 0x19, 0x07, 0xc0, 0x9d, 0xf7, 0xbb, 0xdd, + 0x52, 0x13, 0xb2, 0xb7, 0xf0, 0xff, 0x11, 0xd8, + 0xd6, 0x08, 0xd0, 0xcd, 0x2e, 0xb1, 0x17, 0x6f }, + .klen = 40, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0xd4, 0x27, 0x6a, 0x7f, 0x14, 0x91, 0x3d, 0x65, + 0xc8, 0x60, 0x48, 0x02, 0x87, 0xe3, 0x34, 0x06 }, + .rlen = 16, + }, { /* LRW-32-AES 6 */ + .key = { 0xf8, 0xd4, 0x76, 0xff, 0xd6, 0x46, 0xee, 0x6c, + 0x23, 0x84, 0xcb, 0x1c, 0x77, 0xd6, 0x19, 0x5d, + 0xfe, 0xf1, 0xa9, 0xf3, 0x7b, 0xbc, 0x8d, 0x21, + 0xa7, 0x9c, 0x21, 0xf8, 0xcb, 0x90, 0x02, 0x89, + 0xa8, 0x45, 0x34, 0x8e, 0xc8, 0xc5, 0xb5, 0xf1, + 0x26, 0xf5, 0x0e, 0x76, 0xfe, 0xfd, 0x1b, 0x1e }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0xbd, 0x06, 0xb8, 0xe1, 0xdb, 0x98, 0x89, 0x9e, + 0xc4, 0x98, 0xe4, 0x91, 0xcf, 0x1c, 0x70, 0x2b }, + .rlen = 16, + }, { /* LRW-32-AES 7 */ + .key = { 0xfb, 0x76, 0x15, 0xb2, 0x3d, 0x80, 0x89, 0x1d, + 0xd4, 0x70, 0x98, 0x0b, 0xc7, 0x95, 0x84, 0xc8, + 0xb2, 0xfb, 0x64, 0xce, 0x60, 0x97, 0x87, 0x8d, + 0x17, 0xfc, 0xe4, 0x5a, 0x49, 0xe8, 0x30, 0xb7, + 0x6e, 0x78, 0x17, 0xe7, 0x2d, 0x5e, 0x12, 0xd4, + 0x60, 0x64, 0x04, 0x7a, 0xf1, 0x2f, 0x9e, 0x0c }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .ilen = 16, + .result = { 0x5b, 0x90, 0x8e, 0xc1, 0xab, 0xdd, 0x67, 0x5f, + 0x3d, 0x69, 0x8a, 0x95, 0x53, 0xc8, 0x9c, 0xe5 }, + .rlen = 16, + }, { +/* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */ + .key = { 0xf8, 0xd4, 0x76, 0xff, 0xd6, 0x46, 0xee, 0x6c, + 0x23, 0x84, 0xcb, 0x1c, 0x77, 0xd6, 0x19, 0x5d, + 0xfe, 0xf1, 0xa9, 0xf3, 0x7b, 0xbc, 0x8d, 0x21, + 0xa7, 0x9c, 0x21, 0xf8, 0xcb, 0x90, 0x02, 0x89, + 0xa8, 0x45, 0x34, 0x8e, 0xc8, 0xc5, 0xb5, 0xf1, + 0x26, 0xf5, 0x0e, 0x76, 0xfe, 0xfd, 0x1b, 0x1e }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x05, 0x11, 0xb7, 0x18, 0xab, 0xc6, 0x2d, 0xac, + 0x70, 0x5d, 0xf6, 0x22, 0x94, 0xcd, 0xe5, 0x6c, + 0x17, 0x6b, 0xf6, 0x1c, 0xf0, 0xf3, 0x6e, 0xf8, + 0x50, 0x38, 0x1f, 0x71, 0x49, 0xb6, 0x57, 0xd6, + 0x8f, 0xcb, 0x8d, 0x6b, 0xe3, 0xa6, 0x29, 0x90, + 0xfe, 0x2a, 0x62, 0x82, 0xae, 0x6d, 0x8b, 0xf6, + 0xad, 0x1e, 0x9e, 0x20, 0x5f, 0x38, 0xbe, 0x04, + 0xda, 0x10, 0x8e, 0xed, 0xa2, 0xa4, 0x87, 0xab, + 0xda, 0x6b, 0xb4, 0x0c, 0x75, 0xba, 0xd3, 0x7c, + 0xc9, 0xac, 0x42, 0x31, 0x95, 0x7c, 0xc9, 0x04, + 0xeb, 0xd5, 0x6e, 0x32, 0x69, 0x8a, 0xdb, 0xa6, + 0x15, 0xd7, 0x3f, 0x4f, 0x2f, 0x66, 0x69, 0x03, + 0x9c, 0x1f, 0x54, 0x0f, 0xde, 0x1f, 0xf3, 0x65, + 0x4c, 0x96, 0x12, 0xed, 0x7c, 0x92, 0x03, 0x01, + 0x6f, 0xbc, 0x35, 0x93, 0xac, 0xf1, 0x27, 0xf1, + 0xb4, 0x96, 0x82, 0x5a, 0x5f, 0xb0, 0xa0, 0x50, + 0x89, 0xa4, 0x8e, 0x66, 0x44, 0x85, 0xcc, 0xfd, + 0x33, 0x14, 0x70, 0xe3, 0x96, 0xb2, 0xc3, 0xd3, + 0xbb, 0x54, 0x5a, 0x1a, 0xf9, 0x74, 0xa2, 0xc5, + 0x2d, 0x64, 0x75, 0xdd, 0xb4, 0x54, 0xe6, 0x74, + 0x8c, 0xd3, 0x9d, 0x9e, 0x86, 0xab, 0x51, 0x53, + 0xb7, 0x93, 0x3e, 0x6f, 0xd0, 0x4e, 0x2c, 0x40, + 0xf6, 0xa8, 0x2e, 0x3e, 0x9d, 0xf4, 0x66, 0xa5, + 0x76, 0x12, 0x73, 0x44, 0x1a, 0x56, 0xd7, 0x72, + 0x88, 0xcd, 0x21, 0x8c, 0x4c, 0x0f, 0xfe, 0xda, + 0x95, 0xe0, 0x3a, 0xa6, 0xa5, 0x84, 0x46, 0xcd, + 0xd5, 0x3e, 0x9d, 0x3a, 0xe2, 0x67, 0xe6, 0x60, + 0x1a, 0xe2, 0x70, 0x85, 0x58, 0xc2, 0x1b, 0x09, + 0xe1, 0xd7, 0x2c, 0xca, 0xad, 0xa8, 0x8f, 0xf9, + 0xac, 0xb3, 0x0e, 0xdb, 0xca, 0x2e, 0xe2, 0xb8, + 0x51, 0x71, 0xd9, 0x3c, 0x6c, 0xf1, 0x56, 0xf8, + 0xea, 0x9c, 0xf1, 0xfb, 0x0c, 0xe6, 0xb7, 0x10, + 0x1c, 0xf8, 0xa9, 0x7c, 0xe8, 0x53, 0x35, 0xc1, + 0x90, 0x3e, 0x76, 0x4a, 0x74, 0xa4, 0x21, 0x2c, + 0xf6, 0x2c, 0x4e, 0x0f, 0x94, 0x3a, 0x88, 0x2e, + 0x41, 0x09, 0x6a, 0x33, 0x7d, 0xf6, 0xdd, 0x3f, + 0x8d, 0x23, 0x31, 0x74, 0x84, 0xeb, 0x88, 0x6e, + 0xcc, 0xb9, 0xbc, 0x22, 0x83, 0x19, 0x07, 0x22, + 0xa5, 0x2d, 0xdf, 0xa5, 0xf3, 0x80, 0x85, 0x78, + 0x84, 0x39, 0x6a, 0x6d, 0x6a, 0x99, 0x4f, 0xa5, + 0x15, 0xfe, 0x46, 0xb0, 0xe4, 0x6c, 0xa5, 0x41, + 0x3c, 0xce, 0x8f, 0x42, 0x60, 0x71, 0xa7, 0x75, + 0x08, 0x40, 0x65, 0x8a, 0x82, 0xbf, 0xf5, 0x43, + 0x71, 0x96, 0xa9, 0x4d, 0x44, 0x8a, 0x20, 0xbe, + 0xfa, 0x4d, 0xbb, 0xc0, 0x7d, 0x31, 0x96, 0x65, + 0xe7, 0x75, 0xe5, 0x3e, 0xfd, 0x92, 0x3b, 0xc9, + 0x55, 0xbb, 0x16, 0x7e, 0xf7, 0xc2, 0x8c, 0xa4, + 0x40, 0x1d, 0xe5, 0xef, 0x0e, 0xdf, 0xe4, 0x9a, + 0x62, 0x73, 0x65, 0xfd, 0x46, 0x63, 0x25, 0x3d, + 0x2b, 0xaf, 0xe5, 0x64, 0xfe, 0xa5, 0x5c, 0xcf, + 0x24, 0xf3, 0xb4, 0xac, 0x64, 0xba, 0xdf, 0x4b, + 0xc6, 0x96, 0x7d, 0x81, 0x2d, 0x8d, 0x97, 0xf7, + 0xc5, 0x68, 0x77, 0x84, 0x32, 0x2b, 0xcc, 0x85, + 0x74, 0x96, 0xf0, 0x12, 0x77, 0x61, 0xb9, 0xeb, + 0x71, 0xaa, 0x82, 0xcb, 0x1c, 0xdb, 0x89, 0xc8, + 0xc6, 0xb5, 0xe3, 0x5c, 0x7d, 0x39, 0x07, 0x24, + 0xda, 0x39, 0x87, 0x45, 0xc0, 0x2b, 0xbb, 0x01, + 0xac, 0xbc, 0x2a, 0x5c, 0x7f, 0xfc, 0xe8, 0xce, + 0x6d, 0x9c, 0x6f, 0xed, 0xd3, 0xc1, 0xa1, 0xd6, + 0xc5, 0x55, 0xa9, 0x66, 0x2f, 0xe1, 0xc8, 0x32, + 0xa6, 0x5d, 0xa4, 0x3a, 0x98, 0x73, 0xe8, 0x45, + 0xa4, 0xc7, 0xa8, 0xb4, 0xf6, 0x13, 0x03, 0xf6, + 0xe9, 0x2e, 0xc4, 0x29, 0x0f, 0x84, 0xdb, 0xc4, + 0x21, 0xc4, 0xc2, 0x75, 0x67, 0x89, 0x37, 0x0a }, + .ilen = 512, + .result = { 0x1a, 0x1d, 0xa9, 0x30, 0xad, 0xf9, 0x2f, 0x9b, + 0xb6, 0x1d, 0xae, 0xef, 0xf0, 0x2f, 0xf8, 0x5a, + 0x39, 0x3c, 0xbf, 0x2a, 0xb2, 0x45, 0xb2, 0x23, + 0x1b, 0x63, 0x3c, 0xcf, 0xaa, 0xbe, 0xcf, 0x4e, + 0xfa, 0xe8, 0x29, 0xc2, 0x20, 0x68, 0x2b, 0x3c, + 0x2e, 0x8b, 0xf7, 0x6e, 0x25, 0xbd, 0xe3, 0x3d, + 0x66, 0x27, 0xd6, 0xaf, 0xd6, 0x64, 0x3e, 0xe3, + 0xe8, 0x58, 0x46, 0x97, 0x39, 0x51, 0x07, 0xde, + 0xcb, 0x37, 0xbc, 0xa9, 0xc0, 0x5f, 0x75, 0xc3, + 0x0e, 0x84, 0x23, 0x1d, 0x16, 0xd4, 0x1c, 0x59, + 0x9c, 0x1a, 0x02, 0x55, 0xab, 0x3a, 0x97, 0x1d, + 0xdf, 0xdd, 0xc7, 0x06, 0x51, 0xd7, 0x70, 0xae, + 0x23, 0xc6, 0x8c, 0xf5, 0x1e, 0xa0, 0xe5, 0x82, + 0xb8, 0xb2, 0xbf, 0x04, 0xa0, 0x32, 0x8e, 0x68, + 0xeb, 0xaf, 0x6e, 0x2d, 0x94, 0x22, 0x2f, 0xce, + 0x4c, 0xb5, 0x59, 0xe2, 0xa2, 0x2f, 0xa0, 0x98, + 0x1a, 0x97, 0xc6, 0xd4, 0xb5, 0x00, 0x59, 0xf2, + 0x84, 0x14, 0x72, 0xb1, 0x9a, 0x6e, 0xa3, 0x7f, + 0xea, 0x20, 0xe7, 0xcb, 0x65, 0x77, 0x3a, 0xdf, + 0xc8, 0x97, 0x67, 0x15, 0xc2, 0x2a, 0x27, 0xcc, + 0x18, 0x55, 0xa1, 0x24, 0x0b, 0x24, 0x24, 0xaf, + 0x5b, 0xec, 0x68, 0xb8, 0xc8, 0xf5, 0xba, 0x63, + 0xff, 0xed, 0x89, 0xce, 0xd5, 0x3d, 0x88, 0xf3, + 0x25, 0xef, 0x05, 0x7c, 0x3a, 0xef, 0xeb, 0xd8, + 0x7a, 0x32, 0x0d, 0xd1, 0x1e, 0x58, 0x59, 0x99, + 0x90, 0x25, 0xb5, 0x26, 0xb0, 0xe3, 0x2b, 0x6c, + 0x4c, 0xa9, 0x8b, 0x84, 0x4f, 0x5e, 0x01, 0x50, + 0x41, 0x30, 0x58, 0xc5, 0x62, 0x74, 0x52, 0x1d, + 0x45, 0x24, 0x6a, 0x42, 0x64, 0x4f, 0x97, 0x1c, + 0xa8, 0x66, 0xb5, 0x6d, 0x79, 0xd4, 0x0d, 0x48, + 0xc5, 0x5f, 0xf3, 0x90, 0x32, 0xdd, 0xdd, 0xe1, + 0xe4, 0xa9, 0x9f, 0xfc, 0xc3, 0x52, 0x5a, 0x46, + 0xe4, 0x81, 0x84, 0x95, 0x36, 0x59, 0x7a, 0x6b, + 0xaa, 0xb3, 0x60, 0xad, 0xce, 0x9f, 0x9f, 0x28, + 0xe0, 0x01, 0x75, 0x22, 0xc4, 0x4e, 0xa9, 0x62, + 0x5c, 0x62, 0x0d, 0x00, 0xcb, 0x13, 0xe8, 0x43, + 0x72, 0xd4, 0x2d, 0x53, 0x46, 0xb5, 0xd1, 0x16, + 0x22, 0x18, 0xdf, 0x34, 0x33, 0xf5, 0xd6, 0x1c, + 0xb8, 0x79, 0x78, 0x97, 0x94, 0xff, 0x72, 0x13, + 0x4c, 0x27, 0xfc, 0xcb, 0xbf, 0x01, 0x53, 0xa6, + 0xb4, 0x50, 0x6e, 0xde, 0xdf, 0xb5, 0x43, 0xa4, + 0x59, 0xdf, 0x52, 0xf9, 0x7c, 0xe0, 0x11, 0x6f, + 0x2d, 0x14, 0x8e, 0x24, 0x61, 0x2c, 0xe1, 0x17, + 0xcc, 0xce, 0x51, 0x0c, 0x19, 0x8a, 0x82, 0x30, + 0x94, 0xd5, 0x3d, 0x6a, 0x53, 0x06, 0x5e, 0xbd, + 0xb7, 0xeb, 0xfa, 0xfd, 0x27, 0x51, 0xde, 0x85, + 0x1e, 0x86, 0x53, 0x11, 0x53, 0x94, 0x00, 0xee, + 0x2b, 0x8c, 0x08, 0x2a, 0xbf, 0xdd, 0xae, 0x11, + 0xcb, 0x1e, 0xa2, 0x07, 0x9a, 0x80, 0xcf, 0x62, + 0x9b, 0x09, 0xdc, 0x95, 0x3c, 0x96, 0x8e, 0xb1, + 0x09, 0xbd, 0xe4, 0xeb, 0xdb, 0xca, 0x70, 0x7a, + 0x9e, 0xfa, 0x31, 0x18, 0x45, 0x3c, 0x21, 0x33, + 0xb0, 0xb3, 0x2b, 0xea, 0xf3, 0x71, 0x2d, 0xe1, + 0x03, 0xad, 0x1b, 0x48, 0xd4, 0x67, 0x27, 0xf0, + 0x62, 0xe4, 0x3d, 0xfb, 0x9b, 0x08, 0x76, 0xe7, + 0xdd, 0x2b, 0x01, 0x39, 0x04, 0x5a, 0x58, 0x7a, + 0xf7, 0x11, 0x90, 0xec, 0xbd, 0x51, 0x5c, 0x32, + 0x6b, 0xd7, 0x35, 0x39, 0x02, 0x6b, 0xf2, 0xa6, + 0xd0, 0x0d, 0x07, 0xe1, 0x06, 0xc4, 0x5b, 0x7d, + 0xe4, 0x6a, 0xd7, 0xee, 0x15, 0x1f, 0x83, 0xb4, + 0xa3, 0xa7, 0x5e, 0xc3, 0x90, 0xb7, 0xef, 0xd3, + 0xb7, 0x4f, 0xf8, 0x92, 0x4c, 0xb7, 0x3c, 0x29, + 0xcd, 0x7e, 0x2b, 0x5d, 0x43, 0xea, 0x42, 0xe7, + 0x74, 0x3f, 0x7d, 0x58, 0x88, 0x75, 0xde, 0x3e }, + .rlen = 512, + } +}; + +static struct cipher_testvec aes_lrw_dec_tv_template[] = { + /* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */ + /* same as enc vectors with input and result reversed */ + { /* LRW-32-AES 1 */ + .key = { 0x45, 0x62, 0xac, 0x25, 0xf8, 0x28, 0x17, 0x6d, + 0x4c, 0x26, 0x84, 0x14, 0xb5, 0x68, 0x01, 0x85, + 0x25, 0x8e, 0x2a, 0x05, 0xe7, 0x3e, 0x9d, 0x03, + 0xee, 0x5a, 0x83, 0x0c, 0xcc, 0x09, 0x4c, 0x87 }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0xf1, 0xb2, 0x73, 0xcd, 0x65, 0xa3, 0xdf, 0x5f, + 0xe9, 0x5d, 0x48, 0x92, 0x54, 0x63, 0x4e, 0xb8 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 2 */ + .key = { 0x59, 0x70, 0x47, 0x14, 0xf5, 0x57, 0x47, 0x8c, + 0xd7, 0x79, 0xe8, 0x0f, 0x54, 0x88, 0x79, 0x44, + 0x0d, 0x48, 0xf0, 0xb7, 0xb1, 0x5a, 0x53, 0xea, + 0x1c, 0xaa, 0x6b, 0x29, 0xc2, 0xca, 0xfb, 0xaf + }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .input = { 0x00, 0xc8, 0x2b, 0xae, 0x95, 0xbb, 0xcd, 0xe5, + 0x27, 0x4f, 0x07, 0x69, 0xb2, 0x60, 0xe1, 0x36 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 3 */ + .key = { 0xd8, 0x2a, 0x91, 0x34, 0xb2, 0x6a, 0x56, 0x50, + 0x30, 0xfe, 0x69, 0xe2, 0x37, 0x7f, 0x98, 0x47, + 0xcd, 0xf9, 0x0b, 0x16, 0x0c, 0x64, 0x8f, 0xb6, + 0xb0, 0x0d, 0x0d, 0x1b, 0xae, 0x85, 0x87, 0x1f }, + .klen = 32, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0x76, 0x32, 0x21, 0x83, 0xed, 0x8f, 0xf1, 0x82, + 0xf9, 0x59, 0x62, 0x03, 0x69, 0x0e, 0x5e, 0x01 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 4 */ + .key = { 0x0f, 0x6a, 0xef, 0xf8, 0xd3, 0xd2, 0xbb, 0x15, + 0x25, 0x83, 0xf7, 0x3c, 0x1f, 0x01, 0x28, 0x74, + 0xca, 0xc6, 0xbc, 0x35, 0x4d, 0x4a, 0x65, 0x54, + 0x90, 0xae, 0x61, 0xcf, 0x7b, 0xae, 0xbd, 0xcc, + 0xad, 0xe4, 0x94, 0xc5, 0x4a, 0x29, 0xae, 0x70 }, + .klen = 40, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x9c, 0x0f, 0x15, 0x2f, 0x55, 0xa2, 0xd8, 0xf0, + 0xd6, 0x7b, 0x8f, 0x9e, 0x28, 0x22, 0xbc, 0x41 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 5 */ + .key = { 0x8a, 0xd4, 0xee, 0x10, 0x2f, 0xbd, 0x81, 0xff, + 0xf8, 0x86, 0xce, 0xac, 0x93, 0xc5, 0xad, 0xc6, + 0xa0, 0x19, 0x07, 0xc0, 0x9d, 0xf7, 0xbb, 0xdd, + 0x52, 0x13, 0xb2, 0xb7, 0xf0, 0xff, 0x11, 0xd8, + 0xd6, 0x08, 0xd0, 0xcd, 0x2e, 0xb1, 0x17, 0x6f }, + .klen = 40, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0xd4, 0x27, 0x6a, 0x7f, 0x14, 0x91, 0x3d, 0x65, + 0xc8, 0x60, 0x48, 0x02, 0x87, 0xe3, 0x34, 0x06 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 6 */ + .key = { 0xf8, 0xd4, 0x76, 0xff, 0xd6, 0x46, 0xee, 0x6c, + 0x23, 0x84, 0xcb, 0x1c, 0x77, 0xd6, 0x19, 0x5d, + 0xfe, 0xf1, 0xa9, 0xf3, 0x7b, 0xbc, 0x8d, 0x21, + 0xa7, 0x9c, 0x21, 0xf8, 0xcb, 0x90, 0x02, 0x89, + 0xa8, 0x45, 0x34, 0x8e, 0xc8, 0xc5, 0xb5, 0xf1, + 0x26, 0xf5, 0x0e, 0x76, 0xfe, 0xfd, 0x1b, 0x1e }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0xbd, 0x06, 0xb8, 0xe1, 0xdb, 0x98, 0x89, 0x9e, + 0xc4, 0x98, 0xe4, 0x91, 0xcf, 0x1c, 0x70, 0x2b }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { /* LRW-32-AES 7 */ + .key = { 0xfb, 0x76, 0x15, 0xb2, 0x3d, 0x80, 0x89, 0x1d, + 0xd4, 0x70, 0x98, 0x0b, 0xc7, 0x95, 0x84, 0xc8, + 0xb2, 0xfb, 0x64, 0xce, 0x60, 0x97, 0x87, 0x8d, + 0x17, 0xfc, 0xe4, 0x5a, 0x49, 0xe8, 0x30, 0xb7, + 0x6e, 0x78, 0x17, 0xe7, 0x2d, 0x5e, 0x12, 0xd4, + 0x60, 0x64, 0x04, 0x7a, 0xf1, 0x2f, 0x9e, 0x0c }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 }, + .input = { 0x5b, 0x90, 0x8e, 0xc1, 0xab, 0xdd, 0x67, 0x5f, + 0x3d, 0x69, 0x8a, 0x95, 0x53, 0xc8, 0x9c, 0xe5 }, + .ilen = 16, + .result = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }, + .rlen = 16, + }, { +/* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */ + .key = { 0xf8, 0xd4, 0x76, 0xff, 0xd6, 0x46, 0xee, 0x6c, + 0x23, 0x84, 0xcb, 0x1c, 0x77, 0xd6, 0x19, 0x5d, + 0xfe, 0xf1, 0xa9, 0xf3, 0x7b, 0xbc, 0x8d, 0x21, + 0xa7, 0x9c, 0x21, 0xf8, 0xcb, 0x90, 0x02, 0x89, + 0xa8, 0x45, 0x34, 0x8e, 0xc8, 0xc5, 0xb5, 0xf1, + 0x26, 0xf5, 0x0e, 0x76, 0xfe, 0xfd, 0x1b, 0x1e }, + .klen = 48, + .iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .input = { 0x1a, 0x1d, 0xa9, 0x30, 0xad, 0xf9, 0x2f, 0x9b, + 0xb6, 0x1d, 0xae, 0xef, 0xf0, 0x2f, 0xf8, 0x5a, + 0x39, 0x3c, 0xbf, 0x2a, 0xb2, 0x45, 0xb2, 0x23, + 0x1b, 0x63, 0x3c, 0xcf, 0xaa, 0xbe, 0xcf, 0x4e, + 0xfa, 0xe8, 0x29, 0xc2, 0x20, 0x68, 0x2b, 0x3c, + 0x2e, 0x8b, 0xf7, 0x6e, 0x25, 0xbd, 0xe3, 0x3d, + 0x66, 0x27, 0xd6, 0xaf, 0xd6, 0x64, 0x3e, 0xe3, + 0xe8, 0x58, 0x46, 0x97, 0x39, 0x51, 0x07, 0xde, + 0xcb, 0x37, 0xbc, 0xa9, 0xc0, 0x5f, 0x75, 0xc3, + 0x0e, 0x84, 0x23, 0x1d, 0x16, 0xd4, 0x1c, 0x59, + 0x9c, 0x1a, 0x02, 0x55, 0xab, 0x3a, 0x97, 0x1d, + 0xdf, 0xdd, 0xc7, 0x06, 0x51, 0xd7, 0x70, 0xae, + 0x23, 0xc6, 0x8c, 0xf5, 0x1e, 0xa0, 0xe5, 0x82, + 0xb8, 0xb2, 0xbf, 0x04, 0xa0, 0x32, 0x8e, 0x68, + 0xeb, 0xaf, 0x6e, 0x2d, 0x94, 0x22, 0x2f, 0xce, + 0x4c, 0xb5, 0x59, 0xe2, 0xa2, 0x2f, 0xa0, 0x98, + 0x1a, 0x97, 0xc6, 0xd4, 0xb5, 0x00, 0x59, 0xf2, + 0x84, 0x14, 0x72, 0xb1, 0x9a, 0x6e, 0xa3, 0x7f, + 0xea, 0x20, 0xe7, 0xcb, 0x65, 0x77, 0x3a, 0xdf, + 0xc8, 0x97, 0x67, 0x15, 0xc2, 0x2a, 0x27, 0xcc, + 0x18, 0x55, 0xa1, 0x24, 0x0b, 0x24, 0x24, 0xaf, + 0x5b, 0xec, 0x68, 0xb8, 0xc8, 0xf5, 0xba, 0x63, + 0xff, 0xed, 0x89, 0xce, 0xd5, 0x3d, 0x88, 0xf3, + 0x25, 0xef, 0x05, 0x7c, 0x3a, 0xef, 0xeb, 0xd8, + 0x7a, 0x32, 0x0d, 0xd1, 0x1e, 0x58, 0x59, 0x99, + 0x90, 0x25, 0xb5, 0x26, 0xb0, 0xe3, 0x2b, 0x6c, + 0x4c, 0xa9, 0x8b, 0x84, 0x4f, 0x5e, 0x01, 0x50, + 0x41, 0x30, 0x58, 0xc5, 0x62, 0x74, 0x52, 0x1d, + 0x45, 0x24, 0x6a, 0x42, 0x64, 0x4f, 0x97, 0x1c, + 0xa8, 0x66, 0xb5, 0x6d, 0x79, 0xd4, 0x0d, 0x48, + 0xc5, 0x5f, 0xf3, 0x90, 0x32, 0xdd, 0xdd, 0xe1, + 0xe4, 0xa9, 0x9f, 0xfc, 0xc3, 0x52, 0x5a, 0x46, + 0xe4, 0x81, 0x84, 0x95, 0x36, 0x59, 0x7a, 0x6b, + 0xaa, 0xb3, 0x60, 0xad, 0xce, 0x9f, 0x9f, 0x28, + 0xe0, 0x01, 0x75, 0x22, 0xc4, 0x4e, 0xa9, 0x62, + 0x5c, 0x62, 0x0d, 0x00, 0xcb, 0x13, 0xe8, 0x43, + 0x72, 0xd4, 0x2d, 0x53, 0x46, 0xb5, 0xd1, 0x16, + 0x22, 0x18, 0xdf, 0x34, 0x33, 0xf5, 0xd6, 0x1c, + 0xb8, 0x79, 0x78, 0x97, 0x94, 0xff, 0x72, 0x13, + 0x4c, 0x27, 0xfc, 0xcb, 0xbf, 0x01, 0x53, 0xa6, + 0xb4, 0x50, 0x6e, 0xde, 0xdf, 0xb5, 0x43, 0xa4, + 0x59, 0xdf, 0x52, 0xf9, 0x7c, 0xe0, 0x11, 0x6f, + 0x2d, 0x14, 0x8e, 0x24, 0x61, 0x2c, 0xe1, 0x17, + 0xcc, 0xce, 0x51, 0x0c, 0x19, 0x8a, 0x82, 0x30, + 0x94, 0xd5, 0x3d, 0x6a, 0x53, 0x06, 0x5e, 0xbd, + 0xb7, 0xeb, 0xfa, 0xfd, 0x27, 0x51, 0xde, 0x85, + 0x1e, 0x86, 0x53, 0x11, 0x53, 0x94, 0x00, 0xee, + 0x2b, 0x8c, 0x08, 0x2a, 0xbf, 0xdd, 0xae, 0x11, + 0xcb, 0x1e, 0xa2, 0x07, 0x9a, 0x80, 0xcf, 0x62, + 0x9b, 0x09, 0xdc, 0x95, 0x3c, 0x96, 0x8e, 0xb1, + 0x09, 0xbd, 0xe4, 0xeb, 0xdb, 0xca, 0x70, 0x7a, + 0x9e, 0xfa, 0x31, 0x18, 0x45, 0x3c, 0x21, 0x33, + 0xb0, 0xb3, 0x2b, 0xea, 0xf3, 0x71, 0x2d, 0xe1, + 0x03, 0xad, 0x1b, 0x48, 0xd4, 0x67, 0x27, 0xf0, + 0x62, 0xe4, 0x3d, 0xfb, 0x9b, 0x08, 0x76, 0xe7, + 0xdd, 0x2b, 0x01, 0x39, 0x04, 0x5a, 0x58, 0x7a, + 0xf7, 0x11, 0x90, 0xec, 0xbd, 0x51, 0x5c, 0x32, + 0x6b, 0xd7, 0x35, 0x39, 0x02, 0x6b, 0xf2, 0xa6, + 0xd0, 0x0d, 0x07, 0xe1, 0x06, 0xc4, 0x5b, 0x7d, + 0xe4, 0x6a, 0xd7, 0xee, 0x15, 0x1f, 0x83, 0xb4, + 0xa3, 0xa7, 0x5e, 0xc3, 0x90, 0xb7, 0xef, 0xd3, + 0xb7, 0x4f, 0xf8, 0x92, 0x4c, 0xb7, 0x3c, 0x29, + 0xcd, 0x7e, 0x2b, 0x5d, 0x43, 0xea, 0x42, 0xe7, + 0x74, 0x3f, 0x7d, 0x58, 0x88, 0x75, 0xde, 0x3e }, + .ilen = 512, + .result = { 0x05, 0x11, 0xb7, 0x18, 0xab, 0xc6, 0x2d, 0xac, + 0x70, 0x5d, 0xf6, 0x22, 0x94, 0xcd, 0xe5, 0x6c, + 0x17, 0x6b, 0xf6, 0x1c, 0xf0, 0xf3, 0x6e, 0xf8, + 0x50, 0x38, 0x1f, 0x71, 0x49, 0xb6, 0x57, 0xd6, + 0x8f, 0xcb, 0x8d, 0x6b, 0xe3, 0xa6, 0x29, 0x90, + 0xfe, 0x2a, 0x62, 0x82, 0xae, 0x6d, 0x8b, 0xf6, + 0xad, 0x1e, 0x9e, 0x20, 0x5f, 0x38, 0xbe, 0x04, + 0xda, 0x10, 0x8e, 0xed, 0xa2, 0xa4, 0x87, 0xab, + 0xda, 0x6b, 0xb4, 0x0c, 0x75, 0xba, 0xd3, 0x7c, + 0xc9, 0xac, 0x42, 0x31, 0x95, 0x7c, 0xc9, 0x04, + 0xeb, 0xd5, 0x6e, 0x32, 0x69, 0x8a, 0xdb, 0xa6, + 0x15, 0xd7, 0x3f, 0x4f, 0x2f, 0x66, 0x69, 0x03, + 0x9c, 0x1f, 0x54, 0x0f, 0xde, 0x1f, 0xf3, 0x65, + 0x4c, 0x96, 0x12, 0xed, 0x7c, 0x92, 0x03, 0x01, + 0x6f, 0xbc, 0x35, 0x93, 0xac, 0xf1, 0x27, 0xf1, + 0xb4, 0x96, 0x82, 0x5a, 0x5f, 0xb0, 0xa0, 0x50, + 0x89, 0xa4, 0x8e, 0x66, 0x44, 0x85, 0xcc, 0xfd, + 0x33, 0x14, 0x70, 0xe3, 0x96, 0xb2, 0xc3, 0xd3, + 0xbb, 0x54, 0x5a, 0x1a, 0xf9, 0x74, 0xa2, 0xc5, + 0x2d, 0x64, 0x75, 0xdd, 0xb4, 0x54, 0xe6, 0x74, + 0x8c, 0xd3, 0x9d, 0x9e, 0x86, 0xab, 0x51, 0x53, + 0xb7, 0x93, 0x3e, 0x6f, 0xd0, 0x4e, 0x2c, 0x40, + 0xf6, 0xa8, 0x2e, 0x3e, 0x9d, 0xf4, 0x66, 0xa5, + 0x76, 0x12, 0x73, 0x44, 0x1a, 0x56, 0xd7, 0x72, + 0x88, 0xcd, 0x21, 0x8c, 0x4c, 0x0f, 0xfe, 0xda, + 0x95, 0xe0, 0x3a, 0xa6, 0xa5, 0x84, 0x46, 0xcd, + 0xd5, 0x3e, 0x9d, 0x3a, 0xe2, 0x67, 0xe6, 0x60, + 0x1a, 0xe2, 0x70, 0x85, 0x58, 0xc2, 0x1b, 0x09, + 0xe1, 0xd7, 0x2c, 0xca, 0xad, 0xa8, 0x8f, 0xf9, + 0xac, 0xb3, 0x0e, 0xdb, 0xca, 0x2e, 0xe2, 0xb8, + 0x51, 0x71, 0xd9, 0x3c, 0x6c, 0xf1, 0x56, 0xf8, + 0xea, 0x9c, 0xf1, 0xfb, 0x0c, 0xe6, 0xb7, 0x10, + 0x1c, 0xf8, 0xa9, 0x7c, 0xe8, 0x53, 0x35, 0xc1, + 0x90, 0x3e, 0x76, 0x4a, 0x74, 0xa4, 0x21, 0x2c, + 0xf6, 0x2c, 0x4e, 0x0f, 0x94, 0x3a, 0x88, 0x2e, + 0x41, 0x09, 0x6a, 0x33, 0x7d, 0xf6, 0xdd, 0x3f, + 0x8d, 0x23, 0x31, 0x74, 0x84, 0xeb, 0x88, 0x6e, + 0xcc, 0xb9, 0xbc, 0x22, 0x83, 0x19, 0x07, 0x22, + 0xa5, 0x2d, 0xdf, 0xa5, 0xf3, 0x80, 0x85, 0x78, + 0x84, 0x39, 0x6a, 0x6d, 0x6a, 0x99, 0x4f, 0xa5, + 0x15, 0xfe, 0x46, 0xb0, 0xe4, 0x6c, 0xa5, 0x41, + 0x3c, 0xce, 0x8f, 0x42, 0x60, 0x71, 0xa7, 0x75, + 0x08, 0x40, 0x65, 0x8a, 0x82, 0xbf, 0xf5, 0x43, + 0x71, 0x96, 0xa9, 0x4d, 0x44, 0x8a, 0x20, 0xbe, + 0xfa, 0x4d, 0xbb, 0xc0, 0x7d, 0x31, 0x96, 0x65, + 0xe7, 0x75, 0xe5, 0x3e, 0xfd, 0x92, 0x3b, 0xc9, + 0x55, 0xbb, 0x16, 0x7e, 0xf7, 0xc2, 0x8c, 0xa4, + 0x40, 0x1d, 0xe5, 0xef, 0x0e, 0xdf, 0xe4, 0x9a, + 0x62, 0x73, 0x65, 0xfd, 0x46, 0x63, 0x25, 0x3d, + 0x2b, 0xaf, 0xe5, 0x64, 0xfe, 0xa5, 0x5c, 0xcf, + 0x24, 0xf3, 0xb4, 0xac, 0x64, 0xba, 0xdf, 0x4b, + 0xc6, 0x96, 0x7d, 0x81, 0x2d, 0x8d, 0x97, 0xf7, + 0xc5, 0x68, 0x77, 0x84, 0x32, 0x2b, 0xcc, 0x85, + 0x74, 0x96, 0xf0, 0x12, 0x77, 0x61, 0xb9, 0xeb, + 0x71, 0xaa, 0x82, 0xcb, 0x1c, 0xdb, 0x89, 0xc8, + 0xc6, 0xb5, 0xe3, 0x5c, 0x7d, 0x39, 0x07, 0x24, + 0xda, 0x39, 0x87, 0x45, 0xc0, 0x2b, 0xbb, 0x01, + 0xac, 0xbc, 0x2a, 0x5c, 0x7f, 0xfc, 0xe8, 0xce, + 0x6d, 0x9c, 0x6f, 0xed, 0xd3, 0xc1, 0xa1, 0xd6, + 0xc5, 0x55, 0xa9, 0x66, 0x2f, 0xe1, 0xc8, 0x32, + 0xa6, 0x5d, 0xa4, 0x3a, 0x98, 0x73, 0xe8, 0x45, + 0xa4, 0xc7, 0xa8, 0xb4, 0xf6, 0x13, 0x03, 0xf6, + 0xe9, 0x2e, 0xc4, 0x29, 0x0f, 0x84, 0xdb, 0xc4, + 0x21, 0xc4, 0xc2, 0x75, 0x67, 0x89, 0x37, 0x0a }, + .rlen = 512, + } +}; + /* Cast5 test vectors from RFC 2144 */ #define CAST5_ENC_TEST_VECTORS 3 #define CAST5_DEC_TEST_VECTORS 3 @@ -3084,6 +3657,27 @@ static struct cipher_speed aes_speed_template[] = { { .klen = 0, .blen = 0, } }; +static struct cipher_speed aes_lrw_speed_template[] = { + { .klen = 32, .blen = 16, }, + { .klen = 32, .blen = 64, }, + { .klen = 32, .blen = 256, }, + { .klen = 32, .blen = 1024, }, + { .klen = 32, .blen = 8192, }, + { .klen = 40, .blen = 16, }, + { .klen = 40, .blen = 64, }, + { .klen = 40, .blen = 256, }, + { .klen = 40, .blen = 1024, }, + { .klen = 40, .blen = 8192, }, + { .klen = 48, .blen = 16, }, + { .klen = 48, .blen = 64, }, + { .klen = 48, .blen = 256, }, + { .klen = 48, .blen = 1024, }, + { .klen = 48, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + static struct cipher_speed des3_ede_speed_template[] = { { .klen = 24, .blen = 16, }, { .klen = 24, .blen = 64, }, diff --git a/crypto/xcbc.c b/crypto/xcbc.c new file mode 100644 index 00000000000..9347eb6bcf6 --- /dev/null +++ b/crypto/xcbc.c @@ -0,0 +1,348 @@ +/* + * Copyright (C)2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: + * Kazunori Miyazawa <miyazawa@linux-ipv6.org> + */ + +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <linux/scatterlist.h> +#include "internal.h" + +static u_int32_t ks[12] = {0x01010101, 0x01010101, 0x01010101, 0x01010101, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x03030303, 0x03030303, 0x03030303, 0x03030303}; +/* + * +------------------------ + * | <parent tfm> + * +------------------------ + * | crypto_xcbc_ctx + * +------------------------ + * | odds (block size) + * +------------------------ + * | prev (block size) + * +------------------------ + * | key (block size) + * +------------------------ + * | consts (block size * 3) + * +------------------------ + */ +struct crypto_xcbc_ctx { + struct crypto_tfm *child; + u8 *odds; + u8 *prev; + u8 *key; + u8 *consts; + void (*xor)(u8 *a, const u8 *b, unsigned int bs); + unsigned int keylen; + unsigned int len; +}; + +static void xor_128(u8 *a, const u8 *b, unsigned int bs) +{ + ((u32 *)a)[0] ^= ((u32 *)b)[0]; + ((u32 *)a)[1] ^= ((u32 *)b)[1]; + ((u32 *)a)[2] ^= ((u32 *)b)[2]; + ((u32 *)a)[3] ^= ((u32 *)b)[3]; +} + +static int _crypto_xcbc_digest_setkey(struct crypto_hash *parent, + struct crypto_xcbc_ctx *ctx) +{ + int bs = crypto_hash_blocksize(parent); + int err = 0; + u8 key1[bs]; + + if ((err = crypto_cipher_setkey(ctx->child, ctx->key, ctx->keylen))) + return err; + + ctx->child->__crt_alg->cra_cipher.cia_encrypt(ctx->child, key1, + ctx->consts); + + return crypto_cipher_setkey(ctx->child, key1, bs); +} + +static int crypto_xcbc_digest_setkey(struct crypto_hash *parent, + const u8 *inkey, unsigned int keylen) +{ + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(parent); + + if (keylen != crypto_tfm_alg_blocksize(ctx->child)) + return -EINVAL; + + ctx->keylen = keylen; + memcpy(ctx->key, inkey, keylen); + ctx->consts = (u8*)ks; + + return _crypto_xcbc_digest_setkey(parent, ctx); +} + +static int crypto_xcbc_digest_init(struct hash_desc *pdesc) +{ + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(pdesc->tfm); + int bs = crypto_hash_blocksize(pdesc->tfm); + + ctx->len = 0; + memset(ctx->odds, 0, bs); + memset(ctx->prev, 0, bs); + + return 0; +} + +static int crypto_xcbc_digest_update(struct hash_desc *pdesc, + struct scatterlist *sg, + unsigned int nbytes) +{ + struct crypto_hash *parent = pdesc->tfm; + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(parent); + struct crypto_tfm *tfm = ctx->child; + int bs = crypto_hash_blocksize(parent); + unsigned int i = 0; + + do { + + struct page *pg = sg[i].page; + unsigned int offset = sg[i].offset; + unsigned int slen = sg[i].length; + + while (slen > 0) { + unsigned int len = min(slen, ((unsigned int)(PAGE_SIZE)) - offset); + char *p = crypto_kmap(pg, 0) + offset; + + /* checking the data can fill the block */ + if ((ctx->len + len) <= bs) { + memcpy(ctx->odds + ctx->len, p, len); + ctx->len += len; + slen -= len; + + /* checking the rest of the page */ + if (len + offset >= PAGE_SIZE) { + offset = 0; + pg++; + } else + offset += len; + + crypto_kunmap(p, 0); + crypto_yield(tfm->crt_flags); + continue; + } + + /* filling odds with new data and encrypting it */ + memcpy(ctx->odds + ctx->len, p, bs - ctx->len); + len -= bs - ctx->len; + p += bs - ctx->len; + + ctx->xor(ctx->prev, ctx->odds, bs); + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, ctx->prev, ctx->prev); + + /* clearing the length */ + ctx->len = 0; + + /* encrypting the rest of data */ + while (len > bs) { + ctx->xor(ctx->prev, p, bs); + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, ctx->prev, ctx->prev); + p += bs; + len -= bs; + } + + /* keeping the surplus of blocksize */ + if (len) { + memcpy(ctx->odds, p, len); + ctx->len = len; + } + crypto_kunmap(p, 0); + crypto_yield(tfm->crt_flags); + slen -= min(slen, ((unsigned int)(PAGE_SIZE)) - offset); + offset = 0; + pg++; + } + nbytes-=sg[i].length; + i++; + } while (nbytes>0); + + return 0; +} + +static int crypto_xcbc_digest_final(struct hash_desc *pdesc, u8 *out) +{ + struct crypto_hash *parent = pdesc->tfm; + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(parent); + struct crypto_tfm *tfm = ctx->child; + int bs = crypto_hash_blocksize(parent); + int err = 0; + + if (ctx->len == bs) { + u8 key2[bs]; + + if ((err = crypto_cipher_setkey(tfm, ctx->key, ctx->keylen)) != 0) + return err; + + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, key2, (const u8*)(ctx->consts+bs)); + + ctx->xor(ctx->prev, ctx->odds, bs); + ctx->xor(ctx->prev, key2, bs); + _crypto_xcbc_digest_setkey(parent, ctx); + + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, out, ctx->prev); + } else { + u8 key3[bs]; + unsigned int rlen; + u8 *p = ctx->odds + ctx->len; + *p = 0x80; + p++; + + rlen = bs - ctx->len -1; + if (rlen) + memset(p, 0, rlen); + + if ((err = crypto_cipher_setkey(tfm, ctx->key, ctx->keylen)) != 0) + return err; + + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, key3, (const u8*)(ctx->consts+bs*2)); + + ctx->xor(ctx->prev, ctx->odds, bs); + ctx->xor(ctx->prev, key3, bs); + + _crypto_xcbc_digest_setkey(parent, ctx); + + tfm->__crt_alg->cra_cipher.cia_encrypt(tfm, out, ctx->prev); + } + + return 0; +} + +static int crypto_xcbc_digest(struct hash_desc *pdesc, + struct scatterlist *sg, unsigned int nbytes, u8 *out) +{ + crypto_xcbc_digest_init(pdesc); + crypto_xcbc_digest_update(pdesc, sg, nbytes); + return crypto_xcbc_digest_final(pdesc, out); +} + +static int xcbc_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(__crypto_hash_cast(tfm)); + int bs = crypto_hash_blocksize(__crypto_hash_cast(tfm)); + + tfm = crypto_spawn_tfm(spawn); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + switch(bs) { + case 16: + ctx->xor = xor_128; + break; + default: + return -EINVAL; + } + + ctx->child = crypto_cipher_cast(tfm); + ctx->odds = (u8*)(ctx+1); + ctx->prev = ctx->odds + bs; + ctx->key = ctx->prev + bs; + + return 0; +}; + +static void xcbc_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(__crypto_hash_cast(tfm)); + crypto_free_cipher(ctx->child); +} + +static struct crypto_instance *xcbc_alloc(void *param, unsigned int len) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER, + CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC); + if (IS_ERR(alg)) + return ERR_PTR(PTR_ERR(alg)); + + switch(alg->cra_blocksize) { + case 16: + break; + default: + return ERR_PTR(PTR_ERR(alg)); + } + + inst = crypto_alloc_instance("xcbc", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_HASH; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_hash_type; + + inst->alg.cra_hash.digestsize = + (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_HASH ? alg->cra_hash.digestsize : + alg->cra_blocksize; + inst->alg.cra_ctxsize = sizeof(struct crypto_xcbc_ctx) + + ALIGN(inst->alg.cra_blocksize * 3, sizeof(void *)); + inst->alg.cra_init = xcbc_init_tfm; + inst->alg.cra_exit = xcbc_exit_tfm; + + inst->alg.cra_hash.init = crypto_xcbc_digest_init; + inst->alg.cra_hash.update = crypto_xcbc_digest_update; + inst->alg.cra_hash.final = crypto_xcbc_digest_final; + inst->alg.cra_hash.digest = crypto_xcbc_digest; + inst->alg.cra_hash.setkey = crypto_xcbc_digest_setkey; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void xcbc_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_xcbc_tmpl = { + .name = "xcbc", + .alloc = xcbc_alloc, + .free = xcbc_free, + .module = THIS_MODULE, +}; + +static int __init crypto_xcbc_module_init(void) +{ + return crypto_register_template(&crypto_xcbc_tmpl); +} + +static void __exit crypto_xcbc_module_exit(void) +{ + crypto_unregister_template(&crypto_xcbc_tmpl); +} + +module_init(crypto_xcbc_module_init); +module_exit(crypto_xcbc_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("XCBC keyed hash algorithm"); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8816e30fb7a..011c0a8a2dc 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2303,7 +2303,7 @@ int ata_timing_compute(struct ata_device *adev, unsigned short speed, * DMA cycle timing is slower/equal than the fastest PIO timing. */ - if (speed > XFER_PIO_4) { + if (speed > XFER_PIO_6) { ata_timing_compute(adev, adev->pio_mode, &p, T, UT); ata_timing_merge(&p, t, t, ATA_TIMING_ALL); } @@ -4960,6 +4960,7 @@ unsigned int ata_qc_issue_prot(struct ata_queued_cmd *qc) if (ap->flags & ATA_FLAG_PIO_POLLING) { switch (qc->tf.protocol) { case ATA_PROT_PIO: + case ATA_PROT_NODATA: case ATA_PROT_ATAPI: case ATA_PROT_ATAPI_NODATA: qc->tf.flags |= ATA_TFLAG_POLLING; diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index a2778cf016b..f055874a6ec 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -66,15 +66,17 @@ enum { board_2037x = 0, /* FastTrak S150 TX2plus */ board_20319 = 1, /* FastTrak S150 TX4 */ board_20619 = 2, /* FastTrak TX4000 */ - board_20771 = 3, /* FastTrak TX2300 */ - board_2057x = 4, /* SATAII150 Tx2plus */ - board_40518 = 5, /* SATAII150 Tx4 */ + board_2057x = 3, /* SATAII150 Tx2plus */ + board_40518 = 4, /* SATAII150 Tx4 */ PDC_HAS_PATA = (1 << 1), /* PDC20375/20575 has PATA */ + /* PDC_CTLSTAT bit definitions */ + PDC_DMA_ENABLE = (1 << 7), + PDC_IRQ_DISABLE = (1 << 10), PDC_RESET = (1 << 11), /* HDMA reset */ - PDC_COMMON_FLAGS = ATA_FLAG_NO_LEGACY | ATA_FLAG_SRST | + PDC_COMMON_FLAGS = ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING, @@ -90,7 +92,6 @@ struct pdc_port_priv { struct pdc_host_priv { unsigned long flags; - int hotplug_offset; }; static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg); @@ -101,13 +102,16 @@ static void pdc_eng_timeout(struct ata_port *ap); static int pdc_port_start(struct ata_port *ap); static void pdc_port_stop(struct ata_port *ap); static void pdc_pata_phy_reset(struct ata_port *ap); -static void pdc_sata_phy_reset(struct ata_port *ap); static void pdc_qc_prep(struct ata_queued_cmd *qc); static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc_irq_clear(struct ata_port *ap); static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc); static void pdc_host_stop(struct ata_host *host); +static void pdc_freeze(struct ata_port *ap); +static void pdc_thaw(struct ata_port *ap); +static void pdc_error_handler(struct ata_port *ap); +static void pdc_post_internal_cmd(struct ata_queued_cmd *qc); static struct scsi_host_template pdc_ata_sht = { @@ -136,11 +140,12 @@ static const struct ata_port_operations pdc_sata_ops = { .exec_command = pdc_exec_command_mmio, .dev_select = ata_std_dev_select, - .phy_reset = pdc_sata_phy_reset, - .qc_prep = pdc_qc_prep, .qc_issue = pdc_qc_issue_prot, - .eng_timeout = pdc_eng_timeout, + .freeze = pdc_freeze, + .thaw = pdc_thaw, + .error_handler = pdc_error_handler, + .post_internal_cmd = pdc_post_internal_cmd, .data_xfer = ata_mmio_data_xfer, .irq_handler = pdc_interrupt, .irq_clear = pdc_irq_clear, @@ -198,23 +203,13 @@ static const struct ata_port_info pdc_port_info[] = { /* board_20619 */ { .sht = &pdc_ata_sht, - .flags = PDC_COMMON_FLAGS | ATA_FLAG_SLAVE_POSS, + .flags = PDC_COMMON_FLAGS | ATA_FLAG_SRST | ATA_FLAG_SLAVE_POSS, .pio_mask = 0x1f, /* pio0-4 */ .mwdma_mask = 0x07, /* mwdma0-2 */ .udma_mask = 0x7f, /* udma0-6 ; FIXME */ .port_ops = &pdc_pata_ops, }, - /* board_20771 */ - { - .sht = &pdc_ata_sht, - .flags = PDC_COMMON_FLAGS | ATA_FLAG_SATA, - .pio_mask = 0x1f, /* pio0-4 */ - .mwdma_mask = 0x07, /* mwdma0-2 */ - .udma_mask = 0x7f, /* udma0-6 ; FIXME */ - .port_ops = &pdc_sata_ops, - }, - /* board_2057x */ { .sht = &pdc_ata_sht, @@ -244,6 +239,7 @@ static const struct pci_device_id pdc_ata_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3570), board_2057x }, { PCI_VDEVICE(PROMISE, 0x3571), board_2057x }, { PCI_VDEVICE(PROMISE, 0x3574), board_2057x }, + { PCI_VDEVICE(PROMISE, 0x3577), board_2057x }, { PCI_VDEVICE(PROMISE, 0x3d73), board_2057x }, { PCI_VDEVICE(PROMISE, 0x3d75), board_2057x }, @@ -256,15 +252,6 @@ static const struct pci_device_id pdc_ata_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x6629), board_20619 }, -/* TODO: remove all associated board_20771 code, as it completely - * duplicates board_2037x code, unless reason for separation can be - * divined. - */ -#if 0 - { PCI_VDEVICE(PROMISE, 0x3570), board_20771 }, -#endif - { PCI_VDEVICE(PROMISE, 0x3577), board_20771 }, - { } /* terminate list */ }; @@ -366,12 +353,6 @@ static void pdc_reset_port(struct ata_port *ap) readl(mmio); /* flush */ } -static void pdc_sata_phy_reset(struct ata_port *ap) -{ - pdc_reset_port(ap); - sata_phy_reset(ap); -} - static void pdc_pata_cbl_detect(struct ata_port *ap) { u8 tmp; @@ -439,6 +420,61 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc) } } +static void pdc_freeze(struct ata_port *ap) +{ + void __iomem *mmio = (void __iomem *) ap->ioaddr.cmd_addr; + u32 tmp; + + tmp = readl(mmio + PDC_CTLSTAT); + tmp |= PDC_IRQ_DISABLE; + tmp &= ~PDC_DMA_ENABLE; + writel(tmp, mmio + PDC_CTLSTAT); + readl(mmio + PDC_CTLSTAT); /* flush */ +} + +static void pdc_thaw(struct ata_port *ap) +{ + void __iomem *mmio = (void __iomem *) ap->ioaddr.cmd_addr; + u32 tmp; + + /* clear IRQ */ + readl(mmio + PDC_INT_SEQMASK); + + /* turn IRQ back on */ + tmp = readl(mmio + PDC_CTLSTAT); + tmp &= ~PDC_IRQ_DISABLE; + writel(tmp, mmio + PDC_CTLSTAT); + readl(mmio + PDC_CTLSTAT); /* flush */ +} + +static void pdc_error_handler(struct ata_port *ap) +{ + ata_reset_fn_t hardreset; + + if (!(ap->pflags & ATA_PFLAG_FROZEN)) + pdc_reset_port(ap); + + hardreset = NULL; + if (sata_scr_valid(ap)) + hardreset = sata_std_hardreset; + + /* perform recovery */ + ata_do_eh(ap, ata_std_prereset, ata_std_softreset, hardreset, + ata_std_postreset); +} + +static void pdc_post_internal_cmd(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + + if (qc->flags & ATA_QCFLAG_FAILED) + qc->err_mask |= AC_ERR_OTHER; + + /* make DMA engine forget about the failed command */ + if (qc->err_mask) + pdc_reset_port(ap); +} + static void pdc_eng_timeout(struct ata_port *ap) { struct ata_host *host = ap->host; @@ -645,9 +681,14 @@ static void pdc_host_init(unsigned int chip_id, struct ata_probe_ent *pe) { void __iomem *mmio = pe->mmio_base; struct pdc_host_priv *hp = pe->private_data; - int hotplug_offset = hp->hotplug_offset; + int hotplug_offset; u32 tmp; + if (hp->flags & PDC_FLAG_GEN_II) + hotplug_offset = PDC2_SATA_PLUG_CSR; + else + hotplug_offset = PDC_SATA_PLUG_CSR; + /* * Except for the hotplug stuff, this is voodoo from the * Promise driver. Label this entire section @@ -742,8 +783,6 @@ static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *e goto err_out_free_ent; } - /* Set default hotplug offset */ - hp->hotplug_offset = PDC_SATA_PLUG_CSR; probe_ent->private_data = hp; probe_ent->sht = pdc_port_info[board_idx].sht; @@ -767,8 +806,6 @@ static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *e switch (board_idx) { case board_40518: hp->flags |= PDC_FLAG_GEN_II; - /* Override hotplug offset for SATAII150 */ - hp->hotplug_offset = PDC2_SATA_PLUG_CSR; /* Fall through */ case board_20319: probe_ent->n_ports = 4; @@ -780,10 +817,7 @@ static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *e probe_ent->port[3].scr_addr = base + 0x700; break; case board_2057x: - case board_20771: hp->flags |= PDC_FLAG_GEN_II; - /* Override hotplug offset for SATAII150 */ - hp->hotplug_offset = PDC2_SATA_PLUG_CSR; /* Fall through */ case board_2037x: probe_ent->n_ports = 2; diff --git a/drivers/atm/Makefile b/drivers/atm/Makefile index b5077ce8cb4..1b16f8166b0 100644 --- a/drivers/atm/Makefile +++ b/drivers/atm/Makefile @@ -41,7 +41,7 @@ ifeq ($(CONFIG_ATM_FORE200E_PCA),y) # guess the target endianess to choose the right PCA-200E firmware image ifeq ($(CONFIG_ATM_FORE200E_PCA_DEFAULT_FW),y) byteorder.h := include$(if $(patsubst $(srctree),,$(objtree)),2)/asm/byteorder.h - CONFIG_ATM_FORE200E_PCA_FW := $(obj)/pca200e$(if $(shell $(CC) -E -dM $(byteorder.h) | grep ' __LITTLE_ENDIAN '),.bin,_ecd.bin2) + CONFIG_ATM_FORE200E_PCA_FW := $(obj)/pca200e$(if $(shell $(CC) $(CPPFLAGS) -E -dM $(byteorder.h) | grep ' __LITTLE_ENDIAN '),.bin,_ecd.bin2) endif endif diff --git a/drivers/atm/he.c b/drivers/atm/he.c index c7314a79da0..7d9b4e52f0b 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -820,7 +820,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPS_POOL - cpuaddr = pci_pool_alloc(he_dev->rbps_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else @@ -884,7 +884,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPL_POOL - cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else @@ -1724,7 +1724,7 @@ __alloc_tpd(struct he_dev *he_dev) struct he_tpd *tpd; dma_addr_t dma_handle; - tpd = pci_pool_alloc(he_dev->tpd_pool, SLAB_ATOMIC|SLAB_DMA, &dma_handle); + tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|GFP_DMA, &dma_handle); if (tpd == NULL) return NULL; diff --git a/drivers/base/core.c b/drivers/base/core.c index e4b530ef757..67b79a7592a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -386,6 +386,7 @@ void device_initialize(struct device *dev) INIT_LIST_HEAD(&dev->node); init_MUTEX(&dev->sem); device_init_wakeup(dev, 0); + set_dev_node(dev, -1); } #ifdef CONFIG_SYSFS_DEPRECATED diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 1f745f12f94..7fd095efaeb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -104,8 +104,8 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); /* * register_cpu - Setup a driverfs device for a CPU. - * @cpu - Callers can set the cpu->no_control field to 1, to indicate not to - * generate a control file in sysfs for this CPU. + * @cpu - cpu->hotpluggable field set to 1 will generate a control file in + * sysfs for this CPU. * @num - CPU number to use when creating the device. * * Initialize and register the CPU device. @@ -119,7 +119,7 @@ int __devinit register_cpu(struct cpu *cpu, int num) error = sysdev_register(&cpu->sysdev); - if (!error && !cpu->no_control) + if (!error && cpu->hotpluggable) register_cpu_control(cpu); if (!error) cpu_sys_devices[num] = &cpu->sysdev; diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index b2efbd4cf71..dbe0735f8c9 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -126,7 +126,7 @@ dma_pool_create (const char *name, struct device *dev, } else if (allocation < size) return NULL; - if (!(retval = kmalloc (sizeof *retval, SLAB_KERNEL))) + if (!(retval = kmalloc (sizeof *retval, GFP_KERNEL))) return retval; strlcpy (retval->name, name, sizeof retval->name); @@ -297,7 +297,7 @@ restart: } } } - if (!(page = pool_alloc_page (pool, SLAB_ATOMIC))) { + if (!(page = pool_alloc_page (pool, GFP_ATOMIC))) { if (mem_flags & __GFP_WAIT) { DECLARE_WAITQUEUE (wait, current); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c6b7d9c4b65..74b96795d2f 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -290,9 +290,8 @@ static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); static int block_size_init(void) { - sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_block_size_bytes.attr); - return 0; + return sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_block_size_bytes.attr); } /* @@ -323,12 +322,14 @@ static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); static int memory_probe_init(void) { - sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_probe.attr); - return 0; + return sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_probe.attr); } #else -#define memory_probe_init(...) do {} while (0) +static inline int memory_probe_init(void) +{ + return 0; +} #endif /* @@ -431,9 +432,12 @@ int __init memory_dev_init(void) { unsigned int i; int ret; + int err; memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; ret = sysdev_class_register(&memory_sysdev_class); + if (ret) + goto out; /* * Create entries for memory sections that were found @@ -442,11 +446,19 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!valid_section_nr(i)) continue; - add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + if (!ret) + ret = err; } - memory_probe_init(); - block_size_init(); - + err = memory_probe_init(); + if (!ret) + ret = err; + err = block_size_init(); + if (!ret) + ret = err; +out: + if (ret) + printk(KERN_ERR "%s() failed: %d\n", __FUNCTION__, ret); return ret; } diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 3d12b85b096..067a9e8bc37 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -108,7 +108,6 @@ static int __cpuinit topology_add_dev(unsigned int cpu) return rc; } -#ifdef CONFIG_HOTPLUG_CPU static void __cpuinit topology_remove_dev(unsigned int cpu) { struct sys_device *sys_dev = get_cpu_sysdev(cpu); @@ -136,7 +135,6 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, } return rc ? NOTIFY_BAD : NOTIFY_OK; } -#endif static int __cpuinit topology_sysfs_init(void) { diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 742d0740310..8d81a3a64c0 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -324,13 +324,13 @@ static boolean DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) Command->Next = Controller->FreeCommands; Controller->FreeCommands = Command; Controller->Commands[CommandIdentifier-1] = Command; - ScatterGatherCPU = pci_pool_alloc(ScatterGatherPool, SLAB_ATOMIC, + ScatterGatherCPU = pci_pool_alloc(ScatterGatherPool, GFP_ATOMIC, &ScatterGatherDMA); if (ScatterGatherCPU == NULL) return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); if (RequestSensePool != NULL) { - RequestSenseCPU = pci_pool_alloc(RequestSensePool, SLAB_ATOMIC, + RequestSenseCPU = pci_pool_alloc(RequestSensePool, GFP_ATOMIC, &RequestSenseDMA); if (RequestSenseCPU == NULL) { pci_pool_free(ScatterGatherPool, ScatterGatherCPU, diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 17dc22282e1..85072446d77 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -168,7 +168,8 @@ config BLK_CPQ_CISS_DA config CISS_SCSI_TAPE bool "SCSI tape drive support for Smart Array 5xxx" - depends on BLK_CPQ_CISS_DA && SCSI && PROC_FS + depends on BLK_CPQ_CISS_DA && PROC_FS + depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA help When enabled (Y), this option allows SCSI tape drives and SCSI medium changers (tape robots) to be accessed via a Compaq 5xxx array @@ -305,6 +306,7 @@ config BLK_DEV_LOOP config BLK_DEV_CRYPTOLOOP tristate "Cryptoloop Support" select CRYPTO + select CRYPTO_CBC depends on BLK_DEV_LOOP ---help--- Say Y here if you want to be able to use the ciphers that are diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index aa25f8b09fe..478489c568a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -12,7 +12,7 @@ #include <linux/netdevice.h> #include "aoe.h" -static kmem_cache_t *buf_pool_cache; +static struct kmem_cache *buf_pool_cache; static ssize_t aoedisk_show_state(struct gendisk * disk, char *page) { diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4105c3bf347..892e092afe9 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -47,14 +47,15 @@ #include <linux/completion.h> #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin)) -#define DRIVER_NAME "HP CISS Driver (v 3.6.10)" -#define DRIVER_VERSION CCISS_DRIVER_VERSION(3,6,10) +#define DRIVER_NAME "HP CISS Driver (v 3.6.14)" +#define DRIVER_VERSION CCISS_DRIVER_VERSION(3,6,14) /* Embedded module documentation macros - see modules.h */ MODULE_AUTHOR("Hewlett-Packard Company"); -MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 3.6.10"); +MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 3.6.14"); MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" " SA6i P600 P800 P400 P400i E200 E200i E500"); +MODULE_VERSION("3.6.14"); MODULE_LICENSE("GPL"); #include "cciss_cmd.h" @@ -81,7 +82,9 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3213}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3214}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3215}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3233}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3237}, + {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {0,} }; @@ -90,27 +93,29 @@ MODULE_DEVICE_TABLE(pci, cciss_pci_device_id); /* board_id = Subsystem Device ID & Vendor ID * product = Marketing Name for the board * access = Address of the struct of function pointers + * nr_cmds = Number of commands supported by controller */ static struct board_type products[] = { - {0x40700E11, "Smart Array 5300", &SA5_access}, - {0x40800E11, "Smart Array 5i", &SA5B_access}, - {0x40820E11, "Smart Array 532", &SA5B_access}, - {0x40830E11, "Smart Array 5312", &SA5B_access}, - {0x409A0E11, "Smart Array 641", &SA5_access}, - {0x409B0E11, "Smart Array 642", &SA5_access}, - {0x409C0E11, "Smart Array 6400", &SA5_access}, - {0x409D0E11, "Smart Array 6400 EM", &SA5_access}, - {0x40910E11, "Smart Array 6i", &SA5_access}, - {0x3225103C, "Smart Array P600", &SA5_access}, - {0x3223103C, "Smart Array P800", &SA5_access}, - {0x3234103C, "Smart Array P400", &SA5_access}, - {0x3235103C, "Smart Array P400i", &SA5_access}, - {0x3211103C, "Smart Array E200i", &SA5_access}, - {0x3212103C, "Smart Array E200", &SA5_access}, - {0x3213103C, "Smart Array E200i", &SA5_access}, - {0x3214103C, "Smart Array E200i", &SA5_access}, - {0x3215103C, "Smart Array E200i", &SA5_access}, - {0x3233103C, "Smart Array E500", &SA5_access}, + {0x40700E11, "Smart Array 5300", &SA5_access, 512}, + {0x40800E11, "Smart Array 5i", &SA5B_access, 512}, + {0x40820E11, "Smart Array 532", &SA5B_access, 512}, + {0x40830E11, "Smart Array 5312", &SA5B_access, 512}, + {0x409A0E11, "Smart Array 641", &SA5_access, 512}, + {0x409B0E11, "Smart Array 642", &SA5_access, 512}, + {0x409C0E11, "Smart Array 6400", &SA5_access, 512}, + {0x409D0E11, "Smart Array 6400 EM", &SA5_access, 512}, + {0x40910E11, "Smart Array 6i", &SA5_access, 512}, + {0x3225103C, "Smart Array P600", &SA5_access, 512}, + {0x3223103C, "Smart Array P800", &SA5_access, 512}, + {0x3234103C, "Smart Array P400", &SA5_access, 512}, + {0x3235103C, "Smart Array P400i", &SA5_access, 512}, + {0x3211103C, "Smart Array E200i", &SA5_access, 120}, + {0x3212103C, "Smart Array E200", &SA5_access, 120}, + {0x3213103C, "Smart Array E200i", &SA5_access, 120}, + {0x3214103C, "Smart Array E200i", &SA5_access, 120}, + {0x3215103C, "Smart Array E200i", &SA5_access, 120}, + {0x3237103C, "Smart Array E500", &SA5_access, 512}, + {0xFFFF103C, "Unknown Smart Array", &SA5_access, 120}, }; /* How long to wait (in milliseconds) for board to go into simple mode */ @@ -121,7 +126,6 @@ static struct board_type products[] = { #define MAX_CMD_RETRIES 3 #define READ_AHEAD 1024 -#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 32 /* Originally cciss driver only supports 8 major numbers */ @@ -137,7 +141,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); -static int revalidate_allvol(ctlr_info_t *host); static int cciss_revalidate(struct gendisk *disk); static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk); static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, @@ -265,6 +268,7 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset, "Firmware Version: %c%c%c%c\n" "IRQ: %d\n" "Logical drives: %d\n" + "Max sectors: %d\n" "Current Q depth: %d\n" "Current # commands on controller: %d\n" "Max Q depth since init: %d\n" @@ -275,7 +279,9 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset, (unsigned long)h->board_id, h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT], - h->num_luns, h->Qdepth, h->commands_outstanding, + h->num_luns, + h->cciss_max_sectors, + h->Qdepth, h->commands_outstanding, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; @@ -400,8 +406,8 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool) } else { /* get it out of the controllers pool */ do { - i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); - if (i == NR_CMDS) + i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds); + if (i == h->nr_cmds) return NULL; } while (test_and_set_bit (i & (BITS_PER_LONG - 1), @@ -487,7 +493,7 @@ static int cciss_open(struct inode *inode, struct file *filep) * but I'm already using way to many device nodes to claim another one * for "raw controller". */ - if (drv->nr_blocks == 0) { + if (drv->heads == 0) { if (iminor(inode) != 0) { /* not node 0? */ /* if not node 0 make sure it is a partition = 0 */ if (iminor(inode) & 0x0f) { @@ -850,9 +856,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, } case CCISS_REVALIDVOLS: - if (bdev != bdev->bd_contains || drv != host->drv) - return -ENXIO; - return revalidate_allvol(host); + return rebuild_lun_table(host, NULL); case CCISS_GETLUNINFO:{ LogvolInfo_struct luninfo; @@ -1152,75 +1156,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, } } -/* - * revalidate_allvol is for online array config utilities. After a - * utility reconfigures the drives in the array, it can use this function - * (through an ioctl) to make the driver zap any previous disk structs for - * that controller and get new ones. - * - * Right now I'm using the getgeometry() function to do this, but this - * function should probably be finer grained and allow you to revalidate one - * particular logical volume (instead of all of them on a particular - * controller). - */ -static int revalidate_allvol(ctlr_info_t *host) -{ - int ctlr = host->ctlr, i; - unsigned long flags; - - spin_lock_irqsave(CCISS_LOCK(ctlr), flags); - if (host->usage_count > 1) { - spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - printk(KERN_WARNING "cciss: Device busy for volume" - " revalidation (usage=%d)\n", host->usage_count); - return -EBUSY; - } - host->usage_count++; - spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - - for (i = 0; i < NWD; i++) { - struct gendisk *disk = host->gendisk[i]; - if (disk) { - request_queue_t *q = disk->queue; - - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (q) - blk_cleanup_queue(q); - } - } - - /* - * Set the partition and block size structures for all volumes - * on this controller to zero. We will reread all of this data - */ - memset(host->drv, 0, sizeof(drive_info_struct) - * CISS_MAX_LUN); - /* - * Tell the array controller not to give us any interrupts while - * we check the new geometry. Then turn interrupts back on when - * we're done. - */ - host->access.set_intr_mask(host, CCISS_INTR_OFF); - cciss_getgeometry(ctlr); - host->access.set_intr_mask(host, CCISS_INTR_ON); - - /* Loop through each real device */ - for (i = 0; i < NWD; i++) { - struct gendisk *disk = host->gendisk[i]; - drive_info_struct *drv = &(host->drv[i]); - /* we must register the controller even if no disks exist */ - /* this is for the online array utilities */ - if (!drv->heads && i) - continue; - blk_queue_hardsect_size(drv->queue, drv->block_size); - set_capacity(disk, drv->nr_blocks); - add_disk(disk); - } - host->usage_count--; - return 0; -} - static inline void complete_buffers(struct bio *bio, int status) { while (bio) { @@ -1243,7 +1178,7 @@ static void cciss_check_queues(ctlr_info_t *h) * in case the interrupt we serviced was from an ioctl and did not * free any new commands. */ - if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) + if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) return; /* We have room on the queue for more commands. Now we need to queue @@ -1262,7 +1197,7 @@ static void cciss_check_queues(ctlr_info_t *h) /* check to see if we have maxed out the number of commands * that can be placed on the queue. */ - if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) { + if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) { if (curr_queue == start_queue) { h->next_to_run = (start_queue + 1) % (h->highest_lun + 1); @@ -1380,6 +1315,11 @@ static void cciss_update_drive_info(int ctlr, int drv_index) /* if it's the controller it's already added */ if (drv_index) { disk->queue = blk_init_queue(do_cciss_request, &h->lock); + sprintf(disk->disk_name, "cciss/c%dd%d", ctlr, drv_index); + disk->major = h->major; + disk->first_minor = drv_index << NWD_SHIFT; + disk->fops = &cciss_fops; + disk->private_data = &h->drv[drv_index]; /* Set up queue information */ disk->queue->backing_dev_info.ra_pages = READ_AHEAD; @@ -1391,7 +1331,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index) /* This is a limit in the driver and could be eliminated. */ blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES); - blk_queue_max_sectors(disk->queue, 512); + blk_queue_max_sectors(disk->queue, h->cciss_max_sectors); blk_queue_softirq_done(disk->queue, cciss_softirq_done); @@ -1458,11 +1398,6 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) /* Set busy_configuring flag for this operation */ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - if (h->num_luns >= CISS_MAX_LUN) { - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - return -EINVAL; - } - if (h->busy_configuring) { spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return -EBUSY; @@ -1495,17 +1430,8 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) 0, 0, TYPE_CMD); if (return_code == IO_OK) { - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[0])) - << 24; - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[1])) - << 16; - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[2])) - << 8; - listlength |= - 0xff & (unsigned int)(ld_buff->LUNListLength[3]); + listlength = + be32_to_cpu(*(__u32 *) ld_buff->LUNListLength); } else { /* reading number of logical volumes failed */ printk(KERN_WARNING "cciss: report logical volume" " command failed\n"); @@ -1556,6 +1482,14 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) if (drv_index == -1) goto freeret; + /*Check if the gendisk needs to be allocated */ + if (!h->gendisk[drv_index]){ + h->gendisk[drv_index] = alloc_disk(1 << NWD_SHIFT); + if (!h->gendisk[drv_index]){ + printk(KERN_ERR "cciss: could not allocate new disk %d\n", drv_index); + goto mem_msg; + } + } } h->drv[drv_index].LunID = lunid; cciss_update_drive_info(ctlr, drv_index); @@ -1593,6 +1527,7 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, int clear_all) { + int i; ctlr_info_t *h = get_host(disk); if (!capable(CAP_SYS_RAWIO)) @@ -1616,9 +1551,35 @@ static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, del_gendisk(disk); if (q) { blk_cleanup_queue(q); + /* Set drv->queue to NULL so that we do not try + * to call blk_start_queue on this queue in the + * interrupt handler + */ drv->queue = NULL; } + /* If clear_all is set then we are deleting the logical + * drive, not just refreshing its info. For drives + * other than disk 0 we will call put_disk. We do not + * do this for disk 0 as we need it to be able to + * configure the controller. + */ + if (clear_all){ + /* This isn't pretty, but we need to find the + * disk in our array and NULL our the pointer. + * This is so that we will call alloc_disk if + * this index is used again later. + */ + for (i=0; i < CISS_MAX_LUN; i++){ + if(h->gendisk[i] == disk){ + h->gendisk[i] = NULL; + break; + } + } + put_disk(disk); + } } + } else { + set_capacity(disk, 0); } --h->num_luns; @@ -2136,7 +2097,7 @@ static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete) /* We've sent down an abort or reset, but something else has completed */ - if (srl->ncompletions >= (NR_CMDS + 2)) { + if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) { /* Uh oh. No room to save it for later... */ printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, " "reject list overflow, command lost!\n", ctlr); @@ -2673,7 +2634,7 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) a1 = a; if ((a & 0x04)) { a2 = (a >> 3); - if (a2 >= NR_CMDS) { + if (a2 >= h->nr_cmds) { printk(KERN_WARNING "cciss: controller cciss%d failed, stopping.\n", h->ctlr); @@ -2827,23 +2788,21 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c, if (err > 0) { printk(KERN_WARNING "cciss: only %d MSI-X vectors " "available\n", err); + goto default_int_mode; } else { printk(KERN_WARNING "cciss: MSI-X init failed %d\n", err); + goto default_int_mode; } } if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) { if (!pci_enable_msi(pdev)) { - c->intr[SIMPLE_MODE_INT] = pdev->irq; c->msi_vector = 1; - return; } else { printk(KERN_WARNING "cciss: MSI init failed\n"); - c->intr[SIMPLE_MODE_INT] = pdev->irq; - return; } } - default_int_mode: +default_int_mode: #endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ c->intr[SIMPLE_MODE_INT] = pdev->irq; @@ -2956,16 +2915,10 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) if (board_id == products[i].board_id) { c->product_name = products[i].product_name; c->access = *(products[i].access); + c->nr_cmds = products[i].nr_cmds; break; } } - if (i == ARRAY_SIZE(products)) { - printk(KERN_WARNING "cciss: Sorry, I don't know how" - " to access the Smart Array controller %08lx\n", - (unsigned long)board_id); - err = -ENODEV; - goto err_out_free_res; - } if ((readb(&c->cfgtable->Signature[0]) != 'C') || (readb(&c->cfgtable->Signature[1]) != 'I') || (readb(&c->cfgtable->Signature[2]) != 'S') || @@ -2974,6 +2927,27 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) err = -ENODEV; goto err_out_free_res; } + /* We didn't find the controller in our list. We know the + * signature is valid. If it's an HP device let's try to + * bind to the device and fire it up. Otherwise we bail. + */ + if (i == ARRAY_SIZE(products)) { + if (subsystem_vendor_id == PCI_VENDOR_ID_HP) { + c->product_name = products[i-1].product_name; + c->access = *(products[i-1].access); + c->nr_cmds = products[i-1].nr_cmds; + printk(KERN_WARNING "cciss: This is an unknown " + "Smart Array controller.\n" + "cciss: Please update to the latest driver " + "available from www.hp.com.\n"); + } else { + printk(KERN_WARNING "cciss: Sorry, I don't know how" + " to access the Smart Array controller %08lx\n" + , (unsigned long)board_id); + err = -ENODEV; + goto err_out_free_res; + } + } #ifdef CONFIG_X86 { /* Need to enable prefetch in the SCSI core for 6400 in x86 */ @@ -2984,6 +2958,17 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) } #endif + /* Disabling DMA prefetch for the P600 + * An ASIC bug may result in a prefetch beyond + * physical memory. + */ + if(board_id == 0x3225103C) { + __u32 dma_prefetch; + dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG); + dma_prefetch |= 0x8000; + writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG); + } + #ifdef CCISS_DEBUG printk("Trying to put board into Simple mode\n"); #endif /* CCISS_DEBUG */ @@ -3158,13 +3143,7 @@ geo_inq: /* Returns -1 if no free entries are left. */ static int alloc_cciss_hba(void) { - struct gendisk *disk[NWD]; - int i, n; - for (n = 0; n < NWD; n++) { - disk[n] = alloc_disk(1 << NWD_SHIFT); - if (!disk[n]) - goto out; - } + int i; for (i = 0; i < MAX_CTLR; i++) { if (!hba[i]) { @@ -3172,20 +3151,18 @@ static int alloc_cciss_hba(void) p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL); if (!p) goto Enomem; - for (n = 0; n < NWD; n++) - p->gendisk[n] = disk[n]; + p->gendisk[0] = alloc_disk(1 << NWD_SHIFT); + if (!p->gendisk[0]) + goto Enomem; hba[i] = p; return i; } } printk(KERN_WARNING "cciss: This driver supports a maximum" " of %d controllers.\n", MAX_CTLR); - goto out; - Enomem: + return -1; +Enomem: printk(KERN_ERR "cciss: out of memory.\n"); - out: - while (n--) - put_disk(disk[n]); return -1; } @@ -3195,7 +3172,7 @@ static void free_hba(int i) int n; hba[i] = NULL; - for (n = 0; n < NWD; n++) + for (n = 0; n < CISS_MAX_LUN; n++) put_disk(p->gendisk[n]); kfree(p); } @@ -3208,9 +3185,8 @@ static void free_hba(int i) static int __devinit cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - request_queue_t *q; int i; - int j; + int j = 0; int rc; int dac; @@ -3269,15 +3245,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = - kmalloc(((NR_CMDS + BITS_PER_LONG - + kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); hba[i]->cmd_pool = (CommandList_struct *) pci_alloc_consistent(hba[i]->pdev, - NR_CMDS * sizeof(CommandList_struct), + hba[i]->nr_cmds * sizeof(CommandList_struct), &(hba[i]->cmd_pool_dhandle)); hba[i]->errinfo_pool = (ErrorInfo_struct *) pci_alloc_consistent(hba[i]->pdev, - NR_CMDS * sizeof(ErrorInfo_struct), + hba[i]->nr_cmds * sizeof(ErrorInfo_struct), &(hba[i]->errinfo_pool_dhandle)); if ((hba[i]->cmd_pool_bits == NULL) || (hba[i]->cmd_pool == NULL) @@ -3288,7 +3264,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, #ifdef CONFIG_CISS_SCSI_TAPE hba[i]->scsi_rejects.complete = kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) * - (NR_CMDS + 5), GFP_KERNEL); + (hba[i]->nr_cmds + 5), GFP_KERNEL); if (hba[i]->scsi_rejects.complete == NULL) { printk(KERN_ERR "cciss: out of memory"); goto clean4; @@ -3302,7 +3278,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* command and error info recs zeroed out before they are used */ memset(hba[i]->cmd_pool_bits, 0, - ((NR_CMDS + BITS_PER_LONG - + ((hba[i]->nr_cmds + BITS_PER_LONG - 1) / BITS_PER_LONG) * sizeof(unsigned long)); #ifdef CCISS_DEBUG @@ -3317,18 +3293,34 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); cciss_procinit(i); + + hba[i]->cciss_max_sectors = 2048; + hba[i]->busy_initializing = 0; - for (j = 0; j < NWD; j++) { /* mfm */ + do { drive_info_struct *drv = &(hba[i]->drv[j]); struct gendisk *disk = hba[i]->gendisk[j]; + request_queue_t *q; + + /* Check if the disk was allocated already */ + if (!disk){ + hba[i]->gendisk[j] = alloc_disk(1 << NWD_SHIFT); + disk = hba[i]->gendisk[j]; + } + + /* Check that the disk was able to be allocated */ + if (!disk) { + printk(KERN_ERR "cciss: unable to allocate memory for disk %d\n", j); + goto clean4; + } q = blk_init_queue(do_cciss_request, &hba[i]->lock); if (!q) { printk(KERN_ERR "cciss: unable to allocate queue for disk %d\n", j); - break; + goto clean4; } drv->queue = q; @@ -3341,7 +3333,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* This is a limit in the driver and could be eliminated. */ blk_queue_max_phys_segments(q, MAXSGENTRIES); - blk_queue_max_sectors(q, 512); + blk_queue_max_sectors(q, hba[i]->cciss_max_sectors); blk_queue_softirq_done(q, cciss_softirq_done); @@ -3360,7 +3352,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, blk_queue_hardsect_size(q, drv->block_size); set_capacity(disk, drv->nr_blocks); add_disk(disk); - } + j++; + } while (j <= hba[i]->highest_lun); return 1; @@ -3371,11 +3364,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, kfree(hba[i]->cmd_pool_bits); if (hba[i]->cmd_pool) pci_free_consistent(hba[i]->pdev, - NR_CMDS * sizeof(CommandList_struct), + hba[i]->nr_cmds * sizeof(CommandList_struct), hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); if (hba[i]->errinfo_pool) pci_free_consistent(hba[i]->pdev, - NR_CMDS * sizeof(ErrorInfo_struct), + hba[i]->nr_cmds * sizeof(ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]); @@ -3383,6 +3376,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, unregister_blkdev(hba[i]->major, hba[i]->devname); clean1: hba[i]->busy_initializing = 0; + /* cleanup any queues that may have been initialized */ + for (j=0; j <= hba[i]->highest_lun; j++){ + drive_info_struct *drv = &(hba[i]->drv[j]); + if (drv->queue) + blk_cleanup_queue(drv->queue); + } + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); free_hba(i); return -1; } @@ -3430,7 +3432,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) remove_proc_entry(hba[i]->devname, proc_cciss); /* remove it from the disk list */ - for (j = 0; j < NWD; j++) { + for (j = 0; j < CISS_MAX_LUN; j++) { struct gendisk *disk = hba[i]->gendisk[j]; if (disk) { request_queue_t *q = disk->queue; @@ -3442,9 +3444,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) } } - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct), hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(ErrorInfo_struct), + pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); #ifdef CONFIG_CISS_SCSI_TAPE diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 562235c1445..b70988dd33e 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -6,7 +6,6 @@ #include "cciss_cmd.h" -#define NWD 16 #define NWD_SHIFT 4 #define MAX_PART (1 << NWD_SHIFT) @@ -60,6 +59,7 @@ struct ctlr_info __u32 board_id; void __iomem *vaddr; unsigned long paddr; + int nr_cmds; /* Number of commands allowed on this controller */ CfgTable_struct __iomem *cfgtable; int interrupts_enabled; int major; @@ -76,6 +76,7 @@ struct ctlr_info unsigned int intr[4]; unsigned int msix_vector; unsigned int msi_vector; + int cciss_max_sectors; BYTE cciss_read; BYTE cciss_write; BYTE cciss_read_capacity; @@ -110,7 +111,7 @@ struct ctlr_info int next_to_run; // Disk structures we need to pass back - struct gendisk *gendisk[NWD]; + struct gendisk *gendisk[CISS_MAX_LUN]; #ifdef CONFIG_CISS_SCSI_TAPE void *scsi_ctlr; /* ptr to structure containing scsi related stuff */ /* list of block side commands the scsi error handling sucked up */ @@ -282,6 +283,7 @@ struct board_type { __u32 board_id; char *product_name; struct access_method *access; + int nr_cmds; /* Max cmds this kind of ctlr can handle. */ }; #define CCISS_LOCK(i) (&hba[i]->lock) diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index 4af7c4c0c7a..43bf5593b59 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -55,6 +55,7 @@ #define I2O_INT_MASK 0x34 #define I2O_IBPOST_Q 0x40 #define I2O_OBPOST_Q 0x44 +#define I2O_DMA1_CFG 0x214 //Configuration Table #define CFGTBL_ChangeReq 0x00000001l @@ -88,7 +89,7 @@ typedef union _u64bit //########################################################################### //STRUCTURES //########################################################################### -#define CISS_MAX_LUN 16 +#define CISS_MAX_LUN 1024 #define CISS_MAX_PHYS_LUN 1024 // SCSI-3 Cmmands diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9d1035e8d9d..7bf2cfbd628 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -355,14 +355,30 @@ harderror: return NULL; } +static ssize_t pid_show(struct gendisk *disk, char *page) +{ + return sprintf(page, "%ld\n", + (long) ((struct nbd_device *)disk->private_data)->pid); +} + +static struct disk_attribute pid_attr = { + .attr = { .name = "pid", .mode = S_IRUGO }, + .show = pid_show, +}; + static void nbd_do_it(struct nbd_device *lo) { struct request *req; BUG_ON(lo->magic != LO_MAGIC); + lo->pid = current->pid; + sysfs_create_file(&lo->disk->kobj, &pid_attr.attr); + while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); + + sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr); return; } diff --git a/drivers/block/paride/aten.c b/drivers/block/paride/aten.c index c4d696d43dc..2695465568a 100644 --- a/drivers/block/paride/aten.c +++ b/drivers/block/paride/aten.c @@ -149,12 +149,12 @@ static struct pi_protocol aten = { static int __init aten_init(void) { - return pi_register(&aten)-1; + return paride_register(&aten); } static void __exit aten_exit(void) { - pi_unregister( &aten ); + paride_unregister( &aten ); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/bpck.c b/drivers/block/paride/bpck.c index d462ff6b139..4f27e7392e3 100644 --- a/drivers/block/paride/bpck.c +++ b/drivers/block/paride/bpck.c @@ -464,12 +464,12 @@ static struct pi_protocol bpck = { static int __init bpck_init(void) { - return pi_register(&bpck)-1; + return paride_register(&bpck); } static void __exit bpck_exit(void) { - pi_unregister(&bpck); + paride_unregister(&bpck); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c index 41a237c5957..ad124525ac2 100644 --- a/drivers/block/paride/bpck6.c +++ b/drivers/block/paride/bpck6.c @@ -31,10 +31,7 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */ #include <linux/slab.h> #include <linux/types.h> #include <asm/io.h> - -#if defined(CONFIG_PARPORT_MODULE)||defined(CONFIG_PARPORT) #include <linux/parport.h> -#endif #include "ppc6lnx.c" #include "paride.h" @@ -139,11 +136,6 @@ static int bpck6_test_port ( PIA *pi ) /* check for 8-bit port */ PPCSTRUCT(pi)->ppc_id=pi->unit; PPCSTRUCT(pi)->lpt_addr=pi->port; -#ifdef CONFIG_PARPORT_PC_MODULE -#define CONFIG_PARPORT_PC -#endif - -#ifdef CONFIG_PARPORT_PC /* look at the parport device to see if what modes we can use */ if(((struct pardevice *)(pi->pardev))->port->modes & (PARPORT_MODE_EPP) @@ -161,11 +153,6 @@ static int bpck6_test_port ( PIA *pi ) /* check for 8-bit port */ { return 1; } -#else - /* there is no way of knowing what kind of port we have - default to the highest mode possible */ - return 5; -#endif } static int bpck6_probe_unit ( PIA *pi ) @@ -265,12 +252,12 @@ static int __init bpck6_init(void) printk(KERN_INFO "bpck6: Copyright 2001 by Micro Solutions, Inc., DeKalb IL. USA\n"); if(verbose) printk(KERN_DEBUG "bpck6: verbose debug enabled.\n"); - return pi_register(&bpck6) - 1; + return paride_register(&bpck6); } static void __exit bpck6_exit(void) { - pi_unregister(&bpck6); + paride_unregister(&bpck6); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/comm.c b/drivers/block/paride/comm.c index 43d61359d8e..9bcd3549532 100644 --- a/drivers/block/paride/comm.c +++ b/drivers/block/paride/comm.c @@ -205,12 +205,12 @@ static struct pi_protocol comm = { static int __init comm_init(void) { - return pi_register(&comm)-1; + return paride_register(&comm); } static void __exit comm_exit(void) { - pi_unregister(&comm); + paride_unregister(&comm); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/dstr.c b/drivers/block/paride/dstr.c index 04d53bf58e8..accc5c777cb 100644 --- a/drivers/block/paride/dstr.c +++ b/drivers/block/paride/dstr.c @@ -220,12 +220,12 @@ static struct pi_protocol dstr = { static int __init dstr_init(void) { - return pi_register(&dstr)-1; + return paride_register(&dstr); } static void __exit dstr_exit(void) { - pi_unregister(&dstr); + paride_unregister(&dstr); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/epat.c b/drivers/block/paride/epat.c index 55d1c0a1fb9..1bcdff77322 100644 --- a/drivers/block/paride/epat.c +++ b/drivers/block/paride/epat.c @@ -327,12 +327,12 @@ static int __init epat_init(void) #ifdef CONFIG_PARIDE_EPATC8 epatc8 = 1; #endif - return pi_register(&epat)-1; + return paride_register(&epat); } static void __exit epat_exit(void) { - pi_unregister(&epat); + paride_unregister(&epat); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/epia.c b/drivers/block/paride/epia.c index 0f2e0c292d8..fb0e782d055 100644 --- a/drivers/block/paride/epia.c +++ b/drivers/block/paride/epia.c @@ -303,12 +303,12 @@ static struct pi_protocol epia = { static int __init epia_init(void) { - return pi_register(&epia)-1; + return paride_register(&epia); } static void __exit epia_exit(void) { - pi_unregister(&epia); + paride_unregister(&epia); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/fit2.c b/drivers/block/paride/fit2.c index e0f0691d8bc..381283753ae 100644 --- a/drivers/block/paride/fit2.c +++ b/drivers/block/paride/fit2.c @@ -138,12 +138,12 @@ static struct pi_protocol fit2 = { static int __init fit2_init(void) { - return pi_register(&fit2)-1; + return paride_register(&fit2); } static void __exit fit2_exit(void) { - pi_unregister(&fit2); + paride_unregister(&fit2); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/fit3.c b/drivers/block/paride/fit3.c index 15400e7bc66..275d269458e 100644 --- a/drivers/block/paride/fit3.c +++ b/drivers/block/paride/fit3.c @@ -198,12 +198,12 @@ static struct pi_protocol fit3 = { static int __init fit3_init(void) { - return pi_register(&fit3)-1; + return paride_register(&fit3); } static void __exit fit3_exit(void) { - pi_unregister(&fit3); + paride_unregister(&fit3); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/friq.c b/drivers/block/paride/friq.c index 5ea2904d281..4f2ba244689 100644 --- a/drivers/block/paride/friq.c +++ b/drivers/block/paride/friq.c @@ -263,12 +263,12 @@ static struct pi_protocol friq = { static int __init friq_init(void) { - return pi_register(&friq)-1; + return paride_register(&friq); } static void __exit friq_exit(void) { - pi_unregister(&friq); + paride_unregister(&friq); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/frpw.c b/drivers/block/paride/frpw.c index 56b3824b153..c3cde364603 100644 --- a/drivers/block/paride/frpw.c +++ b/drivers/block/paride/frpw.c @@ -300,12 +300,12 @@ static struct pi_protocol frpw = { static int __init frpw_init(void) { - return pi_register(&frpw)-1; + return paride_register(&frpw); } static void __exit frpw_exit(void) { - pi_unregister(&frpw); + paride_unregister(&frpw); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/jumbo b/drivers/block/paride/jumbo deleted file mode 100644 index e793b9cb7e7..00000000000 --- a/drivers/block/paride/jumbo +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh -# -# This script can be used to build "jumbo" modules that contain the -# base PARIDE support, one protocol module and one high-level driver. -# -echo -n "High level driver [pcd] : " -read X -HLD=${X:-pcd} -# -echo -n "Protocol module [bpck] : " -read X -PROTO=${X:-bpck} -# -echo -n "Use MODVERSIONS [y] ? " -read X -UMODV=${X:-y} -# -echo -n "For SMP kernel [n] ? " -read X -USMP=${X:-n} -# -echo -n "Support PARPORT [n] ? " -read X -UPARP=${X:-n} -# -echo -# -case $USMP in - y* | Y* ) FSMP="-DCONFIG_SMP" - ;; - *) FSMP="" - ;; -esac -# -MODI="-include ../../../include/linux/modversions.h" -# -case $UMODV in - y* | Y* ) FMODV="-DMODVERSIONS $MODI" - ;; - *) FMODV="" - ;; -esac -# -case $UPARP in - y* | Y* ) FPARP="-DCONFIG_PARPORT" - ;; - *) FPARP="" - ;; -esac -# -TARG=$HLD-$PROTO.o -FPROTO=-DCONFIG_PARIDE_`echo "$PROTO" | tr [a-z] [A-Z]` -FK="-D__KERNEL__ -I ../../../include" -FLCH=-D_LINUX_CONFIG_H -# -echo cc $FK $FSMP $FLCH $FPARP $FPROTO $FMODV -Wall -O2 -o Jb.o -c paride.c -cc $FK $FSMP $FLCH $FPARP $FPROTO $FMODV -Wall -O2 -o Jb.o -c paride.c -# -echo cc $FK $FSMP $FMODV -Wall -O2 -o Jp.o -c $PROTO.c -cc $FK $FSMP $FMODV -Wall -O2 -o Jp.o -c $PROTO.c -# -echo cc $FK $FSMP $FMODV -DMODULE -DPARIDE_JUMBO -Wall -O2 -o Jd.o -c $HLD.c -cc $FK $FSMP $FMODV -DMODULE -DPARIDE_JUMBO -Wall -O2 -o Jd.o -c $HLD.c -# -echo ld -r -o $TARG Jp.o Jb.o Jd.o -ld -r -o $TARG Jp.o Jb.o Jd.o -# -# -rm Jp.o Jb.o Jd.o -# diff --git a/drivers/block/paride/kbic.c b/drivers/block/paride/kbic.c index d983bcea76f..35999c415ee 100644 --- a/drivers/block/paride/kbic.c +++ b/drivers/block/paride/kbic.c @@ -283,13 +283,21 @@ static struct pi_protocol k971 = { static int __init kbic_init(void) { - return (pi_register(&k951)||pi_register(&k971))-1; + int rv; + + rv = paride_register(&k951); + if (rv < 0) + return rv; + rv = paride_register(&k971); + if (rv < 0) + paride_unregister(&k951); + return rv; } static void __exit kbic_exit(void) { - pi_unregister(&k951); - pi_unregister(&k971); + paride_unregister(&k951); + paride_unregister(&k971); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/ktti.c b/drivers/block/paride/ktti.c index 6c7edbfba9a..117ab0e8ccf 100644 --- a/drivers/block/paride/ktti.c +++ b/drivers/block/paride/ktti.c @@ -115,12 +115,12 @@ static struct pi_protocol ktti = { static int __init ktti_init(void) { - return pi_register(&ktti)-1; + return paride_register(&ktti); } static void __exit ktti_exit(void) { - pi_unregister(&ktti); + paride_unregister(&ktti); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/on20.c b/drivers/block/paride/on20.c index 9f8e0109680..0173697a1a4 100644 --- a/drivers/block/paride/on20.c +++ b/drivers/block/paride/on20.c @@ -140,12 +140,12 @@ static struct pi_protocol on20 = { static int __init on20_init(void) { - return pi_register(&on20)-1; + return paride_register(&on20); } static void __exit on20_exit(void) { - pi_unregister(&on20); + paride_unregister(&on20); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/on26.c b/drivers/block/paride/on26.c index 0f833caa210..95ba256921f 100644 --- a/drivers/block/paride/on26.c +++ b/drivers/block/paride/on26.c @@ -306,12 +306,12 @@ static struct pi_protocol on26 = { static int __init on26_init(void) { - return pi_register(&on26)-1; + return paride_register(&on26); } static void __exit on26_exit(void) { - pi_unregister(&on26); + paride_unregister(&on26); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/paride.c b/drivers/block/paride/paride.c index 4b258f7836f..48c50f11f63 100644 --- a/drivers/block/paride/paride.c +++ b/drivers/block/paride/paride.c @@ -29,14 +29,7 @@ #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/sched.h> /* TASK_* */ - -#ifdef CONFIG_PARPORT_MODULE -#define CONFIG_PARPORT -#endif - -#ifdef CONFIG_PARPORT #include <linux/parport.h> -#endif #include "paride.h" @@ -76,8 +69,6 @@ void pi_read_block(PIA * pi, char *buf, int count) EXPORT_SYMBOL(pi_read_block); -#ifdef CONFIG_PARPORT - static void pi_wake_up(void *p) { PIA *pi = (PIA *) p; @@ -100,11 +91,8 @@ static void pi_wake_up(void *p) cont(); } -#endif - int pi_schedule_claimed(PIA * pi, void (*cont) (void)) { -#ifdef CONFIG_PARPORT unsigned long flags; spin_lock_irqsave(&pi_spinlock, flags); @@ -115,7 +103,6 @@ int pi_schedule_claimed(PIA * pi, void (*cont) (void)) } pi->claimed = 1; spin_unlock_irqrestore(&pi_spinlock, flags); -#endif return 1; } EXPORT_SYMBOL(pi_schedule_claimed); @@ -133,20 +120,16 @@ static void pi_claim(PIA * pi) if (pi->claimed) return; pi->claimed = 1; -#ifdef CONFIG_PARPORT if (pi->pardev) wait_event(pi->parq, !parport_claim((struct pardevice *) pi->pardev)); -#endif } static void pi_unclaim(PIA * pi) { pi->claimed = 0; -#ifdef CONFIG_PARPORT if (pi->pardev) parport_release((struct pardevice *) (pi->pardev)); -#endif } void pi_connect(PIA * pi) @@ -167,21 +150,15 @@ EXPORT_SYMBOL(pi_disconnect); static void pi_unregister_parport(PIA * pi) { -#ifdef CONFIG_PARPORT if (pi->pardev) { parport_unregister_device((struct pardevice *) (pi->pardev)); pi->pardev = NULL; } -#endif } void pi_release(PIA * pi) { pi_unregister_parport(pi); -#ifndef CONFIG_PARPORT - if (pi->reserved) - release_region(pi->port, pi->reserved); -#endif /* !CONFIG_PARPORT */ if (pi->proto->release_proto) pi->proto->release_proto(pi); module_put(pi->proto->owner); @@ -229,7 +206,7 @@ static int pi_test_proto(PIA * pi, char *scratch, int verbose) return res; } -int pi_register(PIP * pr) +int paride_register(PIP * pr) { int k; @@ -237,24 +214,24 @@ int pi_register(PIP * pr) if (protocols[k] && !strcmp(pr->name, protocols[k]->name)) { printk("paride: %s protocol already registered\n", pr->name); - return 0; + return -1; } k = 0; while ((k < MAX_PROTOS) && (protocols[k])) k++; if (k == MAX_PROTOS) { printk("paride: protocol table full\n"); - return 0; + return -1; } protocols[k] = pr; pr->index = k; printk("paride: %s registered as protocol %d\n", pr->name, k); - return 1; + return 0; } -EXPORT_SYMBOL(pi_register); +EXPORT_SYMBOL(paride_register); -void pi_unregister(PIP * pr) +void paride_unregister(PIP * pr) { if (!pr) return; @@ -265,12 +242,10 @@ void pi_unregister(PIP * pr) protocols[pr->index] = NULL; } -EXPORT_SYMBOL(pi_unregister); +EXPORT_SYMBOL(paride_unregister); static int pi_register_parport(PIA * pi, int verbose) { -#ifdef CONFIG_PARPORT - struct parport *port; port = parport_find_base(pi->port); @@ -290,7 +265,6 @@ static int pi_register_parport(PIA * pi, int verbose) printk("%s: 0x%x is %s\n", pi->device, pi->port, port->name); pi->parname = (char *) port->name; -#endif return 1; } @@ -447,13 +421,6 @@ int pi_init(PIA * pi, int autoprobe, int port, int mode, printk("%s: Adapter not found\n", device); return 0; } -#ifndef CONFIG_PARPORT - if (!request_region(pi->port, pi->reserved, pi->device)) { - printk(KERN_WARNING "paride: Unable to request region 0x%x\n", - pi->port); - return 0; - } -#endif /* !CONFIG_PARPORT */ if (pi->parname) printk("%s: Sharing %s at 0x%x\n", pi->device, diff --git a/drivers/block/paride/paride.h b/drivers/block/paride/paride.h index c6d98ef09e4..2bddbf45518 100644 --- a/drivers/block/paride/paride.h +++ b/drivers/block/paride/paride.h @@ -163,8 +163,8 @@ struct pi_protocol { typedef struct pi_protocol PIP; -extern int pi_register( PIP * ); -extern void pi_unregister ( PIP * ); +extern int paride_register( PIP * ); +extern void paride_unregister ( PIP * ); #endif /* __DRIVERS_PARIDE_H__ */ /* end of paride.h */ diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index ac5ba462710..c852eed91e4 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -912,12 +912,12 @@ static int __init pcd_init(void) int unit; if (disable) - return -1; + return -EINVAL; pcd_init_units(); if (pcd_detect()) - return -1; + return -ENODEV; /* get the atapi capabilities page */ pcd_probe_capabilities(); @@ -925,7 +925,7 @@ static int __init pcd_init(void) if (register_blkdev(major, name)) { for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) put_disk(cd->disk); - return -1; + return -EBUSY; } pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock); @@ -933,7 +933,7 @@ static int __init pcd_init(void) unregister_blkdev(major, name); for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) put_disk(cd->disk); - return -1; + return -ENOMEM; } for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 1a9dee19efc..7cdaa195126 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -933,25 +933,25 @@ static int __init pf_init(void) int unit; if (disable) - return -1; + return -EINVAL; pf_init_units(); if (pf_detect()) - return -1; + return -ENODEV; pf_busy = 0; if (register_blkdev(major, name)) { for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) put_disk(pf->disk); - return -1; + return -EBUSY; } pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock); if (!pf_queue) { unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) put_disk(pf->disk); - return -1; + return -ENOMEM; } blk_queue_max_phys_segments(pf_queue, cluster); diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 13f998aa1cd..9970aedbb5d 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -646,14 +646,14 @@ static int __init pg_init(void) int err; if (disable){ - err = -1; + err = -EINVAL; goto out; } pg_init_units(); if (pg_detect()) { - err = -1; + err = -ENODEV; goto out; } diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 35fb2663672..c902b25e486 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -946,12 +946,12 @@ static int __init pt_init(void) int err; if (disable) { - err = -1; + err = -EINVAL; goto out; } if (pt_detect()) { - err = -1; + err = -ENODEV; goto out; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f2904f67af4..e45eaa26411 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -54,7 +54,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/miscdevice.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/mutex.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_ioctl.h> diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index d0cface535f..5e2c3188200 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -330,7 +330,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) reliable packet if the number of packets sent but not yet ack'ed is < than the winsize */ - spin_lock_irqsave(&bcsp->unack.lock, flags); + spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING); if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) { struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type); @@ -696,7 +696,7 @@ static void bcsp_timed_event(unsigned long arg) BT_DBG("hu %p retransmitting %u pkts", hu, bcsp->unack.qlen); - spin_lock_irqsave(&bcsp->unack.lock, flags); + spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING); while ((skb = __skb_dequeue_tail(&bcsp->unack)) != NULL) { bcsp->msgq_txseq = (bcsp->msgq_txseq - 1) & 0x07; diff --git a/drivers/cdrom/optcd.c b/drivers/cdrom/optcd.c index 25032d7edc5..3541690a77d 100644 --- a/drivers/cdrom/optcd.c +++ b/drivers/cdrom/optcd.c @@ -101,7 +101,7 @@ static void debug(int debug_this, const char* fmt, ...) return; va_start(args, fmt); - vsprintf(s, fmt, args); + vsnprintf(s, sizeof(s), fmt, args); printk(KERN_DEBUG "optcd: %s\n", s); va_end(args); } diff --git a/drivers/cdrom/sbpcd.c b/drivers/cdrom/sbpcd.c index ba50e5a712f..a1283b1ef98 100644 --- a/drivers/cdrom/sbpcd.c +++ b/drivers/cdrom/sbpcd.c @@ -770,11 +770,10 @@ static void msg(int level, const char *fmt, ...) msgnum++; if (msgnum>99) msgnum=0; - sprintf(buf, MSG_LEVEL "%s-%d [%02d]: ", major_name, current_drive - D_S, msgnum); va_start(args, fmt); - vsprintf(&buf[18], fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - printk(buf); + printk(MSG_LEVEL "%s-%d [%02d]: %s", major_name, current_drive - D_S, msgnum, buf); #if KLOGD_PAUSE sbp_sleep(KLOGD_PAUSE); /* else messages get lost */ #endif /* KLOGD_PAUSE */ diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 00b17ae3973..2f2c4efff8a 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -459,7 +459,7 @@ static const struct aper_size_info_32 nforce3_sizes[5] = /* Handle shadow device of the Nvidia NForce3 */ /* CHECK-ME original 2.4 version set up some IORRs. Check if that is needed. */ -static int __devinit nforce3_agp_init(struct pci_dev *pdev) +static int nforce3_agp_init(struct pci_dev *pdev) { u32 tmp, apbase, apbar, aplimit; struct pci_dev *dev1; diff --git a/drivers/char/decserial.c b/drivers/char/decserial.c index 85f404e25c7..8ea2bea2b18 100644 --- a/drivers/char/decserial.c +++ b/drivers/char/decserial.c @@ -23,20 +23,12 @@ extern int zs_init(void); #endif -#ifdef CONFIG_DZ -extern int dz_init(void); -#endif - #ifdef CONFIG_SERIAL_CONSOLE #ifdef CONFIG_ZS extern void zs_serial_console_init(void); #endif -#ifdef CONFIG_DZ -extern void dz_serial_console_init(void); -#endif - #endif /* rs_init - starts up the serial interface - @@ -46,23 +38,11 @@ extern void dz_serial_console_init(void); int __init rs_init(void) { - -#if defined(CONFIG_ZS) && defined(CONFIG_DZ) +#ifdef CONFIG_ZS if (IOASIC) return zs_init(); - else - return dz_init(); -#else - -#ifdef CONFIG_ZS - return zs_init(); -#endif - -#ifdef CONFIG_DZ - return dz_init(); -#endif - #endif + return -ENXIO; } __initcall(rs_init); @@ -76,21 +56,9 @@ __initcall(rs_init); */ static int __init decserial_console_init(void) { -#if defined(CONFIG_ZS) && defined(CONFIG_DZ) +#ifdef CONFIG_ZS if (IOASIC) zs_serial_console_init(); - else - dz_serial_console_init(); -#else - -#ifdef CONFIG_ZS - zs_serial_console_init(); -#endif - -#ifdef CONFIG_DZ - dz_serial_console_init(); -#endif - #endif return 0; } diff --git a/drivers/char/drm/drm_sman.c b/drivers/char/drm/drm_sman.c index 425c82336ee..19c81d2e13d 100644 --- a/drivers/char/drm/drm_sman.c +++ b/drivers/char/drm/drm_sman.c @@ -162,6 +162,7 @@ drm_sman_set_manager(drm_sman_t * sman, unsigned int manager, return 0; } +EXPORT_SYMBOL(drm_sman_set_manager); static drm_owner_item_t *drm_sman_get_owner_item(drm_sman_t * sman, unsigned long owner) diff --git a/drivers/char/drm/drm_vm.c b/drivers/char/drm/drm_vm.c index b40ae438f53..ae2691942dd 100644 --- a/drivers/char/drm/drm_vm.c +++ b/drivers/char/drm/drm_vm.c @@ -147,14 +147,14 @@ static __inline__ struct page *drm_do_vm_shm_nopage(struct vm_area_struct *vma, if (address > vma->vm_end) return NOPAGE_SIGBUS; /* Disallow mremap */ if (!map) - return NOPAGE_OOM; /* Nothing allocated */ + return NOPAGE_SIGBUS; /* Nothing allocated */ offset = address - vma->vm_start; i = (unsigned long)map->handle + offset; page = (map->type == _DRM_CONSISTENT) ? virt_to_page((void *)i) : vmalloc_to_page((void *)i); if (!page) - return NOPAGE_OOM; + return NOPAGE_SIGBUS; get_page(page); DRM_DEBUG("shm_nopage 0x%lx\n", address); @@ -272,7 +272,7 @@ static __inline__ struct page *drm_do_vm_dma_nopage(struct vm_area_struct *vma, if (address > vma->vm_end) return NOPAGE_SIGBUS; /* Disallow mremap */ if (!dma->pagelist) - return NOPAGE_OOM; /* Nothing allocated */ + return NOPAGE_SIGBUS; /* Nothing allocated */ offset = address - vma->vm_start; /* vm_[pg]off[set] should be 0 */ page_nr = offset >> PAGE_SHIFT; @@ -310,7 +310,7 @@ static __inline__ struct page *drm_do_vm_sg_nopage(struct vm_area_struct *vma, if (address > vma->vm_end) return NOPAGE_SIGBUS; /* Disallow mremap */ if (!entry->pagelist) - return NOPAGE_OOM; /* Nothing allocated */ + return NOPAGE_SIGBUS; /* Nothing allocated */ offset = address - vma->vm_start; map_offset = map->offset - (unsigned long)dev->sg->virtual; diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 9902ffad3b1..cc2cd46bedc 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -38,6 +38,7 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/delay.h> +#include <linux/freezer.h> #include <asm/uaccess.h> diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 8728255c946..d090622f1de 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -337,11 +337,6 @@ static int hvcs_open(struct tty_struct *tty, struct file *filp); static void hvcs_close(struct tty_struct *tty, struct file *filp); static void hvcs_hangup(struct tty_struct * tty); -static void hvcs_create_device_attrs(struct hvcs_struct *hvcsd); -static void hvcs_remove_device_attrs(struct vio_dev *vdev); -static void hvcs_create_driver_attrs(void); -static void hvcs_remove_driver_attrs(void); - static int __devinit hvcs_probe(struct vio_dev *dev, const struct vio_device_id *id); static int __devexit hvcs_remove(struct vio_dev *dev); @@ -353,6 +348,172 @@ static void __exit hvcs_module_exit(void); #define HVCS_TRY_WRITE 0x00000004 #define HVCS_READ_MASK (HVCS_SCHED_READ | HVCS_QUICK_READ) +static inline struct hvcs_struct *from_vio_dev(struct vio_dev *viod) +{ + return viod->dev.driver_data; +} +/* The sysfs interface for the driver and devices */ + +static ssize_t hvcs_partner_vtys_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + int retval; + + spin_lock_irqsave(&hvcsd->lock, flags); + retval = sprintf(buf, "%X\n", hvcsd->p_unit_address); + spin_unlock_irqrestore(&hvcsd->lock, flags); + return retval; +} +static DEVICE_ATTR(partner_vtys, S_IRUGO, hvcs_partner_vtys_show, NULL); + +static ssize_t hvcs_partner_clcs_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + int retval; + + spin_lock_irqsave(&hvcsd->lock, flags); + retval = sprintf(buf, "%s\n", &hvcsd->p_location_code[0]); + spin_unlock_irqrestore(&hvcsd->lock, flags); + return retval; +} +static DEVICE_ATTR(partner_clcs, S_IRUGO, hvcs_partner_clcs_show, NULL); + +static ssize_t hvcs_current_vty_store(struct device *dev, struct device_attribute *attr, const char * buf, + size_t count) +{ + /* + * Don't need this feature at the present time because firmware doesn't + * yet support multiple partners. + */ + printk(KERN_INFO "HVCS: Denied current_vty change: -EPERM.\n"); + return -EPERM; +} + +static ssize_t hvcs_current_vty_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + int retval; + + spin_lock_irqsave(&hvcsd->lock, flags); + retval = sprintf(buf, "%s\n", &hvcsd->p_location_code[0]); + spin_unlock_irqrestore(&hvcsd->lock, flags); + return retval; +} + +static DEVICE_ATTR(current_vty, + S_IRUGO | S_IWUSR, hvcs_current_vty_show, hvcs_current_vty_store); + +static ssize_t hvcs_vterm_state_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + + /* writing a '0' to this sysfs entry will result in the disconnect. */ + if (simple_strtol(buf, NULL, 0) != 0) + return -EINVAL; + + spin_lock_irqsave(&hvcsd->lock, flags); + + if (hvcsd->open_count > 0) { + spin_unlock_irqrestore(&hvcsd->lock, flags); + printk(KERN_INFO "HVCS: vterm state unchanged. " + "The hvcs device node is still in use.\n"); + return -EPERM; + } + + if (hvcsd->connected == 0) { + spin_unlock_irqrestore(&hvcsd->lock, flags); + printk(KERN_INFO "HVCS: vterm state unchanged. The" + " vty-server is not connected to a vty.\n"); + return -EPERM; + } + + hvcs_partner_free(hvcsd); + printk(KERN_INFO "HVCS: Closed vty-server@%X and" + " partner vty@%X:%d connection.\n", + hvcsd->vdev->unit_address, + hvcsd->p_unit_address, + (uint32_t)hvcsd->p_partition_ID); + + spin_unlock_irqrestore(&hvcsd->lock, flags); + return count; +} + +static ssize_t hvcs_vterm_state_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + int retval; + + spin_lock_irqsave(&hvcsd->lock, flags); + retval = sprintf(buf, "%d\n", hvcsd->connected); + spin_unlock_irqrestore(&hvcsd->lock, flags); + return retval; +} +static DEVICE_ATTR(vterm_state, S_IRUGO | S_IWUSR, + hvcs_vterm_state_show, hvcs_vterm_state_store); + +static ssize_t hvcs_index_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct vio_dev *viod = to_vio_dev(dev); + struct hvcs_struct *hvcsd = from_vio_dev(viod); + unsigned long flags; + int retval; + + spin_lock_irqsave(&hvcsd->lock, flags); + retval = sprintf(buf, "%d\n", hvcsd->index); + spin_unlock_irqrestore(&hvcsd->lock, flags); + return retval; +} + +static DEVICE_ATTR(index, S_IRUGO, hvcs_index_show, NULL); + +static struct attribute *hvcs_attrs[] = { + &dev_attr_partner_vtys.attr, + &dev_attr_partner_clcs.attr, + &dev_attr_current_vty.attr, + &dev_attr_vterm_state.attr, + &dev_attr_index.attr, + NULL, +}; + +static struct attribute_group hvcs_attr_group = { + .attrs = hvcs_attrs, +}; + +static ssize_t hvcs_rescan_show(struct device_driver *ddp, char *buf) +{ + /* A 1 means it is updating, a 0 means it is done updating */ + return snprintf(buf, PAGE_SIZE, "%d\n", hvcs_rescan_status); +} + +static ssize_t hvcs_rescan_store(struct device_driver *ddp, const char * buf, + size_t count) +{ + if ((simple_strtol(buf, NULL, 0) != 1) + && (hvcs_rescan_status != 0)) + return -EINVAL; + + hvcs_rescan_status = 1; + printk(KERN_INFO "HVCS: rescanning partner info for all" + " vty-servers.\n"); + hvcs_rescan_devices_list(); + hvcs_rescan_status = 0; + return count; +} + +static DRIVER_ATTR(rescan, + S_IRUGO | S_IWUSR, hvcs_rescan_show, hvcs_rescan_store); + static void hvcs_kick(void) { hvcs_kicked = 1; @@ -575,7 +736,7 @@ static void destroy_hvcs_struct(struct kobject *kobj) spin_unlock_irqrestore(&hvcsd->lock, flags); spin_unlock(&hvcs_structs_lock); - hvcs_remove_device_attrs(vdev); + sysfs_remove_group(&vdev->dev.kobj, &hvcs_attr_group); kfree(hvcsd); } @@ -608,6 +769,7 @@ static int __devinit hvcs_probe( { struct hvcs_struct *hvcsd; int index; + int retval; if (!dev || !id) { printk(KERN_ERR "HVCS: probed with invalid parameter.\n"); @@ -658,14 +820,16 @@ static int __devinit hvcs_probe( * the hvcs_struct has been added to the devices list then the user app * will get -ENODEV. */ - spin_lock(&hvcs_structs_lock); - list_add_tail(&(hvcsd->next), &hvcs_structs); - spin_unlock(&hvcs_structs_lock); - hvcs_create_device_attrs(hvcsd); + retval = sysfs_create_group(&dev->dev.kobj, &hvcs_attr_group); + if (retval) { + printk(KERN_ERR "HVCS: Can't create sysfs attrs for vty-server@%X\n", + hvcsd->vdev->unit_address); + return retval; + } printk(KERN_INFO "HVCS: vty-server@%X added to the vio bus.\n", dev->unit_address); @@ -1354,8 +1518,10 @@ static int __init hvcs_module_init(void) if (!hvcs_tty_driver) return -ENOMEM; - if (hvcs_alloc_index_list(num_ttys_to_alloc)) - return -ENOMEM; + if (hvcs_alloc_index_list(num_ttys_to_alloc)) { + rc = -ENOMEM; + goto index_fail; + } hvcs_tty_driver->owner = THIS_MODULE; @@ -1385,41 +1551,57 @@ static int __init hvcs_module_init(void) * dynamically assigned major and minor numbers for our devices. */ if (tty_register_driver(hvcs_tty_driver)) { - printk(KERN_ERR "HVCS: registration " - " as a tty driver failed.\n"); - hvcs_free_index_list(); - put_tty_driver(hvcs_tty_driver); - return -EIO; + printk(KERN_ERR "HVCS: registration as a tty driver failed.\n"); + rc = -EIO; + goto register_fail; } hvcs_pi_buff = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!hvcs_pi_buff) { - tty_unregister_driver(hvcs_tty_driver); - hvcs_free_index_list(); - put_tty_driver(hvcs_tty_driver); - return -ENOMEM; + rc = -ENOMEM; + goto buff_alloc_fail; } hvcs_task = kthread_run(khvcsd, NULL, "khvcsd"); if (IS_ERR(hvcs_task)) { printk(KERN_ERR "HVCS: khvcsd creation failed. Driver not loaded.\n"); - kfree(hvcs_pi_buff); - tty_unregister_driver(hvcs_tty_driver); - hvcs_free_index_list(); - put_tty_driver(hvcs_tty_driver); - return -EIO; + rc = -EIO; + goto kthread_fail; } rc = vio_register_driver(&hvcs_vio_driver); + if (rc) { + printk(KERN_ERR "HVCS: can't register vio driver\n"); + goto vio_fail; + } /* * This needs to be done AFTER the vio_register_driver() call or else * the kobjects won't be initialized properly. */ - hvcs_create_driver_attrs(); + rc = driver_create_file(&(hvcs_vio_driver.driver), &driver_attr_rescan); + if (rc) { + printk(KERN_ERR "HVCS: sysfs attr create failed\n"); + goto attr_fail; + } printk(KERN_INFO "HVCS: driver module inserted.\n"); + return 0; + +attr_fail: + vio_unregister_driver(&hvcs_vio_driver); +vio_fail: + kthread_stop(hvcs_task); +kthread_fail: + kfree(hvcs_pi_buff); +buff_alloc_fail: + tty_unregister_driver(hvcs_tty_driver); +register_fail: + hvcs_free_index_list(); +index_fail: + put_tty_driver(hvcs_tty_driver); + hvcs_tty_driver = NULL; return rc; } @@ -1441,7 +1623,7 @@ static void __exit hvcs_module_exit(void) hvcs_pi_buff = NULL; spin_unlock(&hvcs_pi_lock); - hvcs_remove_driver_attrs(); + driver_remove_file(&hvcs_vio_driver.driver, &driver_attr_rescan); vio_unregister_driver(&hvcs_vio_driver); @@ -1456,191 +1638,3 @@ static void __exit hvcs_module_exit(void) module_init(hvcs_module_init); module_exit(hvcs_module_exit); - -static inline struct hvcs_struct *from_vio_dev(struct vio_dev *viod) -{ - return viod->dev.driver_data; -} -/* The sysfs interface for the driver and devices */ - -static ssize_t hvcs_partner_vtys_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - int retval; - - spin_lock_irqsave(&hvcsd->lock, flags); - retval = sprintf(buf, "%X\n", hvcsd->p_unit_address); - spin_unlock_irqrestore(&hvcsd->lock, flags); - return retval; -} -static DEVICE_ATTR(partner_vtys, S_IRUGO, hvcs_partner_vtys_show, NULL); - -static ssize_t hvcs_partner_clcs_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - int retval; - - spin_lock_irqsave(&hvcsd->lock, flags); - retval = sprintf(buf, "%s\n", &hvcsd->p_location_code[0]); - spin_unlock_irqrestore(&hvcsd->lock, flags); - return retval; -} -static DEVICE_ATTR(partner_clcs, S_IRUGO, hvcs_partner_clcs_show, NULL); - -static ssize_t hvcs_current_vty_store(struct device *dev, struct device_attribute *attr, const char * buf, - size_t count) -{ - /* - * Don't need this feature at the present time because firmware doesn't - * yet support multiple partners. - */ - printk(KERN_INFO "HVCS: Denied current_vty change: -EPERM.\n"); - return -EPERM; -} - -static ssize_t hvcs_current_vty_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - int retval; - - spin_lock_irqsave(&hvcsd->lock, flags); - retval = sprintf(buf, "%s\n", &hvcsd->p_location_code[0]); - spin_unlock_irqrestore(&hvcsd->lock, flags); - return retval; -} - -static DEVICE_ATTR(current_vty, - S_IRUGO | S_IWUSR, hvcs_current_vty_show, hvcs_current_vty_store); - -static ssize_t hvcs_vterm_state_store(struct device *dev, struct device_attribute *attr, const char *buf, - size_t count) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - - /* writing a '0' to this sysfs entry will result in the disconnect. */ - if (simple_strtol(buf, NULL, 0) != 0) - return -EINVAL; - - spin_lock_irqsave(&hvcsd->lock, flags); - - if (hvcsd->open_count > 0) { - spin_unlock_irqrestore(&hvcsd->lock, flags); - printk(KERN_INFO "HVCS: vterm state unchanged. " - "The hvcs device node is still in use.\n"); - return -EPERM; - } - - if (hvcsd->connected == 0) { - spin_unlock_irqrestore(&hvcsd->lock, flags); - printk(KERN_INFO "HVCS: vterm state unchanged. The" - " vty-server is not connected to a vty.\n"); - return -EPERM; - } - - hvcs_partner_free(hvcsd); - printk(KERN_INFO "HVCS: Closed vty-server@%X and" - " partner vty@%X:%d connection.\n", - hvcsd->vdev->unit_address, - hvcsd->p_unit_address, - (uint32_t)hvcsd->p_partition_ID); - - spin_unlock_irqrestore(&hvcsd->lock, flags); - return count; -} - -static ssize_t hvcs_vterm_state_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - int retval; - - spin_lock_irqsave(&hvcsd->lock, flags); - retval = sprintf(buf, "%d\n", hvcsd->connected); - spin_unlock_irqrestore(&hvcsd->lock, flags); - return retval; -} -static DEVICE_ATTR(vterm_state, S_IRUGO | S_IWUSR, - hvcs_vterm_state_show, hvcs_vterm_state_store); - -static ssize_t hvcs_index_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct vio_dev *viod = to_vio_dev(dev); - struct hvcs_struct *hvcsd = from_vio_dev(viod); - unsigned long flags; - int retval; - - spin_lock_irqsave(&hvcsd->lock, flags); - retval = sprintf(buf, "%d\n", hvcsd->index); - spin_unlock_irqrestore(&hvcsd->lock, flags); - return retval; -} - -static DEVICE_ATTR(index, S_IRUGO, hvcs_index_show, NULL); - -static struct attribute *hvcs_attrs[] = { - &dev_attr_partner_vtys.attr, - &dev_attr_partner_clcs.attr, - &dev_attr_current_vty.attr, - &dev_attr_vterm_state.attr, - &dev_attr_index.attr, - NULL, -}; - -static struct attribute_group hvcs_attr_group = { - .attrs = hvcs_attrs, -}; - -static void hvcs_create_device_attrs(struct hvcs_struct *hvcsd) -{ - struct vio_dev *vdev = hvcsd->vdev; - sysfs_create_group(&vdev->dev.kobj, &hvcs_attr_group); -} - -static void hvcs_remove_device_attrs(struct vio_dev *vdev) -{ - sysfs_remove_group(&vdev->dev.kobj, &hvcs_attr_group); -} - -static ssize_t hvcs_rescan_show(struct device_driver *ddp, char *buf) -{ - /* A 1 means it is updating, a 0 means it is done updating */ - return snprintf(buf, PAGE_SIZE, "%d\n", hvcs_rescan_status); -} - -static ssize_t hvcs_rescan_store(struct device_driver *ddp, const char * buf, - size_t count) -{ - if ((simple_strtol(buf, NULL, 0) != 1) - && (hvcs_rescan_status != 0)) - return -EINVAL; - - hvcs_rescan_status = 1; - printk(KERN_INFO "HVCS: rescanning partner info for all" - " vty-servers.\n"); - hvcs_rescan_devices_list(); - hvcs_rescan_status = 0; - return count; -} -static DRIVER_ATTR(rescan, - S_IRUGO | S_IWUSR, hvcs_rescan_show, hvcs_rescan_store); - -static void hvcs_create_driver_attrs(void) -{ - struct device_driver *driverfs = &(hvcs_vio_driver.driver); - driver_create_file(driverfs, &driver_attr_rescan); -} - -static void hvcs_remove_driver_attrs(void) -{ - struct device_driver *driverfs = &(hvcs_vio_driver.driver); - driver_remove_file(driverfs, &driver_attr_rescan); -} diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 9f7635f7517..5f3acd8e64b 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -3,17 +3,20 @@ # config HW_RANDOM - bool "Hardware Random Number Generator Core support" - default y + tristate "Hardware Random Number Generator Core support" + default m ---help--- Hardware Random Number Generator Core infrastructure. + To compile this driver as a module, choose M here: the + module will be called rng-core. + If unsure, say Y. config HW_RANDOM_INTEL tristate "Intel HW Random Number Generator support" depends on HW_RANDOM && (X86 || IA64) && PCI - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on Intel i8xx-based motherboards. @@ -26,7 +29,7 @@ config HW_RANDOM_INTEL config HW_RANDOM_AMD tristate "AMD HW Random Number Generator support" depends on HW_RANDOM && X86 && PCI - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on AMD 76x-based motherboards. @@ -39,7 +42,7 @@ config HW_RANDOM_AMD config HW_RANDOM_GEODE tristate "AMD Geode HW Random Number Generator support" depends on HW_RANDOM && X86 && PCI - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on the AMD Geode LX. @@ -52,7 +55,7 @@ config HW_RANDOM_GEODE config HW_RANDOM_VIA tristate "VIA HW Random Number Generator support" depends on HW_RANDOM && X86_32 - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on VIA based motherboards. @@ -65,7 +68,7 @@ config HW_RANDOM_VIA config HW_RANDOM_IXP4XX tristate "Intel IXP4xx NPU HW Random Number Generator support" depends on HW_RANDOM && ARCH_IXP4XX - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on the Intel IXP4xx NPU. @@ -78,7 +81,7 @@ config HW_RANDOM_IXP4XX config HW_RANDOM_OMAP tristate "OMAP Random Number Generator support" depends on HW_RANDOM && (ARCH_OMAP16XX || ARCH_OMAP24XX) - default y + default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number Generator hardware found on OMAP16xx and OMAP24xx multimedia diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index e263ae96f94..c41fa19454e 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -2,7 +2,8 @@ # Makefile for HW Random Number Generator (RNG) device drivers. # -obj-$(CONFIG_HW_RANDOM) += core.o +obj-$(CONFIG_HW_RANDOM) += rng-core.o +rng-core-y := core.o obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o diff --git a/drivers/char/ip2/i2cmd.h b/drivers/char/ip2/i2cmd.h index baa4e721b75..29277ec6b8e 100644 --- a/drivers/char/ip2/i2cmd.h +++ b/drivers/char/ip2/i2cmd.h @@ -367,11 +367,6 @@ static UCHAR cc02[]; #define CSE_NULL 3 // Replace with a null #define CSE_MARK 4 // Replace with a 3-character sequence (as Unix) -#define CMD_SET_REPLACEMENT(arg,ch) \ - (((cmdSyntaxPtr)(ct36a))->cmd[1] = (arg), \ - (((cmdSyntaxPtr)(ct36a))->cmd[2] = (ch), \ - (cmdSyntaxPtr)(ct36a)) - #define CSE_REPLACE 0x8 // Replace the errored character with the // replacement character defined here diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c index c213fdbdb2b..78045767ec3 100644 --- a/drivers/char/ip2/i2lib.c +++ b/drivers/char/ip2/i2lib.c @@ -1016,7 +1016,6 @@ i2Output(i2ChanStrPtr pCh, const char *pSource, int count) unsigned short channel; unsigned short stuffIndex; unsigned long flags; - int rc = 0; int bailout = 10; diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index 0030cd8e2e9..6c59baa887a 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -33,11 +33,13 @@ #include <linux/ipmi_msgdefs.h> /* for completion codes */ #include "ipmi_si_sm.h" -static int bt_debug = 0x00; /* Production value 0, see following flags */ +#define BT_DEBUG_OFF 0 /* Used in production */ +#define BT_DEBUG_ENABLE 1 /* Generic messages */ +#define BT_DEBUG_MSG 2 /* Prints all request/response buffers */ +#define BT_DEBUG_STATES 4 /* Verbose look at state changes */ + +static int bt_debug = BT_DEBUG_OFF; -#define BT_DEBUG_ENABLE 1 -#define BT_DEBUG_MSG 2 -#define BT_DEBUG_STATES 4 module_param(bt_debug, int, 0644); MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); @@ -47,38 +49,54 @@ MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); Since the Open IPMI architecture is single-message oriented at this stage, the queue depth of BT is of no concern. */ -#define BT_NORMAL_TIMEOUT 5000000 /* seconds in microseconds */ -#define BT_RETRY_LIMIT 2 -#define BT_RESET_DELAY 6000000 /* 6 seconds after warm reset */ +#define BT_NORMAL_TIMEOUT 5 /* seconds */ +#define BT_NORMAL_RETRY_LIMIT 2 +#define BT_RESET_DELAY 6 /* seconds after warm reset */ + +/* States are written in chronological order and usually cover + multiple rows of the state table discussion in the IPMI spec. */ enum bt_states { - BT_STATE_IDLE, + BT_STATE_IDLE = 0, /* Order is critical in this list */ BT_STATE_XACTION_START, BT_STATE_WRITE_BYTES, - BT_STATE_WRITE_END, BT_STATE_WRITE_CONSUME, - BT_STATE_B2H_WAIT, - BT_STATE_READ_END, - BT_STATE_RESET1, /* These must come last */ + BT_STATE_READ_WAIT, + BT_STATE_CLEAR_B2H, + BT_STATE_READ_BYTES, + BT_STATE_RESET1, /* These must come last */ BT_STATE_RESET2, BT_STATE_RESET3, BT_STATE_RESTART, - BT_STATE_HOSED + BT_STATE_PRINTME, + BT_STATE_CAPABILITIES_BEGIN, + BT_STATE_CAPABILITIES_END, + BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */ }; +/* Macros seen at the end of state "case" blocks. They help with legibility + and debugging. */ + +#define BT_STATE_CHANGE(X,Y) { bt->state = X; return Y; } + +#define BT_SI_SM_RETURN(Y) { last_printed = BT_STATE_PRINTME; return Y; } + struct si_sm_data { enum bt_states state; - enum bt_states last_state; /* assist printing and resets */ unsigned char seq; /* BT sequence number */ struct si_sm_io *io; - unsigned char write_data[IPMI_MAX_MSG_LENGTH]; - int write_count; - unsigned char read_data[IPMI_MAX_MSG_LENGTH]; - int read_count; - int truncated; - long timeout; - unsigned int error_retries; /* end of "common" fields */ + unsigned char write_data[IPMI_MAX_MSG_LENGTH]; + int write_count; + unsigned char read_data[IPMI_MAX_MSG_LENGTH]; + int read_count; + int truncated; + long timeout; /* microseconds countdown */ + int error_retries; /* end of "common" fields */ int nonzero_status; /* hung BMCs stay all 0 */ + enum bt_states complete; /* to divert the state machine */ + int BT_CAP_outreqs; + long BT_CAP_req2rsp; + int BT_CAP_retries; /* Recommended retries */ }; #define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */ @@ -111,86 +129,118 @@ struct si_sm_data { static char *state2txt(unsigned char state) { switch (state) { - case BT_STATE_IDLE: return("IDLE"); - case BT_STATE_XACTION_START: return("XACTION"); - case BT_STATE_WRITE_BYTES: return("WR_BYTES"); - case BT_STATE_WRITE_END: return("WR_END"); - case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); - case BT_STATE_B2H_WAIT: return("B2H_WAIT"); - case BT_STATE_READ_END: return("RD_END"); - case BT_STATE_RESET1: return("RESET1"); - case BT_STATE_RESET2: return("RESET2"); - case BT_STATE_RESET3: return("RESET3"); - case BT_STATE_RESTART: return("RESTART"); - case BT_STATE_HOSED: return("HOSED"); + case BT_STATE_IDLE: return("IDLE"); + case BT_STATE_XACTION_START: return("XACTION"); + case BT_STATE_WRITE_BYTES: return("WR_BYTES"); + case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); + case BT_STATE_READ_WAIT: return("RD_WAIT"); + case BT_STATE_CLEAR_B2H: return("CLEAR_B2H"); + case BT_STATE_READ_BYTES: return("RD_BYTES"); + case BT_STATE_RESET1: return("RESET1"); + case BT_STATE_RESET2: return("RESET2"); + case BT_STATE_RESET3: return("RESET3"); + case BT_STATE_RESTART: return("RESTART"); + case BT_STATE_LONG_BUSY: return("LONG_BUSY"); + case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN"); + case BT_STATE_CAPABILITIES_END: return("CAP_END"); } return("BAD STATE"); } #define STATE2TXT state2txt(bt->state) -static char *status2txt(unsigned char status, char *buf) +static char *status2txt(unsigned char status) { + /* + * This cannot be called by two threads at the same time and + * the buffer is always consumed immediately, so the static is + * safe to use. + */ + static char buf[40]; + strcpy(buf, "[ "); - if (status & BT_B_BUSY) strcat(buf, "B_BUSY "); - if (status & BT_H_BUSY) strcat(buf, "H_BUSY "); - if (status & BT_OEM0) strcat(buf, "OEM0 "); - if (status & BT_SMS_ATN) strcat(buf, "SMS "); - if (status & BT_B2H_ATN) strcat(buf, "B2H "); - if (status & BT_H2B_ATN) strcat(buf, "H2B "); + if (status & BT_B_BUSY) + strcat(buf, "B_BUSY "); + if (status & BT_H_BUSY) + strcat(buf, "H_BUSY "); + if (status & BT_OEM0) + strcat(buf, "OEM0 "); + if (status & BT_SMS_ATN) + strcat(buf, "SMS "); + if (status & BT_B2H_ATN) + strcat(buf, "B2H "); + if (status & BT_H2B_ATN) + strcat(buf, "H2B "); strcat(buf, "]"); return buf; } -#define STATUS2TXT(buf) status2txt(status, buf) +#define STATUS2TXT status2txt(status) + +/* called externally at insmod time, and internally on cleanup */ -/* This will be called from within this module on a hosed condition */ -#define FIRST_SEQ 0 static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) { - bt->state = BT_STATE_IDLE; - bt->last_state = BT_STATE_IDLE; - bt->seq = FIRST_SEQ; - bt->io = io; - bt->write_count = 0; - bt->read_count = 0; - bt->error_retries = 0; - bt->nonzero_status = 0; - bt->truncated = 0; - bt->timeout = BT_NORMAL_TIMEOUT; + memset(bt, 0, sizeof(struct si_sm_data)); + if (bt->io != io) { /* external: one-time only things */ + bt->io = io; + bt->seq = 0; + } + bt->state = BT_STATE_IDLE; /* start here */ + bt->complete = BT_STATE_IDLE; /* end here */ + bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * 1000000; + bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT; + /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */ return 3; /* We claim 3 bytes of space; ought to check SPMI table */ } +/* Jam a completion code (probably an error) into a response */ + +static void force_result(struct si_sm_data *bt, unsigned char completion_code) +{ + bt->read_data[0] = 4; /* # following bytes */ + bt->read_data[1] = bt->write_data[1] | 4; /* Odd NetFn/LUN */ + bt->read_data[2] = bt->write_data[2]; /* seq (ignored) */ + bt->read_data[3] = bt->write_data[3]; /* Command */ + bt->read_data[4] = completion_code; + bt->read_count = 5; +} + +/* The upper state machine starts here */ + static int bt_start_transaction(struct si_sm_data *bt, unsigned char *data, unsigned int size) { unsigned int i; - if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2))) - return -1; + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > IPMI_MAX_MSG_LENGTH) + return IPMI_REQ_LEN_EXCEEDED_ERR; - if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED)) - return -2; + if (bt->state == BT_STATE_LONG_BUSY) + return IPMI_NODE_BUSY_ERR; + + if (bt->state != BT_STATE_IDLE) + return IPMI_NOT_IN_MY_STATE_ERR; if (bt_debug & BT_DEBUG_MSG) { - printk(KERN_WARNING "+++++++++++++++++++++++++++++++++++++\n"); - printk(KERN_WARNING "BT: write seq=0x%02X:", bt->seq); + printk(KERN_WARNING "BT: +++++++++++++++++ New command\n"); + printk(KERN_WARNING "BT: NetFn/LUN CMD [%d data]:", size - 2); for (i = 0; i < size; i ++) - printk (" %02x", data[i]); + printk (" %02x", data[i]); printk("\n"); } bt->write_data[0] = size + 1; /* all data plus seq byte */ bt->write_data[1] = *data; /* NetFn/LUN */ - bt->write_data[2] = bt->seq; + bt->write_data[2] = bt->seq++; memcpy(bt->write_data + 3, data + 1, size - 1); bt->write_count = size + 2; - bt->error_retries = 0; bt->nonzero_status = 0; - bt->read_count = 0; bt->truncated = 0; bt->state = BT_STATE_XACTION_START; - bt->last_state = BT_STATE_IDLE; - bt->timeout = BT_NORMAL_TIMEOUT; + bt->timeout = bt->BT_CAP_req2rsp; + force_result(bt, IPMI_ERR_UNSPECIFIED); return 0; } @@ -198,38 +248,30 @@ static int bt_start_transaction(struct si_sm_data *bt, it calls this. Strip out the length and seq bytes. */ static int bt_get_result(struct si_sm_data *bt, - unsigned char *data, - unsigned int length) + unsigned char *data, + unsigned int length) { int i, msg_len; msg_len = bt->read_count - 2; /* account for length & seq */ - /* Always NetFn, Cmd, cCode */ if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) { - printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len); - data[0] = bt->write_data[1] | 0x4; /* Kludge a response */ - data[1] = bt->write_data[3]; - data[2] = IPMI_ERR_UNSPECIFIED; + force_result(bt, IPMI_ERR_UNSPECIFIED); msg_len = 3; - } else { - data[0] = bt->read_data[1]; - data[1] = bt->read_data[3]; - if (length < msg_len) - bt->truncated = 1; - if (bt->truncated) { /* can be set in read_all_bytes() */ - data[2] = IPMI_ERR_MSG_TRUNCATED; - msg_len = 3; - } else - memcpy(data + 2, bt->read_data + 4, msg_len - 2); + } + data[0] = bt->read_data[1]; + data[1] = bt->read_data[3]; + if (length < msg_len || bt->truncated) { + data[2] = IPMI_ERR_MSG_TRUNCATED; + msg_len = 3; + } else + memcpy(data + 2, bt->read_data + 4, msg_len - 2); - if (bt_debug & BT_DEBUG_MSG) { - printk (KERN_WARNING "BT: res (raw)"); - for (i = 0; i < msg_len; i++) - printk(" %02x", data[i]); - printk ("\n"); - } + if (bt_debug & BT_DEBUG_MSG) { + printk (KERN_WARNING "BT: result %d bytes:", msg_len); + for (i = 0; i < msg_len; i++) + printk(" %02x", data[i]); + printk ("\n"); } - bt->read_count = 0; /* paranoia */ return msg_len; } @@ -238,22 +280,40 @@ static int bt_get_result(struct si_sm_data *bt, static void reset_flags(struct si_sm_data *bt) { + if (bt_debug) + printk(KERN_WARNING "IPMI BT: flag reset %s\n", + status2txt(BT_STATUS)); if (BT_STATUS & BT_H_BUSY) - BT_CONTROL(BT_H_BUSY); - if (BT_STATUS & BT_B_BUSY) - BT_CONTROL(BT_B_BUSY); - BT_CONTROL(BT_CLR_WR_PTR); - BT_CONTROL(BT_SMS_ATN); - - if (BT_STATUS & BT_B2H_ATN) { - int i; - BT_CONTROL(BT_H_BUSY); - BT_CONTROL(BT_B2H_ATN); - BT_CONTROL(BT_CLR_RD_PTR); - for (i = 0; i < IPMI_MAX_MSG_LENGTH + 2; i++) - BMC2HOST; - BT_CONTROL(BT_H_BUSY); - } + BT_CONTROL(BT_H_BUSY); /* force clear */ + BT_CONTROL(BT_CLR_WR_PTR); /* always reset */ + BT_CONTROL(BT_SMS_ATN); /* always clear */ + BT_INTMASK_W(BT_BMC_HWRST); +} + +/* Get rid of an unwanted/stale response. This should only be needed for + BMCs that support multiple outstanding requests. */ + +static void drain_BMC2HOST(struct si_sm_data *bt) +{ + int i, size; + + if (!(BT_STATUS & BT_B2H_ATN)) /* Not signalling a response */ + return; + + BT_CONTROL(BT_H_BUSY); /* now set */ + BT_CONTROL(BT_B2H_ATN); /* always clear */ + BT_STATUS; /* pause */ + BT_CONTROL(BT_B2H_ATN); /* some BMCs are stubborn */ + BT_CONTROL(BT_CLR_RD_PTR); /* always reset */ + if (bt_debug) + printk(KERN_WARNING "IPMI BT: stale response %s; ", + status2txt(BT_STATUS)); + size = BMC2HOST; + for (i = 0; i < size ; i++) + BMC2HOST; + BT_CONTROL(BT_H_BUSY); /* now clear */ + if (bt_debug) + printk("drained %d bytes\n", size + 1); } static inline void write_all_bytes(struct si_sm_data *bt) @@ -261,201 +321,256 @@ static inline void write_all_bytes(struct si_sm_data *bt) int i; if (bt_debug & BT_DEBUG_MSG) { - printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", + printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", bt->write_count, bt->seq); for (i = 0; i < bt->write_count; i++) printk (" %02x", bt->write_data[i]); printk ("\n"); } for (i = 0; i < bt->write_count; i++) - HOST2BMC(bt->write_data[i]); + HOST2BMC(bt->write_data[i]); } static inline int read_all_bytes(struct si_sm_data *bt) { unsigned char i; + /* length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. + Keep layout of first four bytes aligned with write_data[] */ + bt->read_data[0] = BMC2HOST; bt->read_count = bt->read_data[0]; - if (bt_debug & BT_DEBUG_MSG) - printk(KERN_WARNING "BT: read %d bytes:", bt->read_count); - /* minimum: length, NetFn, Seq, Cmd, cCode == 5 total, or 4 more - following the length byte. */ if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) { if (bt_debug & BT_DEBUG_MSG) - printk("bad length %d\n", bt->read_count); + printk(KERN_WARNING "BT: bad raw rsp len=%d\n", + bt->read_count); bt->truncated = 1; return 1; /* let next XACTION START clean it up */ } for (i = 1; i <= bt->read_count; i++) - bt->read_data[i] = BMC2HOST; - bt->read_count++; /* account for the length byte */ + bt->read_data[i] = BMC2HOST; + bt->read_count++; /* Account internally for length byte */ if (bt_debug & BT_DEBUG_MSG) { - for (i = 0; i < bt->read_count; i++) + int max = bt->read_count; + + printk(KERN_WARNING "BT: got %d bytes seq=0x%02X", + max, bt->read_data[2]); + if (max > 16) + max = 16; + for (i = 0; i < max; i++) printk (" %02x", bt->read_data[i]); - printk ("\n"); + printk ("%s\n", bt->read_count == max ? "" : " ..."); } - if (bt->seq != bt->write_data[2]) /* idiot check */ - printk(KERN_DEBUG "BT: internal error: sequence mismatch\n"); - /* per the spec, the (NetFn, Seq, Cmd) tuples should match */ - if ((bt->read_data[3] == bt->write_data[3]) && /* Cmd */ - (bt->read_data[2] == bt->write_data[2]) && /* Sequence */ - ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) + /* per the spec, the (NetFn[1], Seq[2], Cmd[3]) tuples must match */ + if ((bt->read_data[3] == bt->write_data[3]) && + (bt->read_data[2] == bt->write_data[2]) && + ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) return 1; if (bt_debug & BT_DEBUG_MSG) - printk(KERN_WARNING "BT: bad packet: " + printk(KERN_WARNING "IPMI BT: bad packet: " "want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n", - bt->write_data[1], bt->write_data[2], bt->write_data[3], + bt->write_data[1] | 0x04, bt->write_data[2], bt->write_data[3], bt->read_data[1], bt->read_data[2], bt->read_data[3]); return 0; } -/* Modifies bt->state appropriately, need to get into the bt_event() switch */ +/* Restart if retries are left, or return an error completion code */ -static void error_recovery(struct si_sm_data *bt, char *reason) +static enum si_sm_result error_recovery(struct si_sm_data *bt, + unsigned char status, + unsigned char cCode) { - unsigned char status; - char buf[40]; /* For getting status */ + char *reason; - bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */ + bt->timeout = bt->BT_CAP_req2rsp; - status = BT_STATUS; - printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT, - STATUS2TXT(buf)); + switch (cCode) { + case IPMI_TIMEOUT_ERR: + reason = "timeout"; + break; + default: + reason = "internal error"; + break; + } + + printk(KERN_WARNING "IPMI BT: %s in %s %s ", /* open-ended line */ + reason, STATE2TXT, STATUS2TXT); + /* Per the IPMI spec, retries are based on the sequence number + known only to this module, so manage a restart here. */ (bt->error_retries)++; - if (bt->error_retries > BT_RETRY_LIMIT) { - printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT); - bt->state = BT_STATE_HOSED; - if (!bt->nonzero_status) - printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); - else if (bt->error_retries <= BT_RETRY_LIMIT + 1) { - printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n"); - bt->state = BT_STATE_RESET1; - } - return; + if (bt->error_retries < bt->BT_CAP_retries) { + printk("%d retries left\n", + bt->BT_CAP_retries - bt->error_retries); + bt->state = BT_STATE_RESTART; + return SI_SM_CALL_WITHOUT_DELAY; } - /* Sometimes the BMC queues get in an "off-by-one" state...*/ - if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) { - printk(KERN_DEBUG "retry B2H_WAIT\n"); - return; + printk("failed %d retries, sending error response\n", + bt->BT_CAP_retries); + if (!bt->nonzero_status) + printk(KERN_ERR "IPMI BT: stuck, try power cycle\n"); + + /* this is most likely during insmod */ + else if (bt->seq <= (unsigned char)(bt->BT_CAP_retries & 0xFF)) { + printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n"); + bt->state = BT_STATE_RESET1; + return SI_SM_CALL_WITHOUT_DELAY; } - printk(KERN_DEBUG "restart command\n"); - bt->state = BT_STATE_RESTART; + /* Concoct a useful error message, set up the next state, and + be done with this sequence. */ + + bt->state = BT_STATE_IDLE; + switch (cCode) { + case IPMI_TIMEOUT_ERR: + if (status & BT_B_BUSY) { + cCode = IPMI_NODE_BUSY_ERR; + bt->state = BT_STATE_LONG_BUSY; + } + break; + default: + break; + } + force_result(bt, cCode); + return SI_SM_TRANSACTION_COMPLETE; } -/* Check the status and (possibly) advance the BT state machine. The - default return is SI_SM_CALL_WITH_DELAY. */ +/* Check status and (usually) take action and change this state machine. */ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) { - unsigned char status; - char buf[40]; /* For getting status */ + unsigned char status, BT_CAP[8]; + static enum bt_states last_printed = BT_STATE_PRINTME; int i; status = BT_STATUS; bt->nonzero_status |= status; - - if ((bt_debug & BT_DEBUG_STATES) && (bt->state != bt->last_state)) + if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) { printk(KERN_WARNING "BT: %s %s TO=%ld - %ld \n", STATE2TXT, - STATUS2TXT(buf), + STATUS2TXT, bt->timeout, time); - bt->last_state = bt->state; + last_printed = bt->state; + } - if (bt->state == BT_STATE_HOSED) - return SI_SM_HOSED; + /* Commands that time out may still (eventually) provide a response. + This stale response will get in the way of a new response so remove + it if possible (hopefully during IDLE). Even if it comes up later + it will be rejected by its (now-forgotten) seq number. */ + + if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN)) { + drain_BMC2HOST(bt); + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + } - if (bt->state != BT_STATE_IDLE) { /* do timeout test */ + if ((bt->state != BT_STATE_IDLE) && + (bt->state < BT_STATE_PRINTME)) { /* check timeout */ bt->timeout -= time; - if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) { - error_recovery(bt, "timed out"); - return SI_SM_CALL_WITHOUT_DELAY; - } + if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) + return error_recovery(bt, + status, + IPMI_TIMEOUT_ERR); } switch (bt->state) { - case BT_STATE_IDLE: /* check for asynchronous messages */ + /* Idle state first checks for asynchronous messages from another + channel, then does some opportunistic housekeeping. */ + + case BT_STATE_IDLE: if (status & BT_SMS_ATN) { BT_CONTROL(BT_SMS_ATN); /* clear it */ return SI_SM_ATTN; } - return SI_SM_IDLE; - case BT_STATE_XACTION_START: - if (status & BT_H_BUSY) { + if (status & BT_H_BUSY) /* clear a leftover H_BUSY */ BT_CONTROL(BT_H_BUSY); - break; - } - if (status & BT_B2H_ATN) - break; - bt->state = BT_STATE_WRITE_BYTES; - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ - case BT_STATE_WRITE_BYTES: + /* Read BT capabilities if it hasn't been done yet */ + if (!bt->BT_CAP_outreqs) + BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN, + SI_SM_CALL_WITHOUT_DELAY); + bt->timeout = bt->BT_CAP_req2rsp; + BT_SI_SM_RETURN(SI_SM_IDLE); + + case BT_STATE_XACTION_START: if (status & (BT_B_BUSY | BT_H2B_ATN)) - break; + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + if (BT_STATUS & BT_H_BUSY) + BT_CONTROL(BT_H_BUSY); /* force clear */ + BT_STATE_CHANGE(BT_STATE_WRITE_BYTES, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_WRITE_BYTES: + if (status & BT_H_BUSY) + BT_CONTROL(BT_H_BUSY); /* clear */ BT_CONTROL(BT_CLR_WR_PTR); write_all_bytes(bt); - BT_CONTROL(BT_H2B_ATN); /* clears too fast to catch? */ - bt->state = BT_STATE_WRITE_CONSUME; - return SI_SM_CALL_WITHOUT_DELAY; /* it MIGHT sail through */ - - case BT_STATE_WRITE_CONSUME: /* BMCs usually blow right thru here */ - if (status & (BT_H2B_ATN | BT_B_BUSY)) - break; - bt->state = BT_STATE_B2H_WAIT; - /* fall through with status */ - - /* Stay in BT_STATE_B2H_WAIT until a packet matches. However, spinning - hard here, constantly reading status, seems to hold off the - generation of B2H_ATN so ALWAYS return CALL_WITH_DELAY. */ - - case BT_STATE_B2H_WAIT: - if (!(status & BT_B2H_ATN)) - break; - - /* Assume ordered, uncached writes: no need to wait */ - if (!(status & BT_H_BUSY)) - BT_CONTROL(BT_H_BUSY); /* set */ - BT_CONTROL(BT_B2H_ATN); /* clear it, ACK to the BMC */ - BT_CONTROL(BT_CLR_RD_PTR); /* reset the queue */ - i = read_all_bytes(bt); - BT_CONTROL(BT_H_BUSY); /* clear */ - if (!i) /* Try this state again */ - break; - bt->state = BT_STATE_READ_END; - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ - - case BT_STATE_READ_END: - - /* I could wait on BT_H_BUSY to go clear for a truly clean - exit. However, this is already done in XACTION_START - and the (possible) extra loop/status/possible wait affects - performance. So, as long as it works, just ignore H_BUSY */ - -#ifdef MAKE_THIS_TRUE_IF_NECESSARY + BT_CONTROL(BT_H2B_ATN); /* can clear too fast to catch */ + BT_STATE_CHANGE(BT_STATE_WRITE_CONSUME, + SI_SM_CALL_WITHOUT_DELAY); - if (status & BT_H_BUSY) - break; -#endif - bt->seq++; - bt->state = BT_STATE_IDLE; - return SI_SM_TRANSACTION_COMPLETE; + case BT_STATE_WRITE_CONSUME: + if (status & (BT_B_BUSY | BT_H2B_ATN)) + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + BT_STATE_CHANGE(BT_STATE_READ_WAIT, + SI_SM_CALL_WITHOUT_DELAY); + + /* Spinning hard can suppress B2H_ATN and force a timeout */ + + case BT_STATE_READ_WAIT: + if (!(status & BT_B2H_ATN)) + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + BT_CONTROL(BT_H_BUSY); /* set */ + + /* Uncached, ordered writes should just proceeed serially but + some BMCs don't clear B2H_ATN with one hit. Fast-path a + workaround without too much penalty to the general case. */ + + BT_CONTROL(BT_B2H_ATN); /* clear it to ACK the BMC */ + BT_STATE_CHANGE(BT_STATE_CLEAR_B2H, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_CLEAR_B2H: + if (status & BT_B2H_ATN) { /* keep hitting it */ + BT_CONTROL(BT_B2H_ATN); + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + } + BT_STATE_CHANGE(BT_STATE_READ_BYTES, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_READ_BYTES: + if (!(status & BT_H_BUSY)) /* check in case of retry */ + BT_CONTROL(BT_H_BUSY); + BT_CONTROL(BT_CLR_RD_PTR); /* start of BMC2HOST buffer */ + i = read_all_bytes(bt); /* true == packet seq match */ + BT_CONTROL(BT_H_BUSY); /* NOW clear */ + if (!i) /* Not my message */ + BT_STATE_CHANGE(BT_STATE_READ_WAIT, + SI_SM_CALL_WITHOUT_DELAY); + bt->state = bt->complete; + return bt->state == BT_STATE_IDLE ? /* where to next? */ + SI_SM_TRANSACTION_COMPLETE : /* normal */ + SI_SM_CALL_WITHOUT_DELAY; /* Startup magic */ + + case BT_STATE_LONG_BUSY: /* For example: after FW update */ + if (!(status & BT_B_BUSY)) { + reset_flags(bt); /* next state is now IDLE */ + bt_init_data(bt, bt->io); + } + return SI_SM_CALL_WITH_DELAY; /* No repeat printing */ case BT_STATE_RESET1: - reset_flags(bt); - bt->timeout = BT_RESET_DELAY; - bt->state = BT_STATE_RESET2; - break; + reset_flags(bt); + drain_BMC2HOST(bt); + BT_STATE_CHANGE(BT_STATE_RESET2, + SI_SM_CALL_WITH_DELAY); case BT_STATE_RESET2: /* Send a soft reset */ BT_CONTROL(BT_CLR_WR_PTR); @@ -464,29 +579,59 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) HOST2BMC(42); /* Sequence number */ HOST2BMC(3); /* Cmd == Soft reset */ BT_CONTROL(BT_H2B_ATN); - bt->state = BT_STATE_RESET3; - break; + bt->timeout = BT_RESET_DELAY * 1000000; + BT_STATE_CHANGE(BT_STATE_RESET3, + SI_SM_CALL_WITH_DELAY); - case BT_STATE_RESET3: + case BT_STATE_RESET3: /* Hold off everything for a bit */ if (bt->timeout > 0) - return SI_SM_CALL_WITH_DELAY; - bt->state = BT_STATE_RESTART; /* printk in debug modes */ - break; + return SI_SM_CALL_WITH_DELAY; + drain_BMC2HOST(bt); + BT_STATE_CHANGE(BT_STATE_RESTART, + SI_SM_CALL_WITH_DELAY); - case BT_STATE_RESTART: /* don't reset retries! */ - reset_flags(bt); - bt->write_data[2] = ++bt->seq; + case BT_STATE_RESTART: /* don't reset retries or seq! */ bt->read_count = 0; bt->nonzero_status = 0; - bt->timeout = BT_NORMAL_TIMEOUT; - bt->state = BT_STATE_XACTION_START; - break; - - default: /* HOSED is supposed to be caught much earlier */ - error_recovery(bt, "internal logic error"); - break; - } - return SI_SM_CALL_WITH_DELAY; + bt->timeout = bt->BT_CAP_req2rsp; + BT_STATE_CHANGE(BT_STATE_XACTION_START, + SI_SM_CALL_WITH_DELAY); + + /* Get BT Capabilities, using timing of upper level state machine. + Set outreqs to prevent infinite loop on timeout. */ + case BT_STATE_CAPABILITIES_BEGIN: + bt->BT_CAP_outreqs = 1; + { + unsigned char GetBT_CAP[] = { 0x18, 0x36 }; + bt->state = BT_STATE_IDLE; + bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP)); + } + bt->complete = BT_STATE_CAPABILITIES_END; + BT_STATE_CHANGE(BT_STATE_XACTION_START, + SI_SM_CALL_WITH_DELAY); + + case BT_STATE_CAPABILITIES_END: + i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP)); + bt_init_data(bt, bt->io); + if ((i == 8) && !BT_CAP[2]) { + bt->BT_CAP_outreqs = BT_CAP[3]; + bt->BT_CAP_req2rsp = BT_CAP[6] * 1000000; + bt->BT_CAP_retries = BT_CAP[7]; + } else + printk(KERN_WARNING "IPMI BT: using default values\n"); + if (!bt->BT_CAP_outreqs) + bt->BT_CAP_outreqs = 1; + printk(KERN_WARNING "IPMI BT: req2rsp=%ld secs retries=%d\n", + bt->BT_CAP_req2rsp / 1000000L, bt->BT_CAP_retries); + bt->timeout = bt->BT_CAP_req2rsp; + return SI_SM_CALL_WITHOUT_DELAY; + + default: /* should never occur */ + return error_recovery(bt, + status, + IPMI_ERR_UNSPECIFIED); + } + return SI_SM_CALL_WITH_DELAY; } static int bt_detect(struct si_sm_data *bt) @@ -497,7 +642,7 @@ static int bt_detect(struct si_sm_data *bt) test that first. The calling routine uses negative logic. */ if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF)) - return 1; + return 1; reset_flags(bt); return 0; } @@ -513,11 +658,11 @@ static int bt_size(void) struct si_sm_handlers bt_smi_handlers = { - .init_data = bt_init_data, - .start_transaction = bt_start_transaction, - .get_result = bt_get_result, - .event = bt_event, - .detect = bt_detect, - .cleanup = bt_cleanup, - .size = bt_size, + .init_data = bt_init_data, + .start_transaction = bt_start_transaction, + .get_result = bt_get_result, + .event = bt_event, + .detect = bt_detect, + .cleanup = bt_cleanup, + .size = bt_size, }; diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 81fcf0ce21d..375d3378eec 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -596,6 +596,31 @@ static int ipmi_ioctl(struct inode *inode, rv = 0; break; } + + case IPMICTL_GET_MAINTENANCE_MODE_CMD: + { + int mode; + + mode = ipmi_get_maintenance_mode(priv->user); + if (copy_to_user(arg, &mode, sizeof(mode))) { + rv = -EFAULT; + break; + } + rv = 0; + break; + } + + case IPMICTL_SET_MAINTENANCE_MODE_CMD: + { + int mode; + + if (copy_from_user(&mode, arg, sizeof(mode))) { + rv = -EFAULT; + break; + } + rv = ipmi_set_maintenance_mode(priv->user, mode); + break; + } } return rv; diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c index 2062675f9e9..c1b8228cb7b 100644 --- a/drivers/char/ipmi/ipmi_kcs_sm.c +++ b/drivers/char/ipmi/ipmi_kcs_sm.c @@ -93,8 +93,8 @@ enum kcs_states { state machine. */ }; -#define MAX_KCS_READ_SIZE 80 -#define MAX_KCS_WRITE_SIZE 80 +#define MAX_KCS_READ_SIZE IPMI_MAX_MSG_LENGTH +#define MAX_KCS_WRITE_SIZE IPMI_MAX_MSG_LENGTH /* Timeouts in microseconds. */ #define IBF_RETRY_TIMEOUT 1000000 @@ -261,12 +261,14 @@ static int start_kcs_transaction(struct si_sm_data *kcs, unsigned char *data, { unsigned int i; - if ((size < 2) || (size > MAX_KCS_WRITE_SIZE)) { - return -1; - } - if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) { - return -2; - } + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > MAX_KCS_WRITE_SIZE) + return IPMI_REQ_LEN_EXCEEDED_ERR; + + if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) + return IPMI_NOT_IN_MY_STATE_ERR; + if (kcs_debug & KCS_DEBUG_MSG) { printk(KERN_DEBUG "start_kcs_transaction -"); for (i = 0; i < size; i ++) { diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c47add8e47d..5703ee28e1c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -48,7 +48,7 @@ #define PFX "IPMI message handler: " -#define IPMI_DRIVER_VERSION "39.0" +#define IPMI_DRIVER_VERSION "39.1" static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); @@ -59,6 +59,9 @@ static int initialized = 0; static struct proc_dir_entry *proc_ipmi_root = NULL; #endif /* CONFIG_PROC_FS */ +/* Remain in auto-maintenance mode for this amount of time (in ms). */ +#define IPMI_MAINTENANCE_MODE_TIMEOUT 30000 + #define MAX_EVENTS_IN_QUEUE 25 /* Don't let a message sit in a queue forever, always time it with at lest @@ -193,17 +196,28 @@ struct ipmi_smi struct kref refcount; + /* Used for a list of interfaces. */ + struct list_head link; + /* The list of upper layers that are using me. seq_lock * protects this. */ struct list_head users; + /* Information to supply to users. */ + unsigned char ipmi_version_major; + unsigned char ipmi_version_minor; + /* Used for wake ups at startup. */ wait_queue_head_t waitq; struct bmc_device *bmc; char *my_dev_name; + char *sysfs_name; - /* This is the lower-layer's sender routine. */ + /* This is the lower-layer's sender routine. Note that you + * must either be holding the ipmi_interfaces_mutex or be in + * an umpreemptible region to use this. You must fetch the + * value into a local variable and make sure it is not NULL. */ struct ipmi_smi_handlers *handlers; void *send_info; @@ -242,6 +256,7 @@ struct ipmi_smi spinlock_t events_lock; /* For dealing with event stuff. */ struct list_head waiting_events; unsigned int waiting_events_count; /* How many events in queue? */ + int delivering_events; /* The event receiver for my BMC, only really used at panic shutdown as a place to store this. */ @@ -250,6 +265,12 @@ struct ipmi_smi unsigned char local_sel_device; unsigned char local_event_generator; + /* For handling of maintenance mode. */ + int maintenance_mode; + int maintenance_mode_enable; + int auto_maintenance_timeout; + spinlock_t maintenance_mode_lock; /* Used in a timer... */ + /* A cheap hack, if this is non-null and a message to an interface comes in with a NULL user, call this routine with it. Note that the message will still be freed by the @@ -338,13 +359,6 @@ struct ipmi_smi }; #define to_si_intf_from_dev(device) container_of(device, struct ipmi_smi, dev) -/* Used to mark an interface entry that cannot be used but is not a - * free entry, either, primarily used at creation and deletion time so - * a slot doesn't get reused too quickly. */ -#define IPMI_INVALID_INTERFACE_ENTRY ((ipmi_smi_t) ((long) 1)) -#define IPMI_INVALID_INTERFACE(i) (((i) == NULL) \ - || (i == IPMI_INVALID_INTERFACE_ENTRY)) - /** * The driver model view of the IPMI messaging driver. */ @@ -354,16 +368,13 @@ static struct device_driver ipmidriver = { }; static DEFINE_MUTEX(ipmidriver_mutex); -#define MAX_IPMI_INTERFACES 4 -static ipmi_smi_t ipmi_interfaces[MAX_IPMI_INTERFACES]; - -/* Directly protects the ipmi_interfaces data structure. */ -static DEFINE_SPINLOCK(interfaces_lock); +static struct list_head ipmi_interfaces = LIST_HEAD_INIT(ipmi_interfaces); +static DEFINE_MUTEX(ipmi_interfaces_mutex); /* List of watchers that want to know when smi's are added and deleted. */ static struct list_head smi_watchers = LIST_HEAD_INIT(smi_watchers); -static DECLARE_RWSEM(smi_watchers_sem); +static DEFINE_MUTEX(smi_watchers_mutex); static void free_recv_msg_list(struct list_head *q) @@ -423,48 +434,84 @@ static void intf_free(struct kref *ref) kfree(intf); } +struct watcher_entry { + int intf_num; + ipmi_smi_t intf; + struct list_head link; +}; + int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher) { - int i; - unsigned long flags; + ipmi_smi_t intf; + struct list_head to_deliver = LIST_HEAD_INIT(to_deliver); + struct watcher_entry *e, *e2; + + mutex_lock(&smi_watchers_mutex); + + mutex_lock(&ipmi_interfaces_mutex); - down_write(&smi_watchers_sem); - list_add(&(watcher->link), &smi_watchers); - up_write(&smi_watchers_sem); - spin_lock_irqsave(&interfaces_lock, flags); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - ipmi_smi_t intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) + /* Build a list of things to deliver. */ + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + if (intf->intf_num == -1) continue; - spin_unlock_irqrestore(&interfaces_lock, flags); - watcher->new_smi(i, intf->si_dev); - spin_lock_irqsave(&interfaces_lock, flags); + e = kmalloc(sizeof(*e), GFP_KERNEL); + if (!e) + goto out_err; + kref_get(&intf->refcount); + e->intf = intf; + e->intf_num = intf->intf_num; + list_add_tail(&e->link, &to_deliver); } - spin_unlock_irqrestore(&interfaces_lock, flags); + + /* We will succeed, so add it to the list. */ + list_add(&watcher->link, &smi_watchers); + + mutex_unlock(&ipmi_interfaces_mutex); + + list_for_each_entry_safe(e, e2, &to_deliver, link) { + list_del(&e->link); + watcher->new_smi(e->intf_num, e->intf->si_dev); + kref_put(&e->intf->refcount, intf_free); + kfree(e); + } + + mutex_unlock(&smi_watchers_mutex); + return 0; + + out_err: + mutex_unlock(&ipmi_interfaces_mutex); + mutex_unlock(&smi_watchers_mutex); + list_for_each_entry_safe(e, e2, &to_deliver, link) { + list_del(&e->link); + kref_put(&e->intf->refcount, intf_free); + kfree(e); + } + return -ENOMEM; } int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher) { - down_write(&smi_watchers_sem); + mutex_lock(&smi_watchers_mutex); list_del(&(watcher->link)); - up_write(&smi_watchers_sem); + mutex_unlock(&smi_watchers_mutex); return 0; } +/* + * Must be called with smi_watchers_mutex held. + */ static void call_smi_watchers(int i, struct device *dev) { struct ipmi_smi_watcher *w; - down_read(&smi_watchers_sem); list_for_each_entry(w, &smi_watchers, link) { if (try_module_get(w->owner)) { w->new_smi(i, dev); module_put(w->owner); } } - up_read(&smi_watchers_sem); } static int @@ -590,6 +637,17 @@ static void deliver_response(struct ipmi_recv_msg *msg) } } +static void +deliver_err_response(struct ipmi_recv_msg *msg, int err) +{ + msg->recv_type = IPMI_RESPONSE_RECV_TYPE; + msg->msg_data[0] = err; + msg->msg.netfn |= 1; /* Convert to a response. */ + msg->msg.data_len = 1; + msg->msg.data = msg->msg_data; + deliver_response(msg); +} + /* Find the next sequence number not being used and add the given message with the given timeout to the sequence table. This must be called with the interface's seq_lock held. */ @@ -727,14 +785,8 @@ static int intf_err_seq(ipmi_smi_t intf, } spin_unlock_irqrestore(&(intf->seq_lock), flags); - if (msg) { - msg->recv_type = IPMI_RESPONSE_RECV_TYPE; - msg->msg_data[0] = err; - msg->msg.netfn |= 1; /* Convert to a response. */ - msg->msg.data_len = 1; - msg->msg.data = msg->msg_data; - deliver_response(msg); - } + if (msg) + deliver_err_response(msg, err); return rv; } @@ -776,17 +828,18 @@ int ipmi_create_user(unsigned int if_num, if (!new_user) return -ENOMEM; - spin_lock_irqsave(&interfaces_lock, flags); - intf = ipmi_interfaces[if_num]; - if ((if_num >= MAX_IPMI_INTERFACES) || IPMI_INVALID_INTERFACE(intf)) { - spin_unlock_irqrestore(&interfaces_lock, flags); - rv = -EINVAL; - goto out_kfree; + mutex_lock(&ipmi_interfaces_mutex); + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + if (intf->intf_num == if_num) + goto found; } + /* Not found, return an error */ + rv = -EINVAL; + goto out_kfree; + found: /* Note that each existing user holds a refcount to the interface. */ kref_get(&intf->refcount); - spin_unlock_irqrestore(&interfaces_lock, flags); kref_init(&new_user->refcount); new_user->handler = handler; @@ -807,6 +860,10 @@ int ipmi_create_user(unsigned int if_num, } } + /* Hold the lock so intf->handlers is guaranteed to be good + * until now */ + mutex_unlock(&ipmi_interfaces_mutex); + new_user->valid = 1; spin_lock_irqsave(&intf->seq_lock, flags); list_add_rcu(&new_user->link, &intf->users); @@ -817,6 +874,7 @@ int ipmi_create_user(unsigned int if_num, out_kref: kref_put(&intf->refcount, intf_free); out_kfree: + mutex_unlock(&ipmi_interfaces_mutex); kfree(new_user); return rv; } @@ -846,6 +904,7 @@ int ipmi_destroy_user(ipmi_user_t user) && (intf->seq_table[i].recv_msg->user == user)) { intf->seq_table[i].inuse = 0; + ipmi_free_recv_msg(intf->seq_table[i].recv_msg); } } spin_unlock_irqrestore(&intf->seq_lock, flags); @@ -872,9 +931,13 @@ int ipmi_destroy_user(ipmi_user_t user) kfree(rcvr); } - module_put(intf->handlers->owner); - if (intf->handlers->dec_usecount) - intf->handlers->dec_usecount(intf->send_info); + mutex_lock(&ipmi_interfaces_mutex); + if (intf->handlers) { + module_put(intf->handlers->owner); + if (intf->handlers->dec_usecount) + intf->handlers->dec_usecount(intf->send_info); + } + mutex_unlock(&ipmi_interfaces_mutex); kref_put(&intf->refcount, intf_free); @@ -887,8 +950,8 @@ void ipmi_get_version(ipmi_user_t user, unsigned char *major, unsigned char *minor) { - *major = ipmi_version_major(&user->intf->bmc->id); - *minor = ipmi_version_minor(&user->intf->bmc->id); + *major = user->intf->ipmi_version_major; + *minor = user->intf->ipmi_version_minor; } int ipmi_set_my_address(ipmi_user_t user, @@ -931,6 +994,65 @@ int ipmi_get_my_LUN(ipmi_user_t user, return 0; } +int ipmi_get_maintenance_mode(ipmi_user_t user) +{ + int mode; + unsigned long flags; + + spin_lock_irqsave(&user->intf->maintenance_mode_lock, flags); + mode = user->intf->maintenance_mode; + spin_unlock_irqrestore(&user->intf->maintenance_mode_lock, flags); + + return mode; +} +EXPORT_SYMBOL(ipmi_get_maintenance_mode); + +static void maintenance_mode_update(ipmi_smi_t intf) +{ + if (intf->handlers->set_maintenance_mode) + intf->handlers->set_maintenance_mode( + intf->send_info, intf->maintenance_mode_enable); +} + +int ipmi_set_maintenance_mode(ipmi_user_t user, int mode) +{ + int rv = 0; + unsigned long flags; + ipmi_smi_t intf = user->intf; + + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + if (intf->maintenance_mode != mode) { + switch (mode) { + case IPMI_MAINTENANCE_MODE_AUTO: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable + = (intf->auto_maintenance_timeout > 0); + break; + + case IPMI_MAINTENANCE_MODE_OFF: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable = 0; + break; + + case IPMI_MAINTENANCE_MODE_ON: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable = 1; + break; + + default: + rv = -EINVAL; + goto out_unlock; + } + + maintenance_mode_update(intf); + } + out_unlock: + spin_unlock_irqrestore(&intf->maintenance_mode_lock, flags); + + return rv; +} +EXPORT_SYMBOL(ipmi_set_maintenance_mode); + int ipmi_set_gets_events(ipmi_user_t user, int val) { unsigned long flags; @@ -943,20 +1065,33 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) spin_lock_irqsave(&intf->events_lock, flags); user->gets_events = val; - if (val) { - /* Deliver any queued events. */ + if (intf->delivering_events) + /* + * Another thread is delivering events for this, so + * let it handle any new events. + */ + goto out; + + /* Deliver any queued events. */ + while (user->gets_events && !list_empty(&intf->waiting_events)) { list_for_each_entry_safe(msg, msg2, &intf->waiting_events, link) list_move_tail(&msg->link, &msgs); intf->waiting_events_count = 0; - } - /* Hold the events lock while doing this to preserve order. */ - list_for_each_entry_safe(msg, msg2, &msgs, link) { - msg->user = user; - kref_get(&user->refcount); - deliver_response(msg); + intf->delivering_events = 1; + spin_unlock_irqrestore(&intf->events_lock, flags); + + list_for_each_entry_safe(msg, msg2, &msgs, link) { + msg->user = user; + kref_get(&user->refcount); + deliver_response(msg); + } + + spin_lock_irqsave(&intf->events_lock, flags); + intf->delivering_events = 0; } + out: spin_unlock_irqrestore(&intf->events_lock, flags); return 0; @@ -1067,7 +1202,8 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, void ipmi_user_set_run_to_completion(ipmi_user_t user, int val) { ipmi_smi_t intf = user->intf; - intf->handlers->set_run_to_completion(intf->send_info, val); + if (intf->handlers) + intf->handlers->set_run_to_completion(intf->send_info, val); } static unsigned char @@ -1178,10 +1314,11 @@ static int i_ipmi_request(ipmi_user_t user, int retries, unsigned int retry_time_ms) { - int rv = 0; - struct ipmi_smi_msg *smi_msg; - struct ipmi_recv_msg *recv_msg; - unsigned long flags; + int rv = 0; + struct ipmi_smi_msg *smi_msg; + struct ipmi_recv_msg *recv_msg; + unsigned long flags; + struct ipmi_smi_handlers *handlers; if (supplied_recv) { @@ -1204,6 +1341,13 @@ static int i_ipmi_request(ipmi_user_t user, } } + rcu_read_lock(); + handlers = intf->handlers; + if (!handlers) { + rv = -ENODEV; + goto out_err; + } + recv_msg->user = user; if (user) kref_get(&user->refcount); @@ -1246,6 +1390,24 @@ static int i_ipmi_request(ipmi_user_t user, goto out_err; } + if (((msg->netfn == IPMI_NETFN_APP_REQUEST) + && ((msg->cmd == IPMI_COLD_RESET_CMD) + || (msg->cmd == IPMI_WARM_RESET_CMD))) + || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) + { + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + intf->auto_maintenance_timeout + = IPMI_MAINTENANCE_MODE_TIMEOUT; + if (!intf->maintenance_mode + && !intf->maintenance_mode_enable) + { + intf->maintenance_mode_enable = 1; + maintenance_mode_update(intf); + } + spin_unlock_irqrestore(&intf->maintenance_mode_lock, + flags); + } + if ((msg->data_len + 2) > IPMI_MAX_MSG_LENGTH) { spin_lock_irqsave(&intf->counter_lock, flags); intf->sent_invalid_commands++; @@ -1520,11 +1682,14 @@ static int i_ipmi_request(ipmi_user_t user, printk("\n"); } #endif - intf->handlers->sender(intf->send_info, smi_msg, priority); + + handlers->sender(intf->send_info, smi_msg, priority); + rcu_read_unlock(); return 0; out_err: + rcu_read_unlock(); ipmi_free_smi_msg(smi_msg); ipmi_free_recv_msg(recv_msg); return rv; @@ -1604,6 +1769,7 @@ int ipmi_request_supply_msgs(ipmi_user_t user, -1, 0); } +#ifdef CONFIG_PROC_FS static int ipmb_file_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -1692,6 +1858,7 @@ static int stat_file_read_proc(char *page, char **start, off_t off, return (out - ((char *) page)); } +#endif /* CONFIG_PROC_FS */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, read_proc_t *read_proc, write_proc_t *write_proc, @@ -1817,13 +1984,12 @@ static int __find_bmc_prod_dev_id(struct device *dev, void *data) struct bmc_device *bmc = dev_get_drvdata(dev); return (bmc->id.product_id == id->product_id - && bmc->id.product_id == id->product_id && bmc->id.device_id == id->device_id); } static struct bmc_device *ipmi_find_bmc_prod_dev_id( struct device_driver *drv, - unsigned char product_id, unsigned char device_id) + unsigned int product_id, unsigned char device_id) { struct prod_dev_id id = { .product_id = product_id, @@ -1940,6 +2106,9 @@ static ssize_t guid_show(struct device *dev, struct device_attribute *attr, static void remove_files(struct bmc_device *bmc) { + if (!bmc->dev) + return; + device_remove_file(&bmc->dev->dev, &bmc->device_id_attr); device_remove_file(&bmc->dev->dev, @@ -1973,7 +2142,8 @@ cleanup_bmc_device(struct kref *ref) bmc = container_of(ref, struct bmc_device, refcount); remove_files(bmc); - platform_device_unregister(bmc->dev); + if (bmc->dev) + platform_device_unregister(bmc->dev); kfree(bmc); } @@ -1981,7 +2151,11 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) { struct bmc_device *bmc = intf->bmc; - sysfs_remove_link(&intf->si_dev->kobj, "bmc"); + if (intf->sysfs_name) { + sysfs_remove_link(&intf->si_dev->kobj, intf->sysfs_name); + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; + } if (intf->my_dev_name) { sysfs_remove_link(&bmc->dev->dev.kobj, intf->my_dev_name); kfree(intf->my_dev_name); @@ -1990,6 +2164,7 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) mutex_lock(&ipmidriver_mutex); kref_put(&bmc->refcount, cleanup_bmc_device); + intf->bmc = NULL; mutex_unlock(&ipmidriver_mutex); } @@ -1997,6 +2172,56 @@ static int create_files(struct bmc_device *bmc) { int err; + bmc->device_id_attr.attr.name = "device_id"; + bmc->device_id_attr.attr.owner = THIS_MODULE; + bmc->device_id_attr.attr.mode = S_IRUGO; + bmc->device_id_attr.show = device_id_show; + + bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs"; + bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE; + bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; + bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; + + bmc->revision_attr.attr.name = "revision"; + bmc->revision_attr.attr.owner = THIS_MODULE; + bmc->revision_attr.attr.mode = S_IRUGO; + bmc->revision_attr.show = revision_show; + + bmc->firmware_rev_attr.attr.name = "firmware_revision"; + bmc->firmware_rev_attr.attr.owner = THIS_MODULE; + bmc->firmware_rev_attr.attr.mode = S_IRUGO; + bmc->firmware_rev_attr.show = firmware_rev_show; + + bmc->version_attr.attr.name = "ipmi_version"; + bmc->version_attr.attr.owner = THIS_MODULE; + bmc->version_attr.attr.mode = S_IRUGO; + bmc->version_attr.show = ipmi_version_show; + + bmc->add_dev_support_attr.attr.name = "additional_device_support"; + bmc->add_dev_support_attr.attr.owner = THIS_MODULE; + bmc->add_dev_support_attr.attr.mode = S_IRUGO; + bmc->add_dev_support_attr.show = add_dev_support_show; + + bmc->manufacturer_id_attr.attr.name = "manufacturer_id"; + bmc->manufacturer_id_attr.attr.owner = THIS_MODULE; + bmc->manufacturer_id_attr.attr.mode = S_IRUGO; + bmc->manufacturer_id_attr.show = manufacturer_id_show; + + bmc->product_id_attr.attr.name = "product_id"; + bmc->product_id_attr.attr.owner = THIS_MODULE; + bmc->product_id_attr.attr.mode = S_IRUGO; + bmc->product_id_attr.show = product_id_show; + + bmc->guid_attr.attr.name = "guid"; + bmc->guid_attr.attr.owner = THIS_MODULE; + bmc->guid_attr.attr.mode = S_IRUGO; + bmc->guid_attr.show = guid_show; + + bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision"; + bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE; + bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; + bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; + err = device_create_file(&bmc->dev->dev, &bmc->device_id_attr); if (err) goto out; @@ -2066,7 +2291,8 @@ out: return err; } -static int ipmi_bmc_register(ipmi_smi_t intf) +static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, + const char *sysfs_name) { int rv; struct bmc_device *bmc = intf->bmc; @@ -2106,9 +2332,39 @@ static int ipmi_bmc_register(ipmi_smi_t intf) bmc->id.product_id, bmc->id.device_id); } else { - bmc->dev = platform_device_alloc("ipmi_bmc", - bmc->id.device_id); + char name[14]; + unsigned char orig_dev_id = bmc->id.device_id; + int warn_printed = 0; + + snprintf(name, sizeof(name), + "ipmi_bmc.%4.4x", bmc->id.product_id); + + while (ipmi_find_bmc_prod_dev_id(&ipmidriver, + bmc->id.product_id, + bmc->id.device_id)) + { + if (!warn_printed) { + printk(KERN_WARNING PFX + "This machine has two different BMCs" + " with the same product id and device" + " id. This is an error in the" + " firmware, but incrementing the" + " device id to work around the problem." + " Prod ID = 0x%x, Dev ID = 0x%x\n", + bmc->id.product_id, bmc->id.device_id); + warn_printed = 1; + } + bmc->id.device_id++; /* Wraps at 255 */ + if (bmc->id.device_id == orig_dev_id) { + printk(KERN_ERR PFX + "Out of device ids!\n"); + break; + } + } + + bmc->dev = platform_device_alloc(name, bmc->id.device_id); if (!bmc->dev) { + mutex_unlock(&ipmidriver_mutex); printk(KERN_ERR "ipmi_msghandler:" " Unable to allocate platform device\n"); @@ -2121,6 +2377,8 @@ static int ipmi_bmc_register(ipmi_smi_t intf) rv = platform_device_add(bmc->dev); mutex_unlock(&ipmidriver_mutex); if (rv) { + platform_device_put(bmc->dev); + bmc->dev = NULL; printk(KERN_ERR "ipmi_msghandler:" " Unable to register bmc device: %d\n", @@ -2130,57 +2388,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf) return rv; } - bmc->device_id_attr.attr.name = "device_id"; - bmc->device_id_attr.attr.owner = THIS_MODULE; - bmc->device_id_attr.attr.mode = S_IRUGO; - bmc->device_id_attr.show = device_id_show; - - bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs"; - bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE; - bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; - bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; - - bmc->revision_attr.attr.name = "revision"; - bmc->revision_attr.attr.owner = THIS_MODULE; - bmc->revision_attr.attr.mode = S_IRUGO; - bmc->revision_attr.show = revision_show; - - bmc->firmware_rev_attr.attr.name = "firmware_revision"; - bmc->firmware_rev_attr.attr.owner = THIS_MODULE; - bmc->firmware_rev_attr.attr.mode = S_IRUGO; - bmc->firmware_rev_attr.show = firmware_rev_show; - - bmc->version_attr.attr.name = "ipmi_version"; - bmc->version_attr.attr.owner = THIS_MODULE; - bmc->version_attr.attr.mode = S_IRUGO; - bmc->version_attr.show = ipmi_version_show; - - bmc->add_dev_support_attr.attr.name - = "additional_device_support"; - bmc->add_dev_support_attr.attr.owner = THIS_MODULE; - bmc->add_dev_support_attr.attr.mode = S_IRUGO; - bmc->add_dev_support_attr.show = add_dev_support_show; - - bmc->manufacturer_id_attr.attr.name = "manufacturer_id"; - bmc->manufacturer_id_attr.attr.owner = THIS_MODULE; - bmc->manufacturer_id_attr.attr.mode = S_IRUGO; - bmc->manufacturer_id_attr.show = manufacturer_id_show; - - bmc->product_id_attr.attr.name = "product_id"; - bmc->product_id_attr.attr.owner = THIS_MODULE; - bmc->product_id_attr.attr.mode = S_IRUGO; - bmc->product_id_attr.show = product_id_show; - - bmc->guid_attr.attr.name = "guid"; - bmc->guid_attr.attr.owner = THIS_MODULE; - bmc->guid_attr.attr.mode = S_IRUGO; - bmc->guid_attr.show = guid_show; - - bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision"; - bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE; - bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; - bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; - rv = create_files(bmc); if (rv) { mutex_lock(&ipmidriver_mutex); @@ -2202,29 +2409,44 @@ static int ipmi_bmc_register(ipmi_smi_t intf) * create symlink from system interface device to bmc device * and back. */ + intf->sysfs_name = kstrdup(sysfs_name, GFP_KERNEL); + if (!intf->sysfs_name) { + rv = -ENOMEM; + printk(KERN_ERR + "ipmi_msghandler: allocate link to BMC: %d\n", + rv); + goto out_err; + } + rv = sysfs_create_link(&intf->si_dev->kobj, - &bmc->dev->dev.kobj, "bmc"); + &bmc->dev->dev.kobj, intf->sysfs_name); if (rv) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; printk(KERN_ERR "ipmi_msghandler: Unable to create bmc symlink: %d\n", rv); goto out_err; } - size = snprintf(dummy, 0, "ipmi%d", intf->intf_num); + size = snprintf(dummy, 0, "ipmi%d", ifnum); intf->my_dev_name = kmalloc(size+1, GFP_KERNEL); if (!intf->my_dev_name) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; rv = -ENOMEM; printk(KERN_ERR "ipmi_msghandler: allocate link from BMC: %d\n", rv); goto out_err; } - snprintf(intf->my_dev_name, size+1, "ipmi%d", intf->intf_num); + snprintf(intf->my_dev_name, size+1, "ipmi%d", ifnum); rv = sysfs_create_link(&bmc->dev->dev.kobj, &intf->si_dev->kobj, intf->my_dev_name); if (rv) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; kfree(intf->my_dev_name); intf->my_dev_name = NULL; printk(KERN_ERR @@ -2409,17 +2631,14 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *si_dev, + const char *sysfs_name, unsigned char slave_addr) { int i, j; int rv; ipmi_smi_t intf; - unsigned long flags; - int version_major; - int version_minor; - - version_major = ipmi_version_major(device_id); - version_minor = ipmi_version_minor(device_id); + ipmi_smi_t tintf; + struct list_head *link; /* Make sure the driver is actually initialized, this handles problems with initialization order. */ @@ -2437,12 +2656,16 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, if (!intf) return -ENOMEM; memset(intf, 0, sizeof(*intf)); + + intf->ipmi_version_major = ipmi_version_major(device_id); + intf->ipmi_version_minor = ipmi_version_minor(device_id); + intf->bmc = kzalloc(sizeof(*intf->bmc), GFP_KERNEL); if (!intf->bmc) { kfree(intf); return -ENOMEM; } - intf->intf_num = -1; + intf->intf_num = -1; /* Mark it invalid for now. */ kref_init(&intf->refcount); intf->bmc->id = *device_id; intf->si_dev = si_dev; @@ -2470,26 +2693,30 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, INIT_LIST_HEAD(&intf->waiting_events); intf->waiting_events_count = 0; mutex_init(&intf->cmd_rcvrs_mutex); + spin_lock_init(&intf->maintenance_mode_lock); INIT_LIST_HEAD(&intf->cmd_rcvrs); init_waitqueue_head(&intf->waitq); spin_lock_init(&intf->counter_lock); intf->proc_dir = NULL; - rv = -ENOMEM; - spin_lock_irqsave(&interfaces_lock, flags); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - if (ipmi_interfaces[i] == NULL) { - intf->intf_num = i; - /* Reserve the entry till we are done. */ - ipmi_interfaces[i] = IPMI_INVALID_INTERFACE_ENTRY; - rv = 0; + mutex_lock(&smi_watchers_mutex); + mutex_lock(&ipmi_interfaces_mutex); + /* Look for a hole in the numbers. */ + i = 0; + link = &ipmi_interfaces; + list_for_each_entry_rcu(tintf, &ipmi_interfaces, link) { + if (tintf->intf_num != i) { + link = &tintf->link; break; } + i++; } - spin_unlock_irqrestore(&interfaces_lock, flags); - if (rv) - goto out; + /* Add the new interface in numeric order. */ + if (i == 0) + list_add_rcu(&intf->link, &ipmi_interfaces); + else + list_add_tail_rcu(&intf->link, link); rv = handlers->start_processing(send_info, intf); if (rv) @@ -2497,8 +2724,9 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, get_guid(intf); - if ((version_major > 1) - || ((version_major == 1) && (version_minor >= 5))) + if ((intf->ipmi_version_major > 1) + || ((intf->ipmi_version_major == 1) + && (intf->ipmi_version_minor >= 5))) { /* Start scanning the channels to see what is available. */ @@ -2521,64 +2749,67 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, if (rv == 0) rv = add_proc_entries(intf, i); - rv = ipmi_bmc_register(intf); + rv = ipmi_bmc_register(intf, i, sysfs_name); out: if (rv) { if (intf->proc_dir) remove_proc_entries(intf); + intf->handlers = NULL; + list_del_rcu(&intf->link); + mutex_unlock(&ipmi_interfaces_mutex); + mutex_unlock(&smi_watchers_mutex); + synchronize_rcu(); kref_put(&intf->refcount, intf_free); - if (i < MAX_IPMI_INTERFACES) { - spin_lock_irqsave(&interfaces_lock, flags); - ipmi_interfaces[i] = NULL; - spin_unlock_irqrestore(&interfaces_lock, flags); - } } else { - spin_lock_irqsave(&interfaces_lock, flags); - ipmi_interfaces[i] = intf; - spin_unlock_irqrestore(&interfaces_lock, flags); + /* After this point the interface is legal to use. */ + intf->intf_num = i; + mutex_unlock(&ipmi_interfaces_mutex); call_smi_watchers(i, intf->si_dev); + mutex_unlock(&smi_watchers_mutex); } return rv; } +static void cleanup_smi_msgs(ipmi_smi_t intf) +{ + int i; + struct seq_table *ent; + + /* No need for locks, the interface is down. */ + for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { + ent = &(intf->seq_table[i]); + if (!ent->inuse) + continue; + deliver_err_response(ent->recv_msg, IPMI_ERR_UNSPECIFIED); + } +} + int ipmi_unregister_smi(ipmi_smi_t intf) { - int i; struct ipmi_smi_watcher *w; - unsigned long flags; + int intf_num = intf->intf_num; ipmi_bmc_unregister(intf); - spin_lock_irqsave(&interfaces_lock, flags); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - if (ipmi_interfaces[i] == intf) { - /* Set the interface number reserved until we - * are done. */ - ipmi_interfaces[i] = IPMI_INVALID_INTERFACE_ENTRY; - intf->intf_num = -1; - break; - } - } - spin_unlock_irqrestore(&interfaces_lock,flags); + mutex_lock(&smi_watchers_mutex); + mutex_lock(&ipmi_interfaces_mutex); + intf->intf_num = -1; + intf->handlers = NULL; + list_del_rcu(&intf->link); + mutex_unlock(&ipmi_interfaces_mutex); + synchronize_rcu(); - if (i == MAX_IPMI_INTERFACES) - return -ENODEV; + cleanup_smi_msgs(intf); remove_proc_entries(intf); /* Call all the watcher interfaces to tell them that an interface is gone. */ - down_read(&smi_watchers_sem); list_for_each_entry(w, &smi_watchers, link) - w->smi_gone(i); - up_read(&smi_watchers_sem); - - /* Allow the entry to be reused now. */ - spin_lock_irqsave(&interfaces_lock, flags); - ipmi_interfaces[i] = NULL; - spin_unlock_irqrestore(&interfaces_lock,flags); + w->smi_gone(intf_num); + mutex_unlock(&smi_watchers_mutex); kref_put(&intf->refcount, intf_free); return 0; @@ -2660,6 +2891,7 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, struct ipmi_ipmb_addr *ipmb_addr; struct ipmi_recv_msg *recv_msg; unsigned long flags; + struct ipmi_smi_handlers *handlers; if (msg->rsp_size < 10) { /* Message not big enough, just ignore it. */ @@ -2716,10 +2948,16 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, printk("\n"); } #endif - intf->handlers->sender(intf->send_info, msg, 0); - - rv = -1; /* We used the message, so return the value that - causes it to not be freed or queued. */ + rcu_read_lock(); + handlers = intf->handlers; + if (handlers) { + handlers->sender(intf->send_info, msg, 0); + /* We used the message, so return the value + that causes it to not be freed or + queued. */ + rv = -1; + } + rcu_read_unlock(); } else { /* Deliver the message to the user. */ spin_lock_irqsave(&intf->counter_lock, flags); @@ -3309,16 +3547,6 @@ void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) rcu_read_unlock(); } -static void -handle_msg_timeout(struct ipmi_recv_msg *msg) -{ - msg->recv_type = IPMI_RESPONSE_RECV_TYPE; - msg->msg_data[0] = IPMI_TIMEOUT_COMPLETION_CODE; - msg->msg.netfn |= 1; /* Convert to a response. */ - msg->msg.data_len = 1; - msg->msg.data = msg->msg_data; - deliver_response(msg); -} static struct ipmi_smi_msg * smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, @@ -3350,7 +3578,11 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, struct list_head *timeouts, long timeout_period, int slot, unsigned long *flags) { - struct ipmi_recv_msg *msg; + struct ipmi_recv_msg *msg; + struct ipmi_smi_handlers *handlers; + + if (intf->intf_num == -1) + return; if (!ent->inuse) return; @@ -3393,13 +3625,19 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, return; spin_unlock_irqrestore(&intf->seq_lock, *flags); + /* Send the new message. We send with a zero * priority. It timed out, I doubt time is * that critical now, and high priority * messages are really only for messages to the * local MC, which don't get resent. */ - intf->handlers->sender(intf->send_info, - smi_msg, 0); + handlers = intf->handlers; + if (handlers) + intf->handlers->sender(intf->send_info, + smi_msg, 0); + else + ipmi_free_smi_msg(smi_msg); + spin_lock_irqsave(&intf->seq_lock, *flags); } } @@ -3411,18 +3649,12 @@ static void ipmi_timeout_handler(long timeout_period) struct ipmi_recv_msg *msg, *msg2; struct ipmi_smi_msg *smi_msg, *smi_msg2; unsigned long flags; - int i, j; + int i; INIT_LIST_HEAD(&timeouts); - spin_lock(&interfaces_lock); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) - continue; - kref_get(&intf->refcount); - spin_unlock(&interfaces_lock); - + rcu_read_lock(); + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { /* See if any waiting messages need to be processed. */ spin_lock_irqsave(&intf->waiting_msgs_lock, flags); list_for_each_entry_safe(smi_msg, smi_msg2, @@ -3442,35 +3674,60 @@ static void ipmi_timeout_handler(long timeout_period) have timed out, putting them in the timeouts list. */ spin_lock_irqsave(&intf->seq_lock, flags); - for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++) - check_msg_timeout(intf, &(intf->seq_table[j]), - &timeouts, timeout_period, j, + for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) + check_msg_timeout(intf, &(intf->seq_table[i]), + &timeouts, timeout_period, i, &flags); spin_unlock_irqrestore(&intf->seq_lock, flags); list_for_each_entry_safe(msg, msg2, &timeouts, link) - handle_msg_timeout(msg); - - kref_put(&intf->refcount, intf_free); - spin_lock(&interfaces_lock); + deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE); + + /* + * Maintenance mode handling. Check the timeout + * optimistically before we claim the lock. It may + * mean a timeout gets missed occasionally, but that + * only means the timeout gets extended by one period + * in that case. No big deal, and it avoids the lock + * most of the time. + */ + if (intf->auto_maintenance_timeout > 0) { + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + if (intf->auto_maintenance_timeout > 0) { + intf->auto_maintenance_timeout + -= timeout_period; + if (!intf->maintenance_mode + && (intf->auto_maintenance_timeout <= 0)) + { + intf->maintenance_mode_enable = 0; + maintenance_mode_update(intf); + } + } + spin_unlock_irqrestore(&intf->maintenance_mode_lock, + flags); + } } - spin_unlock(&interfaces_lock); + rcu_read_unlock(); } static void ipmi_request_event(void) { - ipmi_smi_t intf; - int i; + ipmi_smi_t intf; + struct ipmi_smi_handlers *handlers; - spin_lock(&interfaces_lock); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) + rcu_read_lock(); + /* Called from the timer, no need to check if handlers is + * valid. */ + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + /* No event requests when in maintenance mode. */ + if (intf->maintenance_mode_enable) continue; - intf->handlers->request_events(intf->send_info); + handlers = intf->handlers; + if (handlers) + handlers->request_events(intf->send_info); } - spin_unlock(&interfaces_lock); + rcu_read_unlock(); } static struct timer_list ipmi_timer; @@ -3599,7 +3856,6 @@ static void send_panic_events(char *str) struct kernel_ipmi_msg msg; ipmi_smi_t intf; unsigned char data[16]; - int i; struct ipmi_system_interface_addr *si; struct ipmi_addr addr; struct ipmi_smi_msg smi_msg; @@ -3633,9 +3889,9 @@ static void send_panic_events(char *str) recv_msg.done = dummy_recv_done_handler; /* For every registered interface, send the event. */ - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + if (!intf->handlers) + /* Interface is not ready. */ continue; /* Send the event announcing the panic. */ @@ -3660,13 +3916,14 @@ static void send_panic_events(char *str) if (!str) return; - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { + /* For every registered interface, send the event. */ + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { char *p = str; struct ipmi_ipmb_addr *ipmb; int j; - intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) + if (intf->intf_num == -1) + /* Interface was not ready yet. */ continue; /* First job here is to figure out where to send the @@ -3792,7 +4049,6 @@ static int panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - int i; ipmi_smi_t intf; if (has_panicked) @@ -3800,9 +4056,9 @@ static int panic_event(struct notifier_block *this, has_panicked = 1; /* For every registered interface, set it to run to completion. */ - for (i = 0; i < MAX_IPMI_INTERFACES; i++) { - intf = ipmi_interfaces[i]; - if (IPMI_INVALID_INTERFACE(intf)) + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + if (!intf->handlers) + /* Interface is not ready. */ continue; intf->handlers->set_run_to_completion(intf->send_info, 1); @@ -3823,7 +4079,6 @@ static struct notifier_block panic_block = { static int ipmi_init_msghandler(void) { - int i; int rv; if (initialized) @@ -3838,9 +4093,6 @@ static int ipmi_init_msghandler(void) printk(KERN_INFO "ipmi message handler version " IPMI_DRIVER_VERSION "\n"); - for (i = 0; i < MAX_IPMI_INTERFACES; i++) - ipmi_interfaces[i] = NULL; - #ifdef CONFIG_PROC_FS proc_ipmi_root = proc_mkdir("ipmi", NULL); if (!proc_ipmi_root) { diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index 8d941db8345..597eb4f88b8 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -43,6 +43,9 @@ #define PFX "IPMI poweroff: " +static void ipmi_po_smi_gone(int if_num); +static void ipmi_po_new_smi(int if_num, struct device *device); + /* Definitions for controlling power off (if the system supports it). It * conveniently matches the IPMI chassis control values. */ #define IPMI_CHASSIS_POWER_DOWN 0 /* power down, the default. */ @@ -51,6 +54,37 @@ /* the IPMI data command */ static int poweroff_powercycle; +/* Which interface to use, -1 means the first we see. */ +static int ifnum_to_use = -1; + +/* Our local state. */ +static int ready = 0; +static ipmi_user_t ipmi_user; +static int ipmi_ifnum; +static void (*specific_poweroff_func)(ipmi_user_t user) = NULL; + +/* Holds the old poweroff function so we can restore it on removal. */ +static void (*old_poweroff_func)(void); + +static int set_param_ifnum(const char *val, struct kernel_param *kp) +{ + int rv = param_set_int(val, kp); + if (rv) + return rv; + if ((ifnum_to_use < 0) || (ifnum_to_use == ipmi_ifnum)) + return 0; + + ipmi_po_smi_gone(ipmi_ifnum); + ipmi_po_new_smi(ifnum_to_use, NULL); + return 0; +} + +module_param_call(ifnum_to_use, set_param_ifnum, param_get_int, + &ifnum_to_use, 0644); +MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " + "timer. Setting to -1 defaults to the first registered " + "interface"); + /* parameter definition to allow user to flag power cycle */ module_param(poweroff_powercycle, int, 0644); MODULE_PARM_DESC(poweroff_powercycle, " Set to non-zero to enable power cycle instead of power down. Power cycle is contingent on hardware support, otherwise it defaults back to power down."); @@ -142,6 +176,42 @@ static int ipmi_request_in_rc_mode(ipmi_user_t user, #define IPMI_ATCA_GET_ADDR_INFO_CMD 0x01 #define IPMI_PICMG_ID 0 +#define IPMI_NETFN_OEM 0x2e +#define IPMI_ATCA_PPS_GRACEFUL_RESTART 0x11 +#define IPMI_ATCA_PPS_IANA "\x00\x40\x0A" +#define IPMI_MOTOROLA_MANUFACTURER_ID 0x0000A1 +#define IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID 0x0051 + +static void (*atca_oem_poweroff_hook)(ipmi_user_t user) = NULL; + +static void pps_poweroff_atca (ipmi_user_t user) +{ + struct ipmi_system_interface_addr smi_addr; + struct kernel_ipmi_msg send_msg; + int rv; + /* + * Configure IPMI address for local access + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; + + printk(KERN_INFO PFX "PPS powerdown hook used"); + + send_msg.netfn = IPMI_NETFN_OEM; + send_msg.cmd = IPMI_ATCA_PPS_GRACEFUL_RESTART; + send_msg.data = IPMI_ATCA_PPS_IANA; + send_msg.data_len = 3; + rv = ipmi_request_in_rc_mode(user, + (struct ipmi_addr *) &smi_addr, + &send_msg); + if (rv && rv != IPMI_UNKNOWN_ERR_COMPLETION_CODE) { + printk(KERN_ERR PFX "Unable to send ATCA ," + " IPMI error 0x%x\n", rv); + } + return; +} + static int ipmi_atca_detect (ipmi_user_t user) { struct ipmi_system_interface_addr smi_addr; @@ -167,6 +237,13 @@ static int ipmi_atca_detect (ipmi_user_t user) rv = ipmi_request_wait_for_response(user, (struct ipmi_addr *) &smi_addr, &send_msg); + + printk(KERN_INFO PFX "ATCA Detect mfg 0x%X prod 0x%X\n", mfg_id, prod_id); + if((mfg_id == IPMI_MOTOROLA_MANUFACTURER_ID) + && (prod_id == IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID)) { + printk(KERN_INFO PFX "Installing Pigeon Point Systems Poweroff Hook\n"); + atca_oem_poweroff_hook = pps_poweroff_atca; + } return !rv; } @@ -200,12 +277,19 @@ static void ipmi_poweroff_atca (ipmi_user_t user) rv = ipmi_request_in_rc_mode(user, (struct ipmi_addr *) &smi_addr, &send_msg); - if (rv) { + /** At this point, the system may be shutting down, and most + ** serial drivers (if used) will have interrupts turned off + ** it may be better to ignore IPMI_UNKNOWN_ERR_COMPLETION_CODE + ** return code + **/ + if (rv && rv != IPMI_UNKNOWN_ERR_COMPLETION_CODE) { printk(KERN_ERR PFX "Unable to send ATCA powerdown message," " IPMI error 0x%x\n", rv); goto out; } + if(atca_oem_poweroff_hook) + return atca_oem_poweroff_hook(user); out: return; } @@ -440,15 +524,6 @@ static struct poweroff_function poweroff_functions[] = { / sizeof(struct poweroff_function)) -/* Our local state. */ -static int ready = 0; -static ipmi_user_t ipmi_user; -static void (*specific_poweroff_func)(ipmi_user_t user) = NULL; - -/* Holds the old poweroff function so we can restore it on removal. */ -static void (*old_poweroff_func)(void); - - /* Called on a powerdown request. */ static void ipmi_poweroff_function (void) { @@ -473,6 +548,9 @@ static void ipmi_po_new_smi(int if_num, struct device *device) if (ready) return; + if ((ifnum_to_use >= 0) && (ifnum_to_use != if_num)) + return; + rv = ipmi_create_user(if_num, &ipmi_poweroff_handler, NULL, &ipmi_user); if (rv) { @@ -481,6 +559,8 @@ static void ipmi_po_new_smi(int if_num, struct device *device) return; } + ipmi_ifnum = if_num; + /* * Do a get device ide and store some results, since this is * used by several functions. @@ -541,9 +621,15 @@ static void ipmi_po_new_smi(int if_num, struct device *device) static void ipmi_po_smi_gone(int if_num) { - /* This can never be called, because once poweroff driver is - registered, the interface can't go away until the power - driver is unregistered. */ + if (!ready) + return; + + if (ipmi_ifnum != if_num) + return; + + ready = 0; + ipmi_destroy_user(ipmi_user); + pm_power_off = old_poweroff_func; } static struct ipmi_smi_watcher smi_watcher = @@ -616,9 +702,9 @@ static int ipmi_poweroff_init (void) printk(KERN_ERR PFX "Unable to register SMI watcher: %d\n", rv); goto out_err; } -#endif out_err: +#endif return rv; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index bb1fac104fd..81a0c89598e 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -61,6 +61,10 @@ #include "ipmi_si_sm.h" #include <linux/init.h> #include <linux/dmi.h> +#include <linux/string.h> +#include <linux/ctype.h> + +#define PFX "ipmi_si: " /* Measure times between events in the driver. */ #undef DEBUG_TIMING @@ -92,7 +96,7 @@ enum si_intf_state { enum si_type { SI_KCS, SI_SMIC, SI_BT }; -static char *si_to_str[] = { "KCS", "SMIC", "BT" }; +static char *si_to_str[] = { "kcs", "smic", "bt" }; #define DEVICE_NAME "ipmi_si" @@ -222,7 +226,10 @@ struct smi_info static int force_kipmid[SI_MAX_PARMS]; static int num_force_kipmid; +static int unload_when_empty = 1; + static int try_smi_init(struct smi_info *smi); +static void cleanup_one_si(struct smi_info *to_clean); static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); static int register_xaction_notifier(struct notifier_block * nb) @@ -240,14 +247,18 @@ static void deliver_recv_msg(struct smi_info *smi_info, spin_lock(&(smi_info->si_lock)); } -static void return_hosed_msg(struct smi_info *smi_info) +static void return_hosed_msg(struct smi_info *smi_info, int cCode) { struct ipmi_smi_msg *msg = smi_info->curr_msg; + if (cCode < 0 || cCode > IPMI_ERR_UNSPECIFIED) + cCode = IPMI_ERR_UNSPECIFIED; + /* else use it as is */ + /* Make it a reponse */ msg->rsp[0] = msg->data[0] | 4; msg->rsp[1] = msg->data[1]; - msg->rsp[2] = 0xFF; /* Unknown error. */ + msg->rsp[2] = cCode; msg->rsp_size = 3; smi_info->curr_msg = NULL; @@ -298,7 +309,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) smi_info->curr_msg->data, smi_info->curr_msg->data_size); if (err) { - return_hosed_msg(smi_info); + return_hosed_msg(smi_info, err); } rv = SI_SM_CALL_WITHOUT_DELAY; @@ -640,7 +651,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, /* If we were handling a user message, format a response to send to the upper layer to tell it about the error. */ - return_hosed_msg(smi_info); + return_hosed_msg(smi_info, IPMI_ERR_UNSPECIFIED); } si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); } @@ -684,22 +695,24 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, { /* We are idle and the upper layer requested that I fetch events, so do so. */ - unsigned char msg[2]; + atomic_set(&smi_info->req_events, 0); - spin_lock(&smi_info->count_lock); - smi_info->flag_fetches++; - spin_unlock(&smi_info->count_lock); + smi_info->curr_msg = ipmi_alloc_smi_msg(); + if (!smi_info->curr_msg) + goto out; - atomic_set(&smi_info->req_events, 0); - msg[0] = (IPMI_NETFN_APP_REQUEST << 2); - msg[1] = IPMI_GET_MSG_FLAGS_CMD; + smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); + smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD; + smi_info->curr_msg->data_size = 2; smi_info->handlers->start_transaction( - smi_info->si_sm, msg, 2); - smi_info->si_state = SI_GETTING_FLAGS; + smi_info->si_sm, + smi_info->curr_msg->data, + smi_info->curr_msg->data_size); + smi_info->si_state = SI_GETTING_EVENTS; goto restart; } - + out: return si_sm_result; } @@ -714,6 +727,15 @@ static void sender(void *send_info, struct timeval t; #endif + if (atomic_read(&smi_info->stop_operation)) { + msg->rsp[0] = msg->data[0] | 4; + msg->rsp[1] = msg->data[1]; + msg->rsp[2] = IPMI_ERR_UNSPECIFIED; + msg->rsp_size = 3; + deliver_recv_msg(smi_info, msg); + return; + } + spin_lock_irqsave(&(smi_info->msg_lock), flags); #ifdef DEBUG_TIMING do_gettimeofday(&t); @@ -805,13 +827,21 @@ static void poll(void *send_info) { struct smi_info *smi_info = send_info; - smi_event_handler(smi_info, 0); + /* + * Make sure there is some delay in the poll loop so we can + * drive time forward and timeout things. + */ + udelay(10); + smi_event_handler(smi_info, 10); } static void request_events(void *send_info) { struct smi_info *smi_info = send_info; + if (atomic_read(&smi_info->stop_operation)) + return; + atomic_set(&smi_info->req_events, 1); } @@ -949,12 +979,21 @@ static int smi_start_processing(void *send_info, return 0; } +static void set_maintenance_mode(void *send_info, int enable) +{ + struct smi_info *smi_info = send_info; + + if (!enable) + atomic_set(&smi_info->req_events, 0); +} + static struct ipmi_smi_handlers handlers = { .owner = THIS_MODULE, .start_processing = smi_start_processing, .sender = sender, .request_events = request_events, + .set_maintenance_mode = set_maintenance_mode, .set_run_to_completion = set_run_to_completion, .poll = poll, }; @@ -987,6 +1026,16 @@ static int num_regshifts = 0; static int slave_addrs[SI_MAX_PARMS]; static int num_slave_addrs = 0; +#define IPMI_IO_ADDR_SPACE 0 +#define IPMI_MEM_ADDR_SPACE 1 +static char *addr_space_to_str[] = { "I/O", "mem" }; + +static int hotmod_handler(const char *val, struct kernel_param *kp); + +module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200); +MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See" + " Documentation/IPMI.txt in the kernel sources for the" + " gory details."); module_param_named(trydefaults, si_trydefaults, bool, 0); MODULE_PARM_DESC(trydefaults, "Setting this to 'false' will disable the" @@ -1038,12 +1087,12 @@ module_param_array(force_kipmid, int, &num_force_kipmid, 0); MODULE_PARM_DESC(force_kipmid, "Force the kipmi daemon to be enabled (1) or" " disabled(0). Normally the IPMI driver auto-detects" " this, but the value may be overridden by this parm."); +module_param(unload_when_empty, int, 0); +MODULE_PARM_DESC(unload_when_empty, "Unload the module if no interfaces are" + " specified or found, default is 1. Setting to 0" + " is useful for hot add of devices using hotmod."); -#define IPMI_IO_ADDR_SPACE 0 -#define IPMI_MEM_ADDR_SPACE 1 -static char *addr_space_to_str[] = { "I/O", "memory" }; - static void std_irq_cleanup(struct smi_info *info) { if (info->si_type == SI_BT) @@ -1317,6 +1366,234 @@ static int mem_setup(struct smi_info *info) return 0; } +/* + * Parms come in as <op1>[:op2[:op3...]]. ops are: + * add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]] + * Options are: + * rsp=<regspacing> + * rsi=<regsize> + * rsh=<regshift> + * irq=<irq> + * ipmb=<ipmb addr> + */ +enum hotmod_op { HM_ADD, HM_REMOVE }; +struct hotmod_vals { + char *name; + int val; +}; +static struct hotmod_vals hotmod_ops[] = { + { "add", HM_ADD }, + { "remove", HM_REMOVE }, + { NULL } +}; +static struct hotmod_vals hotmod_si[] = { + { "kcs", SI_KCS }, + { "smic", SI_SMIC }, + { "bt", SI_BT }, + { NULL } +}; +static struct hotmod_vals hotmod_as[] = { + { "mem", IPMI_MEM_ADDR_SPACE }, + { "i/o", IPMI_IO_ADDR_SPACE }, + { NULL } +}; +static int ipmi_strcasecmp(const char *s1, const char *s2) +{ + while (*s1 || *s2) { + if (!*s1) + return -1; + if (!*s2) + return 1; + if (*s1 != *s2) + return *s1 - *s2; + s1++; + s2++; + } + return 0; +} +static int parse_str(struct hotmod_vals *v, int *val, char *name, char **curr) +{ + char *s; + int i; + + s = strchr(*curr, ','); + if (!s) { + printk(KERN_WARNING PFX "No hotmod %s given.\n", name); + return -EINVAL; + } + *s = '\0'; + s++; + for (i = 0; hotmod_ops[i].name; i++) { + if (ipmi_strcasecmp(*curr, v[i].name) == 0) { + *val = v[i].val; + *curr = s; + return 0; + } + } + + printk(KERN_WARNING PFX "Invalid hotmod %s '%s'\n", name, *curr); + return -EINVAL; +} + +static int hotmod_handler(const char *val, struct kernel_param *kp) +{ + char *str = kstrdup(val, GFP_KERNEL); + int rv = -EINVAL; + char *next, *curr, *s, *n, *o; + enum hotmod_op op; + enum si_type si_type; + int addr_space; + unsigned long addr; + int regspacing; + int regsize; + int regshift; + int irq; + int ipmb; + int ival; + struct smi_info *info; + + if (!str) + return -ENOMEM; + + /* Kill any trailing spaces, as we can get a "\n" from echo. */ + ival = strlen(str) - 1; + while ((ival >= 0) && isspace(str[ival])) { + str[ival] = '\0'; + ival--; + } + + for (curr = str; curr; curr = next) { + regspacing = 1; + regsize = 1; + regshift = 0; + irq = 0; + ipmb = 0x20; + + next = strchr(curr, ':'); + if (next) { + *next = '\0'; + next++; + } + + rv = parse_str(hotmod_ops, &ival, "operation", &curr); + if (rv) + break; + op = ival; + + rv = parse_str(hotmod_si, &ival, "interface type", &curr); + if (rv) + break; + si_type = ival; + + rv = parse_str(hotmod_as, &addr_space, "address space", &curr); + if (rv) + break; + + s = strchr(curr, ','); + if (s) { + *s = '\0'; + s++; + } + addr = simple_strtoul(curr, &n, 0); + if ((*n != '\0') || (*curr == '\0')) { + printk(KERN_WARNING PFX "Invalid hotmod address" + " '%s'\n", curr); + break; + } + + while (s) { + curr = s; + s = strchr(curr, ','); + if (s) { + *s = '\0'; + s++; + } + o = strchr(curr, '='); + if (o) { + *o = '\0'; + o++; + } +#define HOTMOD_INT_OPT(name, val) \ + if (ipmi_strcasecmp(curr, name) == 0) { \ + if (!o) { \ + printk(KERN_WARNING PFX \ + "No option given for '%s'\n", \ + curr); \ + goto out; \ + } \ + val = simple_strtoul(o, &n, 0); \ + if ((*n != '\0') || (*o == '\0')) { \ + printk(KERN_WARNING PFX \ + "Bad option given for '%s'\n", \ + curr); \ + goto out; \ + } \ + } + + HOTMOD_INT_OPT("rsp", regspacing) + else HOTMOD_INT_OPT("rsi", regsize) + else HOTMOD_INT_OPT("rsh", regshift) + else HOTMOD_INT_OPT("irq", irq) + else HOTMOD_INT_OPT("ipmb", ipmb) + else { + printk(KERN_WARNING PFX + "Invalid hotmod option '%s'\n", + curr); + goto out; + } +#undef HOTMOD_INT_OPT + } + + if (op == HM_ADD) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rv = -ENOMEM; + goto out; + } + + info->addr_source = "hotmod"; + info->si_type = si_type; + info->io.addr_data = addr; + info->io.addr_type = addr_space; + if (addr_space == IPMI_MEM_ADDR_SPACE) + info->io_setup = mem_setup; + else + info->io_setup = port_setup; + + info->io.addr = NULL; + info->io.regspacing = regspacing; + if (!info->io.regspacing) + info->io.regspacing = DEFAULT_REGSPACING; + info->io.regsize = regsize; + if (!info->io.regsize) + info->io.regsize = DEFAULT_REGSPACING; + info->io.regshift = regshift; + info->irq = irq; + if (info->irq) + info->irq_setup = std_irq_setup; + info->slave_addr = ipmb; + + try_smi_init(info); + } else { + /* remove */ + struct smi_info *e, *tmp_e; + + mutex_lock(&smi_infos_lock); + list_for_each_entry_safe(e, tmp_e, &smi_infos, link) { + if (e->io.addr_type != addr_space) + continue; + if (e->si_type != si_type) + continue; + if (e->io.addr_data == addr) + cleanup_one_si(e); + } + mutex_unlock(&smi_infos_lock); + } + } + out: + kfree(str); + return rv; +} static __devinit void hardcode_find_bmc(void) { @@ -1333,11 +1610,11 @@ static __devinit void hardcode_find_bmc(void) info->addr_source = "hardcoded"; - if (!si_type[i] || strcmp(si_type[i], "kcs") == 0) { + if (!si_type[i] || ipmi_strcasecmp(si_type[i], "kcs") == 0) { info->si_type = SI_KCS; - } else if (strcmp(si_type[i], "smic") == 0) { + } else if (ipmi_strcasecmp(si_type[i], "smic") == 0) { info->si_type = SI_SMIC; - } else if (strcmp(si_type[i], "bt") == 0) { + } else if (ipmi_strcasecmp(si_type[i], "bt") == 0) { info->si_type = SI_BT; } else { printk(KERN_WARNING @@ -1952,19 +2229,9 @@ static int try_get_dev_id(struct smi_info *smi_info) static int type_file_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - char *out = (char *) page; struct smi_info *smi = data; - switch (smi->si_type) { - case SI_KCS: - return sprintf(out, "kcs\n"); - case SI_SMIC: - return sprintf(out, "smic\n"); - case SI_BT: - return sprintf(out, "bt\n"); - default: - return 0; - } + return sprintf(page, "%s\n", si_to_str[smi->si_type]); } static int stat_file_read_proc(char *page, char **start, off_t off, @@ -2000,7 +2267,24 @@ static int stat_file_read_proc(char *page, char **start, off_t off, out += sprintf(out, "incoming_messages: %ld\n", smi->incoming_messages); - return (out - ((char *) page)); + return out - page; +} + +static int param_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct smi_info *smi = data; + + return sprintf(page, + "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", + si_to_str[smi->si_type], + addr_space_to_str[smi->io.addr_type], + smi->io.addr_data, + smi->io.regspacing, + smi->io.regsize, + smi->io.regshift, + smi->irq, + smi->slave_addr); } /* @@ -2362,6 +2646,7 @@ static int try_smi_init(struct smi_info *new_smi) new_smi, &new_smi->device_id, new_smi->dev, + "bmc", new_smi->slave_addr); if (rv) { printk(KERN_ERR @@ -2390,6 +2675,16 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err_stop_timer; } + rv = ipmi_smi_add_proc_entry(new_smi->intf, "params", + param_read_proc, NULL, + new_smi, THIS_MODULE); + if (rv) { + printk(KERN_ERR + "ipmi_si: Unable to create proc entry: %d\n", + rv); + goto out_err_stop_timer; + } + list_add_tail(&new_smi->link, &smi_infos); mutex_unlock(&smi_infos_lock); @@ -2483,7 +2778,12 @@ static __devinit int init_ipmi_si(void) #endif #ifdef CONFIG_PCI - pci_module_init(&ipmi_pci_driver); + rv = pci_register_driver(&ipmi_pci_driver); + if (rv){ + printk(KERN_ERR + "init_ipmi_si: Unable to register PCI driver: %d\n", + rv); + } #endif if (si_trydefaults) { @@ -2498,7 +2798,7 @@ static __devinit int init_ipmi_si(void) } mutex_lock(&smi_infos_lock); - if (list_empty(&smi_infos)) { + if (unload_when_empty && list_empty(&smi_infos)) { mutex_unlock(&smi_infos_lock); #ifdef CONFIG_PCI pci_unregister_driver(&ipmi_pci_driver); @@ -2513,7 +2813,7 @@ static __devinit int init_ipmi_si(void) } module_init(init_ipmi_si); -static void __devexit cleanup_one_si(struct smi_info *to_clean) +static void cleanup_one_si(struct smi_info *to_clean) { int rv; unsigned long flags; diff --git a/drivers/char/ipmi/ipmi_smic_sm.c b/drivers/char/ipmi/ipmi_smic_sm.c index 39d7e5ef1a2..e64ea7d25d2 100644 --- a/drivers/char/ipmi/ipmi_smic_sm.c +++ b/drivers/char/ipmi/ipmi_smic_sm.c @@ -141,12 +141,14 @@ static int start_smic_transaction(struct si_sm_data *smic, { unsigned int i; - if ((size < 2) || (size > MAX_SMIC_WRITE_SIZE)) { - return -1; - } - if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) { - return -2; - } + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > MAX_SMIC_WRITE_SIZE) + return IPMI_REQ_LEN_EXCEEDED_ERR; + + if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) + return IPMI_NOT_IN_MY_STATE_ERR; + if (smic_debug & SMIC_DEBUG_MSG) { printk(KERN_INFO "start_smic_transaction -"); for (i = 0; i < size; i ++) { diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 73f759eaa5a..90fb2a54191 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -135,6 +135,7 @@ static int nowayout = WATCHDOG_NOWAYOUT; static ipmi_user_t watchdog_user = NULL; +static int watchdog_ifnum; /* Default the timeout to 10 seconds. */ static int timeout = 10; @@ -161,6 +162,8 @@ static struct fasync_struct *fasync_q = NULL; static char pretimeout_since_last_heartbeat = 0; static char expect_close; +static int ifnum_to_use = -1; + static DECLARE_RWSEM(register_sem); /* Parameters to ipmi_set_timeout */ @@ -169,6 +172,8 @@ static DECLARE_RWSEM(register_sem); #define IPMI_SET_TIMEOUT_FORCE_HB 2 static int ipmi_set_timeout(int do_heartbeat); +static void ipmi_register_watchdog(int ipmi_intf); +static void ipmi_unregister_watchdog(int ipmi_intf); /* If true, the driver will start running as soon as it is configured and ready. */ @@ -245,6 +250,26 @@ static int get_param_str(char *buffer, struct kernel_param *kp) return strlen(buffer); } + +static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) +{ + int rv = param_set_int(val, kp); + if (rv) + return rv; + if ((ifnum_to_use < 0) || (ifnum_to_use == watchdog_ifnum)) + return 0; + + ipmi_unregister_watchdog(watchdog_ifnum); + ipmi_register_watchdog(ifnum_to_use); + return 0; +} + +module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int, + &ifnum_to_use, 0644); +MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " + "timer. Setting to -1 defaults to the first registered " + "interface"); + module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644); MODULE_PARM_DESC(timeout, "Timeout value in seconds."); @@ -263,12 +288,13 @@ module_param_call(preop, set_param_str, get_param_str, preop_op, 0644); MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: " "preop_none, preop_panic, preop_give_data."); -module_param(start_now, int, 0); +module_param(start_now, int, 0444); MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as" "soon as the driver is loaded."); module_param(nowayout, int, 0644); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CONFIG_WATCHDOG_NOWAYOUT)"); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=CONFIG_WATCHDOG_NOWAYOUT)"); /* Default state of the timer. */ static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE; @@ -872,6 +898,11 @@ static void ipmi_register_watchdog(int ipmi_intf) if (watchdog_user) goto out; + if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf)) + goto out; + + watchdog_ifnum = ipmi_intf; + rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user); if (rv < 0) { printk(KERN_CRIT PFX "Unable to register with ipmi\n"); @@ -901,6 +932,39 @@ static void ipmi_register_watchdog(int ipmi_intf) } } +static void ipmi_unregister_watchdog(int ipmi_intf) +{ + int rv; + + down_write(®ister_sem); + + if (!watchdog_user) + goto out; + + if (watchdog_ifnum != ipmi_intf) + goto out; + + /* Make sure no one can call us any more. */ + misc_deregister(&ipmi_wdog_miscdev); + + /* Wait to make sure the message makes it out. The lower layer has + pointers to our buffers, we want to make sure they are done before + we release our memory. */ + while (atomic_read(&set_timeout_tofree)) + schedule_timeout_uninterruptible(1); + + /* Disconnect from IPMI. */ + rv = ipmi_destroy_user(watchdog_user); + if (rv) { + printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n", + rv); + } + watchdog_user = NULL; + + out: + up_write(®ister_sem); +} + #ifdef HAVE_NMI_HANDLER static int ipmi_nmi(void *dev_id, int cpu, int handled) @@ -1004,9 +1068,7 @@ static void ipmi_new_smi(int if_num, struct device *device) static void ipmi_smi_gone(int if_num) { - /* This can never be called, because once the watchdog is - registered, the interface can't go away until the watchdog - is unregistered. */ + ipmi_unregister_watchdog(if_num); } static struct ipmi_smi_watcher smi_watcher = @@ -1148,30 +1210,32 @@ static int __init ipmi_wdog_init(void) check_parms(); + register_reboot_notifier(&wdog_reboot_notifier); + atomic_notifier_chain_register(&panic_notifier_list, + &wdog_panic_notifier); + rv = ipmi_smi_watcher_register(&smi_watcher); if (rv) { #ifdef HAVE_NMI_HANDLER if (preaction_val == WDOG_PRETIMEOUT_NMI) release_nmi(&ipmi_nmi_handler); #endif + atomic_notifier_chain_unregister(&panic_notifier_list, + &wdog_panic_notifier); + unregister_reboot_notifier(&wdog_reboot_notifier); printk(KERN_WARNING PFX "can't register smi watcher\n"); return rv; } - register_reboot_notifier(&wdog_reboot_notifier); - atomic_notifier_chain_register(&panic_notifier_list, - &wdog_panic_notifier); - printk(KERN_INFO PFX "driver initialized\n"); return 0; } -static __exit void ipmi_unregister_watchdog(void) +static void __exit ipmi_wdog_exit(void) { - int rv; - - down_write(®ister_sem); + ipmi_smi_watcher_unregister(&smi_watcher); + ipmi_unregister_watchdog(watchdog_ifnum); #ifdef HAVE_NMI_HANDLER if (nmi_handler_registered) @@ -1179,37 +1243,8 @@ static __exit void ipmi_unregister_watchdog(void) #endif atomic_notifier_chain_unregister(&panic_notifier_list, - &wdog_panic_notifier); + &wdog_panic_notifier); unregister_reboot_notifier(&wdog_reboot_notifier); - - if (! watchdog_user) - goto out; - - /* Make sure no one can call us any more. */ - misc_deregister(&ipmi_wdog_miscdev); - - /* Wait to make sure the message makes it out. The lower layer has - pointers to our buffers, we want to make sure they are done before - we release our memory. */ - while (atomic_read(&set_timeout_tofree)) - schedule_timeout_uninterruptible(1); - - /* Disconnect from IPMI. */ - rv = ipmi_destroy_user(watchdog_user); - if (rv) { - printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n", - rv); - } - watchdog_user = NULL; - - out: - up_write(®ister_sem); -} - -static void __exit ipmi_wdog_exit(void) -{ - ipmi_smi_watcher_unregister(&smi_watcher); - ipmi_unregister_watchdog(); } module_exit(ipmi_wdog_exit); module_init(ipmi_wdog_init); diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index bd9195e1795..8f591945ebd 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -3476,6 +3476,8 @@ static int stli_initecp(stlibrd_t *brdp) if (sig.magic != cpu_to_le32(ECP_MAGIC)) { release_region(brdp->iobase, brdp->iosize); + iounmap(brdp->membase); + brdp->membase = NULL; return -ENODEV; } @@ -3632,6 +3634,8 @@ static int stli_initonb(stlibrd_t *brdp) sig.magic3 != cpu_to_le16(ONB_MAGIC3)) { release_region(brdp->iobase, brdp->iosize); + iounmap(brdp->membase); + brdp->membase = NULL; return -ENODEV; } diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 7a484fc7cb9..7e975f60692 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -199,6 +199,8 @@ int misc_register(struct miscdevice * misc) dev_t dev; int err = 0; + INIT_LIST_HEAD(&misc->list); + down(&misc_sem); list_for_each_entry(c, &misc_list, list) { if (c->minor == misc->minor) { diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 22b9905c1e5..c09160383a5 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -680,7 +680,7 @@ static int __init mmtimer_init(void) if (sn_rtc_cycles_per_second < 100000) { printk(KERN_ERR "%s: unable to determine clock frequency\n", MMTIMER_NAME); - return -1; + goto out1; } mmtimer_femtoperiod = ((unsigned long)1E15 + sn_rtc_cycles_per_second / @@ -689,13 +689,13 @@ static int __init mmtimer_init(void) if (request_irq(SGI_MMTIMER_VECTOR, mmtimer_interrupt, IRQF_PERCPU, MMTIMER_NAME, NULL)) { printk(KERN_WARNING "%s: unable to allocate interrupt.", MMTIMER_NAME); - return -1; + goto out1; } if (misc_register(&mmtimer_miscdev)) { printk(KERN_ERR "%s: failed to register device\n", MMTIMER_NAME); - return -1; + goto out2; } /* Get max numbered node, calculate slots needed */ @@ -709,16 +709,18 @@ static int __init mmtimer_init(void) if (timers == NULL) { printk(KERN_ERR "%s: failed to allocate memory for device\n", MMTIMER_NAME); - return -1; + goto out3; } + memset(timers,0,(sizeof(mmtimer_t *)*maxn)); + /* Allocate mmtimer_t's for each online node */ for_each_online_node(node) { timers[node] = kmalloc_node(sizeof(mmtimer_t)*NUM_COMPARATORS, GFP_KERNEL, node); if (timers[node] == NULL) { printk(KERN_ERR "%s: failed to allocate memory for device\n", MMTIMER_NAME); - return -1; + goto out4; } for (i=0; i< NUM_COMPARATORS; i++) { mmtimer_t * base = timers[node] + i; @@ -739,6 +741,17 @@ static int __init mmtimer_init(void) sn_rtc_cycles_per_second/(unsigned long)1E6); return 0; + +out4: + for_each_online_node(node) { + kfree(timers[node]); + } +out3: + misc_deregister(&mmtimer_miscdev); +out2: + free_irq(SGI_MMTIMER_VECTOR, NULL); +out1: + return -1; } module_init(mmtimer_init); diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 2d025a9fd14..8b316953173 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -498,9 +498,12 @@ static void __exit moxa_exit(void) printk("Couldn't unregister MOXA Intellio family serial driver\n"); put_tty_driver(moxaDriver); - for (i = 0; i < MAX_BOARDS; i++) + for (i = 0; i < MAX_BOARDS; i++) { + if (moxaBaseAddr[i]) + iounmap(moxaBaseAddr[i]); if (moxa_boards[i].busType == MOXA_BUS_TYPE_PCI) pci_dev_put(moxa_boards[i].pciInfo.pdev); + } if (verbose) printk("Done\n"); diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 1bd12296dca..74d21c1c104 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -75,8 +75,10 @@ #include <pcmcia/cisreg.h> #include <pcmcia/ds.h> -#ifdef CONFIG_HDLC_MODULE -#define CONFIG_HDLC 1 +#if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINK_CS_MODULE)) +#define SYNCLINK_GENERIC_HDLC 1 +#else +#define SYNCLINK_GENERIC_HDLC 0 #endif #define GET_USER(error,value,addr) error = get_user(value,addr) @@ -235,7 +237,7 @@ typedef struct _mgslpc_info { int dosyncppp; spinlock_t netlock; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC struct net_device *netdev; #endif @@ -392,7 +394,7 @@ static void tx_timeout(unsigned long context); static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) static void hdlcdev_tx_done(MGSLPC_INFO *info); static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size); @@ -1053,7 +1055,7 @@ static void tx_done(MGSLPC_INFO *info) info->drop_rts_on_tx_done = 0; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -1164,7 +1166,7 @@ static void dcd_change(MGSLPC_INFO *info) } else info->input_signal_events.dcd_down++; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) { if (info->serial_signals & SerialSignal_DCD) netif_carrier_on(info->netdev); @@ -2953,7 +2955,7 @@ static void mgslpc_add_device(MGSLPC_INFO *info) printk( "SyncLink PC Card %s:IO=%04X IRQ=%d\n", info->device_name, info->io_base, info->irq_level); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_init(info); #endif } @@ -2969,7 +2971,7 @@ static void mgslpc_remove_device(MGSLPC_INFO *remove_info) last->next_device = info->next_device; else mgslpc_device_list = info->next_device; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_exit(info); #endif release_resources(info); @@ -3901,7 +3903,7 @@ static int rx_get_frame(MGSLPC_INFO *info) return_frame = 1; } framesize = 0; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC { struct net_device_stats *stats = hdlc_stats(info->netdev); stats->rx_errors++; @@ -3935,7 +3937,7 @@ static int rx_get_frame(MGSLPC_INFO *info) ++framesize; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_rx(info, buf->data, framesize); else @@ -4091,7 +4093,7 @@ static void tx_timeout(unsigned long context) spin_unlock_irqrestore(&info->lock,flags); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -4099,7 +4101,7 @@ static void tx_timeout(unsigned long context) bh_transmit(info); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC /** * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 7ac68cb3bed..e79b2ede851 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -1026,6 +1026,7 @@ static int __init rio_init(void) found++; } else { iounmap(p->RIOHosts[p->RIONumHosts].Caddr); + p->RIOHosts[p->RIONumHosts].Caddr = NULL; } } @@ -1078,6 +1079,7 @@ static int __init rio_init(void) found++; } else { iounmap(p->RIOHosts[p->RIONumHosts].Caddr); + p->RIOHosts[p->RIONumHosts].Caddr = NULL; } #else printk(KERN_ERR "Found an older RIO PCI card, but the driver is not " "compiled to support it.\n"); @@ -1117,8 +1119,10 @@ static int __init rio_init(void) } } - if (!okboard) + if (!okboard) { iounmap(hp->Caddr); + hp->Caddr = NULL; + } } } @@ -1188,6 +1192,8 @@ static void __exit rio_exit(void) } /* It is safe/allowed to del_timer a non-active timer */ del_timer(&hp->timer); + if (hp->Caddr) + iounmap(hp->Caddr); if (hp->Type == RIO_PCI) pci_dev_put(hp->pdev); } diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 722dd3e7418..0a77bfcd5b5 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -82,11 +82,6 @@ static struct riscom_board * IRQ_to_board[16]; static struct tty_driver *riscom_driver; -static unsigned long baud_table[] = { - 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 76800, 0, -}; - static struct riscom_board rc_board[RC_NBOARD] = { { .base = RC_IOBASE1, diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 147c30da81e..645187b9141 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -101,8 +101,10 @@ #include <linux/hdlc.h> #include <linux/dma-mapping.h> -#ifdef CONFIG_HDLC_MODULE -#define CONFIG_HDLC 1 +#if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINK_MODULE)) +#define SYNCLINK_GENERIC_HDLC 1 +#else +#define SYNCLINK_GENERIC_HDLC 0 #endif #define GET_USER(error,value,addr) error = get_user(value,addr) @@ -320,7 +322,7 @@ struct mgsl_struct { int dosyncppp; spinlock_t netlock; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC struct net_device *netdev; #endif }; @@ -728,7 +730,7 @@ static void usc_loopmode_send_done( struct mgsl_struct * info ); static int mgsl_ioctl_common(struct mgsl_struct *info, unsigned int cmd, unsigned long arg); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) static void hdlcdev_tx_done(struct mgsl_struct *info); static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size); @@ -1277,7 +1279,7 @@ static void mgsl_isr_transmit_status( struct mgsl_struct *info ) info->drop_rts_on_tx_done = 0; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -1342,7 +1344,7 @@ static void mgsl_isr_io_pin( struct mgsl_struct *info ) info->input_signal_events.dcd_up++; } else info->input_signal_events.dcd_down++; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) { if (status & MISCSTATUS_DCD) netif_carrier_on(info->netdev); @@ -4313,7 +4315,7 @@ static void mgsl_add_device( struct mgsl_struct *info ) info->max_frame_size ); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_init(info); #endif @@ -4471,7 +4473,7 @@ static void synclink_cleanup(void) info = mgsl_device_list; while(info) { -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_exit(info); #endif mgsl_release_resources(info); @@ -6645,7 +6647,7 @@ static int mgsl_get_rx_frame(struct mgsl_struct *info) return_frame = 1; } framesize = 0; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC { struct net_device_stats *stats = hdlc_stats(info->netdev); stats->rx_errors++; @@ -6721,7 +6723,7 @@ static int mgsl_get_rx_frame(struct mgsl_struct *info) *ptmp); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_rx(info,info->intermediate_rxbuffer,framesize); else @@ -7625,7 +7627,7 @@ static void mgsl_tx_timeout(unsigned long context) spin_unlock_irqrestore(&info->irq_spinlock,flags); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -7701,7 +7703,7 @@ static int usc_loopmode_active( struct mgsl_struct * info) return usc_InReg( info, CCSR ) & BIT7 ? 1 : 0 ; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC /** * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 07f34d43dc7..e4730a7312b 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -83,8 +83,10 @@ #include "linux/synclink.h" -#ifdef CONFIG_HDLC_MODULE -#define CONFIG_HDLC 1 +#if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINK_GT_MODULE)) +#define SYNCLINK_GENERIC_HDLC 1 +#else +#define SYNCLINK_GENERIC_HDLC 0 #endif /* @@ -171,7 +173,7 @@ static void set_break(struct tty_struct *tty, int break_state); /* * generic HDLC support and callbacks */ -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) static void hdlcdev_tx_done(struct slgt_info *info); static void hdlcdev_rx(struct slgt_info *info, char *buf, int size); @@ -359,7 +361,7 @@ struct slgt_info { int netcount; int dosyncppp; spinlock_t netlock; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC struct net_device *netdev; #endif @@ -1354,7 +1356,7 @@ static void set_break(struct tty_struct *tty, int break_state) spin_unlock_irqrestore(&info->lock,flags); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC /** * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) @@ -2002,7 +2004,7 @@ static void dcd_change(struct slgt_info *info) } else { info->input_signal_events.dcd_down++; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) { if (info->signals & SerialSignal_DCD) netif_carrier_on(info->netdev); @@ -2180,7 +2182,7 @@ static void isr_txeom(struct slgt_info *info, unsigned short status) set_signals(info); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -3306,7 +3308,7 @@ static void add_device(struct slgt_info *info) devstr, info->device_name, info->phys_reg_addr, info->irq_level, info->max_frame_size); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_init(info); #endif } @@ -3488,7 +3490,7 @@ static void slgt_cleanup(void) /* release devices */ info = slgt_device_list; while(info) { -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_exit(info); #endif free_dma_bufs(info); @@ -3522,6 +3524,7 @@ static int __init slgt_init(void) if (!slgt_device_list) { printk("%s no devices found\n",driver_name); + pci_unregister_driver(&pci_driver); return -ENODEV; } @@ -4433,7 +4436,7 @@ check_again: framesize = 0; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (framesize == 0) { struct net_device_stats *stats = hdlc_stats(info->netdev); stats->rx_errors++; @@ -4476,7 +4479,7 @@ check_again: framesize++; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_rx(info,info->tmp_rbuf, framesize); else @@ -4779,7 +4782,7 @@ static void tx_timeout(unsigned long context) info->tx_count = 0; spin_unlock_irqrestore(&info->lock,flags); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 13a57245cf2..20a96ef250b 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -67,8 +67,10 @@ #include <linux/workqueue.h> #include <linux/hdlc.h> -#ifdef CONFIG_HDLC_MODULE -#define CONFIG_HDLC 1 +#if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINKMP_MODULE)) +#define SYNCLINK_GENERIC_HDLC 1 +#else +#define SYNCLINK_GENERIC_HDLC 0 #endif #define GET_USER(error,value,addr) error = get_user(value,addr) @@ -280,7 +282,7 @@ typedef struct _synclinkmp_info { int dosyncppp; spinlock_t netlock; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC struct net_device *netdev; #endif @@ -536,7 +538,7 @@ static void throttle(struct tty_struct * tty); static void unthrottle(struct tty_struct * tty); static void set_break(struct tty_struct *tty, int break_state); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) static void hdlcdev_tx_done(SLMP_INFO *info); static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size); @@ -1607,7 +1609,7 @@ static void set_break(struct tty_struct *tty, int break_state) spin_unlock_irqrestore(&info->lock,flags); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC /** * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) @@ -2339,7 +2341,7 @@ static void isr_txeom(SLMP_INFO * info, unsigned char status) set_signals(info); } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else @@ -2523,7 +2525,7 @@ void isr_io_pin( SLMP_INFO *info, u16 status ) info->input_signal_events.dcd_up++; } else info->input_signal_events.dcd_down++; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) { if (status & SerialSignal_DCD) netif_carrier_on(info->netdev); @@ -3783,7 +3785,7 @@ void add_device(SLMP_INFO *info) info->irq_level, info->max_frame_size ); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_init(info); #endif } @@ -3977,7 +3979,7 @@ static void synclinkmp_cleanup(void) /* release devices */ info = synclinkmp_device_list; while(info) { -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC hdlcdev_exit(info); #endif free_dma_bufs(info); @@ -4979,7 +4981,7 @@ CheckAgain: info->icount.rxcrc++; framesize = 0; -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC { struct net_device_stats *stats = hdlc_stats(info->netdev); stats->rx_errors++; @@ -5020,7 +5022,7 @@ CheckAgain: index = 0; } -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_rx(info,info->tmp_rx_buf,framesize); else @@ -5531,7 +5533,7 @@ void tx_timeout(unsigned long context) spin_unlock_irqrestore(&info->lock,flags); -#ifdef CONFIG_HDLC +#if SYNCLINK_GENERIC_HDLC if (info->netcount) hdlcdev_tx_done(info); else diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index c64f5bcff94..05810c8d20b 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -182,6 +182,18 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; +static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +{ + show_state_filter(TASK_UNINTERRUPTIBLE); +} +static struct sysrq_key_op sysrq_showstate_blocked_op = { + .handler = sysrq_handle_showstate_blocked, + .help_msg = "showBlockedTasks", + .action_msg = "Show Blocked State", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + + static void sysrq_handle_showmem(int key, struct tty_struct *tty) { show_mem(); @@ -304,7 +316,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { /* May be assigned at init time by SMP VOYAGER */ NULL, /* v */ NULL, /* w */ - NULL, /* x */ + &sysrq_showstate_blocked_op, /* x */ NULL, /* y */ NULL /* z */ }; diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index dd36fd04a84..07067c31c4e 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -249,6 +249,7 @@ int tosh_smm(SMMRegisters *regs) return eax; } +EXPORT_SYMBOL(tosh_smm); static int tosh_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 774fa861169..33e1f66e39c 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -1155,6 +1155,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vend if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) { list_del(&chip->list); + misc_deregister(&chip->vendor.miscdev); put_device(dev); clear_bit(chip->dev_num, dev_mask); kfree(chip); diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 75ff0286e1a..a8239dac994 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -152,7 +152,7 @@ static void gotoxy(struct vc_data *vc, int new_x, int new_y); static void save_cur(struct vc_data *vc); static void reset_terminal(struct vc_data *vc, int do_clear); static void con_flush_chars(struct tty_struct *tty); -static void set_vesa_blanking(char __user *p); +static int set_vesa_blanking(char __user *p); static void set_cursor(struct vc_data *vc); static void hide_cursor(struct vc_data *vc); static void console_callback(struct work_struct *ignored); @@ -2369,7 +2369,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) ret = __put_user(data, p); break; case TIOCL_SETVESABLANK: - set_vesa_blanking(p); + ret = set_vesa_blanking(p); break; case TIOCL_GETKMSGREDIRECT: data = kmsg_redirect; @@ -3313,11 +3313,15 @@ postcore_initcall(vtconsole_class_init); * Screen blanking */ -static void set_vesa_blanking(char __user *p) +static int set_vesa_blanking(char __user *p) { - unsigned int mode; - get_user(mode, p + 1); - vesa_blank_mode = (mode < 4) ? mode : 0; + unsigned int mode; + + if (get_user(mode, p + 1)) + return -EFAULT; + + vesa_blank_mode = (mode < 4) ? mode : 0; + return 0; } void do_blank_screen(int entering_gfx) diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index e275dd4a705..61138726b50 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -634,7 +634,7 @@ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_devi usb_pcwd->intr_size = (le16_to_cpu(endpoint->wMaxPacketSize) > 8 ? le16_to_cpu(endpoint->wMaxPacketSize) : 8); /* set up the memory buffer's */ - if (!(usb_pcwd->intr_buffer = usb_buffer_alloc(udev, usb_pcwd->intr_size, SLAB_ATOMIC, &usb_pcwd->intr_dma))) { + if (!(usb_pcwd->intr_buffer = usb_buffer_alloc(udev, usb_pcwd->intr_size, GFP_ATOMIC, &usb_pcwd->intr_dma))) { printk(KERN_ERR PFX "Out of memory\n"); goto error; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7a7c6e6dfe4..47ab42db122 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1537,7 +1537,6 @@ int cpufreq_update_policy(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_update_policy); -#ifdef CONFIG_HOTPLUG_CPU static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1577,7 +1576,6 @@ static struct notifier_block __cpuinitdata cpufreq_cpu_notifier = { .notifier_call = cpufreq_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ /********************************************************************* * REGISTER / UNREGISTER CPUFREQ DRIVER * diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index adb554153f6..e816535ab30 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -51,4 +51,17 @@ config CRYPTO_DEV_PADLOCK_SHA If unsure say M. The compiled module will be called padlock-sha.ko +config CRYPTO_DEV_GEODE + tristate "Support for the Geode LX AES engine" + depends on CRYPTO && X86_32 + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + default m + help + Say 'Y' here to use the AMD Geode LX processor on-board AES + engine for the CryptoAPI AES alogrithm. + + To compile this driver as a module, choose M here: the module + will be called geode-aes. + endmenu diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 4c3d0ec1cf8..6059cf86941 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o +obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c new file mode 100644 index 00000000000..43a68398656 --- /dev/null +++ b/drivers/crypto/geode-aes.c @@ -0,0 +1,474 @@ + /* Copyright (C) 2004-2006, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/crypto.h> +#include <linux/spinlock.h> +#include <crypto/algapi.h> + +#include <asm/io.h> +#include <asm/delay.h> + +#include "geode-aes.h" + +/* Register definitions */ + +#define AES_CTRLA_REG 0x0000 + +#define AES_CTRL_START 0x01 +#define AES_CTRL_DECRYPT 0x00 +#define AES_CTRL_ENCRYPT 0x02 +#define AES_CTRL_WRKEY 0x04 +#define AES_CTRL_DCA 0x08 +#define AES_CTRL_SCA 0x10 +#define AES_CTRL_CBC 0x20 + +#define AES_INTR_REG 0x0008 + +#define AES_INTRA_PENDING (1 << 16) +#define AES_INTRB_PENDING (1 << 17) + +#define AES_INTR_PENDING (AES_INTRA_PENDING | AES_INTRB_PENDING) +#define AES_INTR_MASK 0x07 + +#define AES_SOURCEA_REG 0x0010 +#define AES_DSTA_REG 0x0014 +#define AES_LENA_REG 0x0018 +#define AES_WRITEKEY0_REG 0x0030 +#define AES_WRITEIV0_REG 0x0040 + +/* A very large counter that is used to gracefully bail out of an + * operation in case of trouble + */ + +#define AES_OP_TIMEOUT 0x50000 + +/* Static structures */ + +static void __iomem * _iobase; +static spinlock_t lock; + +/* Write a 128 bit field (either a writable key or IV) */ +static inline void +_writefield(u32 offset, void *value) +{ + int i; + for(i = 0; i < 4; i++) + iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4)); +} + +/* Read a 128 bit field (either a writable key or IV) */ +static inline void +_readfield(u32 offset, void *value) +{ + int i; + for(i = 0; i < 4; i++) + ((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4)); +} + +static int +do_crypt(void *src, void *dst, int len, u32 flags) +{ + u32 status; + u32 counter = AES_OP_TIMEOUT; + + iowrite32(virt_to_phys(src), _iobase + AES_SOURCEA_REG); + iowrite32(virt_to_phys(dst), _iobase + AES_DSTA_REG); + iowrite32(len, _iobase + AES_LENA_REG); + + /* Start the operation */ + iowrite32(AES_CTRL_START | flags, _iobase + AES_CTRLA_REG); + + do + status = ioread32(_iobase + AES_INTR_REG); + while(!(status & AES_INTRA_PENDING) && --counter); + + /* Clear the event */ + iowrite32((status & 0xFF) | AES_INTRA_PENDING, _iobase + AES_INTR_REG); + return counter ? 0 : 1; +} + +static unsigned int +geode_aes_crypt(struct geode_aes_op *op) +{ + + u32 flags = 0; + int iflags; + + if (op->len == 0 || op->src == op->dst) + return 0; + + if (op->flags & AES_FLAGS_COHERENT) + flags |= (AES_CTRL_DCA | AES_CTRL_SCA); + + if (op->dir == AES_DIR_ENCRYPT) + flags |= AES_CTRL_ENCRYPT; + + /* Start the critical section */ + + spin_lock_irqsave(&lock, iflags); + + if (op->mode == AES_MODE_CBC) { + flags |= AES_CTRL_CBC; + _writefield(AES_WRITEIV0_REG, op->iv); + } + + if (op->flags & AES_FLAGS_USRKEY) { + flags |= AES_CTRL_WRKEY; + _writefield(AES_WRITEKEY0_REG, op->key); + } + + do_crypt(op->src, op->dst, op->len, flags); + + if (op->mode == AES_MODE_CBC) + _readfield(AES_WRITEIV0_REG, op->iv); + + spin_unlock_irqrestore(&lock, iflags); + + return op->len; +} + +/* CRYPTO-API Functions */ + +static int +geode_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int len) +{ + struct geode_aes_op *op = crypto_tfm_ctx(tfm); + + if (len != AES_KEY_LENGTH) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + memcpy(op->key, key, len); + return 0; +} + +static void +geode_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct geode_aes_op *op = crypto_tfm_ctx(tfm); + + if ((out == NULL) || (in == NULL)) + return; + + op->src = (void *) in; + op->dst = (void *) out; + op->mode = AES_MODE_ECB; + op->flags = 0; + op->len = AES_MIN_BLOCK_SIZE; + op->dir = AES_DIR_ENCRYPT; + + geode_aes_crypt(op); +} + + +static void +geode_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct geode_aes_op *op = crypto_tfm_ctx(tfm); + + if ((out == NULL) || (in == NULL)) + return; + + op->src = (void *) in; + op->dst = (void *) out; + op->mode = AES_MODE_ECB; + op->flags = 0; + op->len = AES_MIN_BLOCK_SIZE; + op->dir = AES_DIR_DECRYPT; + + geode_aes_crypt(op); +} + + +static struct crypto_alg geode_alg = { + .cra_name = "aes", + .cra_driver_name = "geode-aes-128", + .cra_priority = 300, + .cra_alignmask = 15, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_MIN_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct geode_aes_op), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(geode_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_KEY_LENGTH, + .cia_max_keysize = AES_KEY_LENGTH, + .cia_setkey = geode_setkey, + .cia_encrypt = geode_encrypt, + .cia_decrypt = geode_decrypt + } + } +}; + +static int +geode_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, ret; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while((nbytes = walk.nbytes)) { + op->src = walk.src.virt.addr, + op->dst = walk.dst.virt.addr; + op->mode = AES_MODE_CBC; + op->len = nbytes - (nbytes % AES_MIN_BLOCK_SIZE); + op->dir = AES_DIR_DECRYPT; + + memcpy(op->iv, walk.iv, AES_IV_LENGTH); + + ret = geode_aes_crypt(op); + + memcpy(walk.iv, op->iv, AES_IV_LENGTH); + nbytes -= ret; + + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static int +geode_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, ret; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while((nbytes = walk.nbytes)) { + op->src = walk.src.virt.addr, + op->dst = walk.dst.virt.addr; + op->mode = AES_MODE_CBC; + op->len = nbytes - (nbytes % AES_MIN_BLOCK_SIZE); + op->dir = AES_DIR_ENCRYPT; + + memcpy(op->iv, walk.iv, AES_IV_LENGTH); + + ret = geode_aes_crypt(op); + nbytes -= ret; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg geode_cbc_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-geode-128", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_MIN_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct geode_aes_op), + .cra_alignmask = 15, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(geode_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_KEY_LENGTH, + .max_keysize = AES_KEY_LENGTH, + .setkey = geode_setkey, + .encrypt = geode_cbc_encrypt, + .decrypt = geode_cbc_decrypt, + } + } +}; + +static int +geode_ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, ret; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while((nbytes = walk.nbytes)) { + op->src = walk.src.virt.addr, + op->dst = walk.dst.virt.addr; + op->mode = AES_MODE_ECB; + op->len = nbytes - (nbytes % AES_MIN_BLOCK_SIZE); + op->dir = AES_DIR_DECRYPT; + + ret = geode_aes_crypt(op); + nbytes -= ret; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static int +geode_ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, ret; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while((nbytes = walk.nbytes)) { + op->src = walk.src.virt.addr, + op->dst = walk.dst.virt.addr; + op->mode = AES_MODE_ECB; + op->len = nbytes - (nbytes % AES_MIN_BLOCK_SIZE); + op->dir = AES_DIR_ENCRYPT; + + ret = geode_aes_crypt(op); + nbytes -= ret; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg geode_ecb_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-geode-128", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_MIN_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct geode_aes_op), + .cra_alignmask = 15, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(geode_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_KEY_LENGTH, + .max_keysize = AES_KEY_LENGTH, + .setkey = geode_setkey, + .encrypt = geode_ecb_encrypt, + .decrypt = geode_ecb_decrypt, + } + } +}; + +static void +geode_aes_remove(struct pci_dev *dev) +{ + crypto_unregister_alg(&geode_alg); + crypto_unregister_alg(&geode_ecb_alg); + crypto_unregister_alg(&geode_cbc_alg); + + pci_iounmap(dev, _iobase); + _iobase = NULL; + + pci_release_regions(dev); + pci_disable_device(dev); +} + + +static int +geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int ret; + + if ((ret = pci_enable_device(dev))) + return ret; + + if ((ret = pci_request_regions(dev, "geode-aes-128"))) + goto eenable; + + _iobase = pci_iomap(dev, 0, 0); + + if (_iobase == NULL) { + ret = -ENOMEM; + goto erequest; + } + + spin_lock_init(&lock); + + /* Clear any pending activity */ + iowrite32(AES_INTR_PENDING | AES_INTR_MASK, _iobase + AES_INTR_REG); + + if ((ret = crypto_register_alg(&geode_alg))) + goto eiomap; + + if ((ret = crypto_register_alg(&geode_ecb_alg))) + goto ealg; + + if ((ret = crypto_register_alg(&geode_cbc_alg))) + goto eecb; + + printk(KERN_NOTICE "geode-aes: GEODE AES engine enabled.\n"); + return 0; + + eecb: + crypto_unregister_alg(&geode_ecb_alg); + + ealg: + crypto_unregister_alg(&geode_alg); + + eiomap: + pci_iounmap(dev, _iobase); + + erequest: + pci_release_regions(dev); + + eenable: + pci_disable_device(dev); + + printk(KERN_ERR "geode-aes: GEODE AES initialization failed.\n"); + return ret; +} + +static struct pci_device_id geode_aes_tbl[] = { + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES, PCI_ANY_ID, PCI_ANY_ID} , + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, geode_aes_tbl); + +static struct pci_driver geode_aes_driver = { + .name = "Geode LX AES", + .id_table = geode_aes_tbl, + .probe = geode_aes_probe, + .remove = __devexit_p(geode_aes_remove) +}; + +static int __init +geode_aes_init(void) +{ + return pci_module_init(&geode_aes_driver); +} + +static void __exit +geode_aes_exit(void) +{ + pci_unregister_driver(&geode_aes_driver); +} + +MODULE_AUTHOR("Advanced Micro Devices, Inc."); +MODULE_DESCRIPTION("Geode LX Hardware AES driver"); +MODULE_LICENSE("GPL"); + +module_init(geode_aes_init); +module_exit(geode_aes_exit); diff --git a/drivers/crypto/geode-aes.h b/drivers/crypto/geode-aes.h new file mode 100644 index 00000000000..8003a36f3a8 --- /dev/null +++ b/drivers/crypto/geode-aes.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2003-2006, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _GEODE_AES_H_ +#define _GEODE_AES_H_ + +#define AES_KEY_LENGTH 16 +#define AES_IV_LENGTH 16 + +#define AES_MIN_BLOCK_SIZE 16 + +#define AES_MODE_ECB 0 +#define AES_MODE_CBC 1 + +#define AES_DIR_DECRYPT 0 +#define AES_DIR_ENCRYPT 1 + +#define AES_FLAGS_USRKEY (1 << 0) +#define AES_FLAGS_COHERENT (1 << 1) + +struct geode_aes_op { + + void *src; + void *dst; + + u32 mode; + u32 dir; + u32 flags; + int len; + + u8 key[AES_KEY_LENGTH]; + u8 iv[AES_IV_LENGTH]; +}; + +#endif diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index 0358419a0e4..8e872610461 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -636,10 +636,10 @@ static int ioat_self_test(struct ioat_device *device) dma_cookie_t cookie; int err = 0; - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL); + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!src) return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL); + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!dest) { kfree(src); return -ENOMEM; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 75e9e38330f..1b4fc922180 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -28,6 +28,7 @@ #include <linux/sysdev.h> #include <linux/ctype.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/edac.h> diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 0c68d0f0d8e..e23bc0d6215 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -389,14 +389,6 @@ config BLK_DEV_RZ1000 Linux. This may slow disk throughput by a few percent, but at least things will operate 100% reliably. -config BLK_DEV_SL82C105 - tristate "Winbond SL82c105 support" - depends on PCI && (PPC || ARM) && BLK_DEV_IDEPCI - help - If you have a Winbond SL82c105 IDE controller, say Y here to enable - special configuration for this chip. This is common on various CHRP - motherboards, but could be used elsewhere. If in doubt, say Y. - config BLK_DEV_IDEDMA_PCI bool "Generic PCI bus-master DMA support" depends on PCI && BLK_DEV_IDEPCI @@ -712,6 +704,14 @@ config BLK_DEV_SIS5513 Please read the comments at the top of <file:drivers/ide/pci/sis5513.c>. +config BLK_DEV_SL82C105 + tristate "Winbond SL82c105 support" + depends on (PPC || ARM) + help + If you have a Winbond SL82c105 IDE controller, say Y here to enable + special configuration for this chip. This is common on various CHRP + motherboards, but could be used elsewhere. If in doubt, say Y. + config BLK_DEV_SLC90E66 tristate "SLC90E66 chipset support" help diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 287a6620115..16890769dca 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -973,8 +973,8 @@ ide_settings_t *ide_find_setting_by_name (ide_drive_t *drive, char *name) * @drive: drive * * Automatically remove all the driver specific settings for this - * drive. This function may sleep and must not be called from IRQ - * context. The caller must hold ide_setting_sem. + * drive. This function may not be called from IRQ context. The + * caller must hold ide_setting_sem. */ static void auto_remove_settings (ide_drive_t *drive) @@ -1874,11 +1874,22 @@ void ide_unregister_subdriver(ide_drive_t *drive, ide_driver_t *driver) { unsigned long flags; - down(&ide_setting_sem); - spin_lock_irqsave(&ide_lock, flags); #ifdef CONFIG_PROC_FS ide_remove_proc_entries(drive->proc, driver->proc); #endif + down(&ide_setting_sem); + spin_lock_irqsave(&ide_lock, flags); + /* + * ide_setting_sem protects the settings list + * ide_lock protects the use of settings + * + * so we need to hold both, ide_settings_sem because we want to + * modify the settings list, and ide_lock because we cannot take + * a setting out that is being used. + * + * OTOH both ide_{read,write}_setting are only ever used under + * ide_setting_sem. + */ auto_remove_settings(drive); spin_unlock_irqrestore(&ide_lock, flags); up(&ide_setting_sem); diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index eb7ab112c05..61f1a9665a7 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -282,11 +282,11 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const * Find the ISA bridge to see how good the IDE is. */ via_config = via_config_find(&isa); - if (!via_config->id) { - printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n"); - pci_dev_put(isa); - return -ENODEV; - } + + /* We checked this earlier so if it fails here deeep badness + is involved */ + + BUG_ON(!via_config->id); /* * Setup or disable Clk66 if appropriate @@ -494,6 +494,17 @@ static ide_pci_device_t via82cxxx_chipsets[] __devinitdata = { static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_id *id) { + struct pci_dev *isa = NULL; + struct via_isa_bridge *via_config; + /* + * Find the ISA bridge and check we know what it is. + */ + via_config = via_config_find(&isa); + pci_dev_put(isa); + if (!via_config->id) { + printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n"); + return -ENODEV; + } return ide_setup_pci_device(dev, &via82cxxx_chipsets[id->driver_data]); } diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index 31e5cc49d61..27d6c642415 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -133,7 +133,7 @@ struct eth1394_node_info { #define ETH1394_DRIVER_NAME "eth1394" static const char driver_name[] = ETH1394_DRIVER_NAME; -static kmem_cache_t *packet_task_cache; +static struct kmem_cache *packet_task_cache; static struct hpsb_highlevel eth1394_highlevel; diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c index 8f4378a1631..b935e08695a 100644 --- a/drivers/ieee1394/hosts.c +++ b/drivers/ieee1394/hosts.c @@ -123,7 +123,7 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, int i; int hostnum = 0; - h = kzalloc(sizeof(*h) + extra, SLAB_KERNEL); + h = kzalloc(sizeof(*h) + extra, GFP_KERNEL); if (!h) return NULL; diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 8e7b83f8448..e829c9336b3 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -15,6 +15,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/moduleparam.h> +#include <linux/freezer.h> #include <asm/atomic.h> #include "csr.h" diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 6e8ea9110c4..eae97d8dcf0 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -1225,7 +1225,7 @@ static int ohci_iso_recv_init(struct hpsb_iso *iso) int ctx; int ret = -ENOMEM; - recv = kmalloc(sizeof(*recv), SLAB_KERNEL); + recv = kmalloc(sizeof(*recv), GFP_KERNEL); if (!recv) return -ENOMEM; @@ -1918,7 +1918,7 @@ static int ohci_iso_xmit_init(struct hpsb_iso *iso) int ctx; int ret = -ENOMEM; - xmit = kmalloc(sizeof(*xmit), SLAB_KERNEL); + xmit = kmalloc(sizeof(*xmit), GFP_KERNEL); if (!xmit) return -ENOMEM; @@ -3021,7 +3021,7 @@ alloc_dma_rcv_ctx(struct ti_ohci *ohci, struct dma_rcv_ctx *d, return -ENOMEM; } - d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i); + d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i); OHCI_DMA_ALLOC("pool dma_rcv prg[%d]", i); if (d->prg_cpu[i] != NULL) { @@ -3117,7 +3117,7 @@ alloc_dma_trm_ctx(struct ti_ohci *ohci, struct dma_trm_ctx *d, OHCI_DMA_ALLOC("dma_rcv prg pool"); for (i = 0; i < d->num_desc; i++) { - d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i); + d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i); OHCI_DMA_ALLOC("pool dma_trm prg[%d]", i); if (d->prg_cpu[i] != NULL) { diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c index 0a7412e27eb..9cab1d66147 100644 --- a/drivers/ieee1394/pcilynx.c +++ b/drivers/ieee1394/pcilynx.c @@ -1428,7 +1428,7 @@ static int __devinit add_card(struct pci_dev *dev, struct i2c_algo_bit_data i2c_adapter_data; error = -ENOMEM; - i2c_ad = kmalloc(sizeof(*i2c_ad), SLAB_KERNEL); + i2c_ad = kmalloc(sizeof(*i2c_ad), GFP_KERNEL); if (!i2c_ad) FAIL("failed to allocate I2C adapter memory"); memcpy(i2c_ad, &bit_ops, sizeof(struct i2c_adapter)); diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 5ec4f5eb6b1..bf71e069eaf 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -112,7 +112,7 @@ static struct pending_request *__alloc_pending_request(gfp_t flags) static inline struct pending_request *alloc_pending_request(void) { - return __alloc_pending_request(SLAB_KERNEL); + return __alloc_pending_request(GFP_KERNEL); } static void free_pending_request(struct pending_request *req) @@ -259,7 +259,7 @@ static void host_reset(struct hpsb_host *host) if (hi != NULL) { list_for_each_entry(fi, &hi->file_info_list, list) { if (fi->notification == RAW1394_NOTIFY_ON) { - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (req != NULL) { req->file_info = fi; @@ -306,13 +306,13 @@ static void iso_receive(struct hpsb_host *host, int channel, quadlet_t * data, if (!(fi->listen_channels & (1ULL << channel))) continue; - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) break; if (!ibs) { ibs = kmalloc(sizeof(*ibs) + length, - SLAB_ATOMIC); + GFP_ATOMIC); if (!ibs) { kfree(req); break; @@ -367,13 +367,13 @@ static void fcp_request(struct hpsb_host *host, int nodeid, int direction, if (!fi->fcp_buffer) continue; - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) break; if (!ibs) { ibs = kmalloc(sizeof(*ibs) + length, - SLAB_ATOMIC); + GFP_ATOMIC); if (!ibs) { kfree(req); break; @@ -593,7 +593,7 @@ static int state_initialized(struct file_info *fi, struct pending_request *req) switch (req->req.type) { case RAW1394_REQ_LIST_CARDS: spin_lock_irqsave(&host_info_lock, flags); - khl = kmalloc(sizeof(*khl) * host_count, SLAB_ATOMIC); + khl = kmalloc(sizeof(*khl) * host_count, GFP_ATOMIC); if (khl) { req->req.misc = host_count; @@ -1045,7 +1045,7 @@ static int arm_read(struct hpsb_host *host, int nodeid, quadlet_t * buffer, } if (arm_addr->notification_options & ARM_READ) { DBGMSG("arm_read -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_read -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1064,7 +1064,7 @@ static int arm_read(struct hpsb_host *host, int nodeid, quadlet_t * buffer, sizeof(struct arm_response) + sizeof(struct arm_request_response); } - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_read -> rcode_conflict_error"); @@ -1198,7 +1198,7 @@ static int arm_write(struct hpsb_host *host, int nodeid, int destid, } if (arm_addr->notification_options & ARM_WRITE) { DBGMSG("arm_write -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_write -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1209,7 +1209,7 @@ static int arm_write(struct hpsb_host *host, int nodeid, int destid, sizeof(struct arm_request) + sizeof(struct arm_response) + (length) * sizeof(byte_t) + sizeof(struct arm_request_response); - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_write -> rcode_conflict_error"); @@ -1400,7 +1400,7 @@ static int arm_lock(struct hpsb_host *host, int nodeid, quadlet_t * store, if (arm_addr->notification_options & ARM_LOCK) { byte_t *buf1, *buf2; DBGMSG("arm_lock -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_lock -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1408,7 +1408,7 @@ static int arm_lock(struct hpsb_host *host, int nodeid, quadlet_t * store, The request may be retried */ } size = sizeof(struct arm_request) + sizeof(struct arm_response) + 3 * sizeof(*store) + sizeof(struct arm_request_response); /* maximum */ - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_lock -> rcode_conflict_error"); @@ -1628,7 +1628,7 @@ static int arm_lock64(struct hpsb_host *host, int nodeid, octlet_t * store, if (arm_addr->notification_options & ARM_LOCK) { byte_t *buf1, *buf2; DBGMSG("arm_lock64 -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { spin_unlock_irqrestore(&host_info_lock, irqflags); DBGMSG("arm_lock64 -> rcode_conflict_error"); @@ -1636,7 +1636,7 @@ static int arm_lock64(struct hpsb_host *host, int nodeid, octlet_t * store, The request may be retried */ } size = sizeof(struct arm_request) + sizeof(struct arm_response) + 3 * sizeof(*store) + sizeof(struct arm_request_response); /* maximum */ - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1737,7 +1737,7 @@ static int arm_register(struct file_info *fi, struct pending_request *req) return (-EINVAL); } /* addr-list-entry for fileinfo */ - addr = kmalloc(sizeof(*addr), SLAB_KERNEL); + addr = kmalloc(sizeof(*addr), GFP_KERNEL); if (!addr) { req->req.length = 0; return (-ENOMEM); @@ -2103,7 +2103,7 @@ static int write_phypacket(struct file_info *fi, struct pending_request *req) static int get_config_rom(struct file_info *fi, struct pending_request *req) { int ret = sizeof(struct raw1394_request); - quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL); + quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL); int status; if (!data) @@ -2133,7 +2133,7 @@ static int get_config_rom(struct file_info *fi, struct pending_request *req) static int update_config_rom(struct file_info *fi, struct pending_request *req) { int ret = sizeof(struct raw1394_request); - quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL); + quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL); if (!data) return -ENOMEM; if (copy_from_user(data, int2ptr(req->req.sendb), req->req.length)) { @@ -2443,7 +2443,7 @@ static void queue_rawiso_event(struct file_info *fi) /* only one ISO activity event may be in the queue */ if (!__rawiso_event_in_queue(fi)) { struct pending_request *req = - __alloc_pending_request(SLAB_ATOMIC); + __alloc_pending_request(GFP_ATOMIC); if (req) { req->file_info = fi; @@ -2779,7 +2779,7 @@ static int raw1394_open(struct inode *inode, struct file *file) { struct file_info *fi; - fi = kzalloc(sizeof(*fi), SLAB_KERNEL); + fi = kzalloc(sizeof(*fi), GFP_KERNEL); if (!fi) return -ENOMEM; diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c index 40caeb5f41b..36620a22413 100644 --- a/drivers/infiniband/hw/amso1100/c2_vq.c +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -164,7 +164,7 @@ void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) */ void *vq_repbuf_alloc(struct c2_dev *c2dev) { - return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC); + return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC); } /* diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 214e2fdddee..0d6e2c4bb24 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - av = kmem_cache_alloc(av_cache, SLAB_KERNEL); + av = kmem_cache_alloc(av_cache, GFP_KERNEL); if (!av) { ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", pd, ah_attr); diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 458fe19648a..93995b658d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -134,7 +134,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); - my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL); + my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 3d1c1c53503..cc47e4c13a1 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -108,7 +108,7 @@ static struct kmem_cache *ctblk_cache = NULL; void *ehca_alloc_fw_ctrlblock(void) { - void *ret = kmem_cache_zalloc(ctblk_cache, SLAB_KERNEL); + void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); if (!ret) ehca_gen_err("Out of memory for ctblk"); return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index abce676c0ae..0a5e2214cc5 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -53,7 +53,7 @@ static struct ehca_mr *ehca_mr_new(void) { struct ehca_mr *me; - me = kmem_cache_alloc(mr_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mr_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mr)); spin_lock_init(&me->mrlock); @@ -72,7 +72,7 @@ static struct ehca_mw *ehca_mw_new(void) { struct ehca_mw *me; - me = kmem_cache_alloc(mw_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mw_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mw)); spin_lock_init(&me->mwlock); diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 2c3cdc6f7b3..d5345e5b3cd 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -50,7 +50,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, { struct ehca_pd *pd; - pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL); + pd = kmem_cache_alloc(pd_cache, GFP_KERNEL); if (!pd) { ehca_err(device, "device=%p context=%p out of memory", device, context); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 8682aa50c70..c6c9cef203e 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -450,7 +450,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, if (pd->uobject && udata) context = pd->uobject->context; - my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL); + my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 57cdc1bc5f5..27caf3b0648 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -189,7 +189,7 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { ah->av = pci_pool_alloc(dev->av_table.pool, - SLAB_ATOMIC, &ah->avdma); + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index a0af97efe6a..79dfb4b25c9 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -23,6 +23,7 @@ #include <linux/kthread.h> #include <linux/sched.h> /* HZ */ #include <linux/mutex.h> +#include <linux/freezer.h> /*#include <asm/io.h>*/ diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index ab4da79ee56..31d5a13bfd6 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -695,7 +695,9 @@ static int __init hp_sdc_rtc_init(void) if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) return ret; - misc_register(&hp_sdc_rtc_dev); + if (misc_register(&hp_sdc_rtc_dev) != 0) + printk(KERN_INFO "Could not register misc. dev for i8042 rtc\n"); + create_proc_read_entry ("driver/rtc", 0, NULL, hp_sdc_rtc_read_proc, NULL); diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 211943f85cb..5f1d4032fd5 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/kthread.h> #include <linux/mutex.h> +#include <linux/freezer.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Serio abstraction core"); diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index f56d6a0f062..0517c7387d6 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -189,7 +189,7 @@ static int ads7846_read12_ser(struct device *dev, unsigned command) { struct spi_device *spi = to_spi_device(dev); struct ads7846 *ts = dev_get_drvdata(dev); - struct ser_req *req = kzalloc(sizeof *req, SLAB_KERNEL); + struct ser_req *req = kzalloc(sizeof *req, GFP_KERNEL); int status; int sample; int i; diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 0c937325a1b..63b629b1cdb 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -572,7 +572,7 @@ static int atread_submit(struct cardstate *cs, int timeout) ucs->rcvbuf, ucs->rcvbuf_size, read_ctrl_callback, cs->inbuf); - if ((ret = usb_submit_urb(ucs->urb_cmd_in, SLAB_ATOMIC)) != 0) { + if ((ret = usb_submit_urb(ucs->urb_cmd_in, GFP_ATOMIC)) != 0) { update_basstate(ucs, 0, BS_ATRDPEND); dev_err(cs->dev, "could not submit HD_READ_ATMESSAGE: %s\n", get_usb_rcmsg(ret)); @@ -747,7 +747,7 @@ static void read_int_callback(struct urb *urb) check_pending(ucs); resubmit: - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(cs->dev, "could not resubmit interrupt URB: %s\n", get_usb_rcmsg(rc)); @@ -807,7 +807,7 @@ static void read_iso_callback(struct urb *urb) urb->number_of_packets = BAS_NUMFRAMES; gig_dbg(DEBUG_ISO, "%s: isoc read overrun/resubmit", __func__); - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(bcs->cs->dev, "could not resubmit isochronous read " @@ -900,7 +900,7 @@ static int starturbs(struct bc_state *bcs) } dump_urb(DEBUG_ISO, "Initial isoc read", urb); - if ((rc = usb_submit_urb(urb, SLAB_ATOMIC)) != 0) + if ((rc = usb_submit_urb(urb, GFP_ATOMIC)) != 0) goto error; } @@ -935,7 +935,7 @@ static int starturbs(struct bc_state *bcs) /* submit two URBs, keep third one */ for (k = 0; k < 2; ++k) { dump_urb(DEBUG_ISO, "Initial isoc write", urb); - rc = usb_submit_urb(ubc->isoouturbs[k].urb, SLAB_ATOMIC); + rc = usb_submit_urb(ubc->isoouturbs[k].urb, GFP_ATOMIC); if (rc != 0) goto error; } @@ -1042,7 +1042,7 @@ static int submit_iso_write_urb(struct isow_urbctx_t *ucx) return 0; /* no data to send */ urb->number_of_packets = nframe; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc)) { if (rc == -ENODEV) /* device removed - give up silently */ @@ -1341,7 +1341,7 @@ static void read_iso_tasklet(unsigned long data) urb->dev = bcs->cs->hw.bas->udev; urb->transfer_flags = URB_ISO_ASAP; urb->number_of_packets = BAS_NUMFRAMES; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(cs->dev, "could not resubmit isochronous read URB: %s\n", @@ -1458,7 +1458,7 @@ static void write_ctrl_callback(struct urb *urb) ucs->retry_ctrl); /* urb->dev is clobbered by USB subsystem */ urb->dev = ucs->udev; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc)) { dev_err(&ucs->interface->dev, "could not resubmit request 0x%02x: %s\n", @@ -1517,7 +1517,7 @@ static int req_submit(struct bc_state *bcs, int req, int val, int timeout) (unsigned char*) &ucs->dr_ctrl, NULL, 0, write_ctrl_callback, ucs); ucs->retry_ctrl = 0; - ret = usb_submit_urb(ucs->urb_ctrl, SLAB_ATOMIC); + ret = usb_submit_urb(ucs->urb_ctrl, GFP_ATOMIC); if (unlikely(ret)) { dev_err(bcs->cs->dev, "could not submit request 0x%02x: %s\n", req, get_usb_rcmsg(ret)); @@ -1763,7 +1763,7 @@ static int atwrite_submit(struct cardstate *cs, unsigned char *buf, int len) usb_sndctrlpipe(ucs->udev, 0), (unsigned char*) &ucs->dr_cmd_out, buf, len, write_command_callback, cs); - rc = usb_submit_urb(ucs->urb_cmd_out, SLAB_ATOMIC); + rc = usb_submit_urb(ucs->urb_cmd_out, GFP_ATOMIC); if (unlikely(rc)) { update_basstate(ucs, 0, BS_ATWRPEND); dev_err(cs->dev, "could not submit HD_WRITE_ATMESSAGE: %s\n", @@ -2218,21 +2218,21 @@ static int gigaset_probe(struct usb_interface *interface, * - three for the different uses of the default control pipe * - three for each isochronous pipe */ - if (!(ucs->urb_int_in = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_cmd_in = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_cmd_out = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_ctrl = usb_alloc_urb(0, SLAB_KERNEL))) + if (!(ucs->urb_int_in = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_cmd_in = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_cmd_out = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_ctrl = usb_alloc_urb(0, GFP_KERNEL))) goto allocerr; for (j = 0; j < 2; ++j) { ubc = cs->bcs[j].hw.bas; for (i = 0; i < BAS_OUTURBS; ++i) if (!(ubc->isoouturbs[i].urb = - usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL))) + usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL))) goto allocerr; for (i = 0; i < BAS_INURBS; ++i) if (!(ubc->isoinurbs[i] = - usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL))) + usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL))) goto allocerr; } @@ -2246,7 +2246,7 @@ static int gigaset_probe(struct usb_interface *interface, (endpoint->bEndpointAddress) & 0x0f), ucs->int_in_buf, 3, read_int_callback, cs, endpoint->bInterval); - if ((rc = usb_submit_urb(ucs->urb_int_in, SLAB_KERNEL)) != 0) { + if ((rc = usb_submit_urb(ucs->urb_int_in, GFP_KERNEL)) != 0) { dev_err(cs->dev, "could not submit interrupt URB: %s\n", get_usb_rcmsg(rc)); goto error; diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 5ebf49ac9b2..04f2ad7ba8b 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -410,7 +410,7 @@ static void gigaset_read_int_callback(struct urb *urb) if (resubmit) { spin_lock_irqsave(&cs->lock, flags); - r = cs->connected ? usb_submit_urb(urb, SLAB_ATOMIC) : -ENODEV; + r = cs->connected ? usb_submit_urb(urb, GFP_ATOMIC) : -ENODEV; spin_unlock_irqrestore(&cs->lock, flags); if (r) dev_err(cs->dev, "error %d when resubmitting urb.\n", @@ -486,7 +486,7 @@ static int send_cb(struct cardstate *cs, struct cmdbuf_t *cb) atomic_set(&ucs->busy, 1); spin_lock_irqsave(&cs->lock, flags); - status = cs->connected ? usb_submit_urb(ucs->bulk_out_urb, SLAB_ATOMIC) : -ENODEV; + status = cs->connected ? usb_submit_urb(ucs->bulk_out_urb, GFP_ATOMIC) : -ENODEV; spin_unlock_irqrestore(&cs->lock, flags); if (status) { @@ -664,7 +664,7 @@ static int write_modem(struct cardstate *cs) ucs->bulk_out_endpointAddr & 0x0f), ucs->bulk_out_buffer, count, gigaset_write_bulk_callback, cs); - ret = usb_submit_urb(ucs->bulk_out_urb, SLAB_ATOMIC); + ret = usb_submit_urb(ucs->bulk_out_urb, GFP_ATOMIC); } else { ret = -ENODEV; } @@ -763,7 +763,7 @@ static int gigaset_probe(struct usb_interface *interface, goto error; } - ucs->bulk_out_urb = usb_alloc_urb(0, SLAB_KERNEL); + ucs->bulk_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!ucs->bulk_out_urb) { dev_err(cs->dev, "Couldn't allocate bulk_out_urb\n"); retval = -ENOMEM; @@ -774,7 +774,7 @@ static int gigaset_probe(struct usb_interface *interface, atomic_set(&ucs->busy, 0); - ucs->read_urb = usb_alloc_urb(0, SLAB_KERNEL); + ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL); if (!ucs->read_urb) { dev_err(cs->dev, "No free urbs available\n"); retval = -ENOMEM; @@ -797,7 +797,7 @@ static int gigaset_probe(struct usb_interface *interface, gigaset_read_int_callback, cs->inbuf + 0, endpoint->bInterval); - retval = usb_submit_urb(ucs->read_urb, SLAB_KERNEL); + retval = usb_submit_urb(ucs->read_urb, GFP_KERNEL); if (retval) { dev_err(cs->dev, "Could not submit URB (error %d)\n", -retval); goto error; diff --git a/drivers/isdn/hisax/isdnhdlc.h b/drivers/isdn/hisax/isdnhdlc.h index 269315988dc..5655b5f9c48 100644 --- a/drivers/isdn/hisax/isdnhdlc.h +++ b/drivers/isdn/hisax/isdnhdlc.h @@ -41,10 +41,10 @@ struct isdnhdlc_vars { unsigned char shift_reg; unsigned char ffvalue; - int data_received:1; // set if transferring data - int dchannel:1; // set if D channel (send idle instead of flags) - int do_adapt56:1; // set if 56K adaptation - int do_closing:1; // set if in closing phase (need to send CRC + flag + unsigned int data_received:1; // set if transferring data + unsigned int dchannel:1; // set if D channel (send idle instead of flags) + unsigned int do_adapt56:1; // set if 56K adaptation + unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag }; diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 9c39b98d5a5..176142c6149 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -76,6 +76,12 @@ config LEDS_NET48XX This option enables support for the Soekris net4801 and net4826 error LED. +config LEDS_WRAP + tristate "LED Support for the WRAP series LEDs" + depends on LEDS_CLASS && SCx200_GPIO + help + This option enables support for the PCEngines WRAP programmable LEDs. + comment "LED Triggers" config LEDS_TRIGGERS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 6aa2aed7539..500de3dc962 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_LEDS_TOSA) += leds-tosa.o obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o obj-$(CONFIG_LEDS_AMS_DELTA) += leds-ams-delta.o obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o +obj-$(CONFIG_LEDS_WRAP) += leds-wrap.o # LED Triggers obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o diff --git a/drivers/leds/leds-wrap.c b/drivers/leds/leds-wrap.c new file mode 100644 index 00000000000..27fb2d8e991 --- /dev/null +++ b/drivers/leds/leds-wrap.c @@ -0,0 +1,142 @@ +/* + * LEDs driver for PCEngines WRAP + * + * Copyright (C) 2006 Kristian Kielhofner <kris@krisk.org> + * + * Based on leds-net48xx.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/leds.h> +#include <linux/err.h> +#include <asm/io.h> +#include <linux/scx200_gpio.h> + +#define DRVNAME "wrap-led" +#define WRAP_ERROR_LED_GPIO 3 +#define WRAP_EXTRA_LED_GPIO 18 + +static struct platform_device *pdev; + +static void wrap_error_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + if (value) + scx200_gpio_set_low(WRAP_ERROR_LED_GPIO); + else + scx200_gpio_set_high(WRAP_ERROR_LED_GPIO); +} + +static void wrap_extra_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + if (value) + scx200_gpio_set_low(WRAP_EXTRA_LED_GPIO); + else + scx200_gpio_set_high(WRAP_EXTRA_LED_GPIO); +} + +static struct led_classdev wrap_error_led = { + .name = "wrap:error", + .brightness_set = wrap_error_led_set, +}; + +static struct led_classdev wrap_extra_led = { + .name = "wrap:extra", + .brightness_set = wrap_extra_led_set, +}; + +#ifdef CONFIG_PM +static int wrap_led_suspend(struct platform_device *dev, + pm_message_t state) +{ + led_classdev_suspend(&wrap_error_led); + led_classdev_suspend(&wrap_extra_led); + return 0; +} + +static int wrap_led_resume(struct platform_device *dev) +{ + led_classdev_resume(&wrap_error_led); + led_classdev_resume(&wrap_extra_led); + return 0; +} +#else +#define wrap_led_suspend NULL +#define wrap_led_resume NULL +#endif + +static int wrap_led_probe(struct platform_device *pdev) +{ + int ret; + + ret = led_classdev_register(&pdev->dev, &wrap_error_led); + if (ret == 0) { + ret = led_classdev_register(&pdev->dev, &wrap_extra_led); + if (ret < 0) + led_classdev_unregister(&wrap_error_led); + } + return ret; +} + +static int wrap_led_remove(struct platform_device *pdev) +{ + led_classdev_unregister(&wrap_error_led); + led_classdev_unregister(&wrap_extra_led); + return 0; +} + +static struct platform_driver wrap_led_driver = { + .probe = wrap_led_probe, + .remove = wrap_led_remove, + .suspend = wrap_led_suspend, + .resume = wrap_led_resume, + .driver = { + .name = DRVNAME, + .owner = THIS_MODULE, + }, +}; + +static int __init wrap_led_init(void) +{ + int ret; + + if (!scx200_gpio_present()) { + ret = -ENODEV; + goto out; + } + + ret = platform_driver_register(&wrap_led_driver); + if (ret < 0) + goto out; + + pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + platform_driver_unregister(&wrap_led_driver); + goto out; + } + +out: + return ret; +} + +static void __exit wrap_led_exit(void) +{ + platform_device_unregister(pdev); + platform_driver_unregister(&wrap_led_driver); +} + +module_init(wrap_led_init); +module_exit(wrap_led_exit); + +MODULE_AUTHOR("Kristian Kielhofner <kris@krisk.org>"); +MODULE_DESCRIPTION("PCEngines WRAP LED driver"); +MODULE_LICENSE("GPL"); + diff --git a/drivers/macintosh/apm_emu.c b/drivers/macintosh/apm_emu.c index 1293876a2eb..8862a83b8d8 100644 --- a/drivers/macintosh/apm_emu.c +++ b/drivers/macintosh/apm_emu.c @@ -529,7 +529,8 @@ static int __init apm_emu_init(void) if (apm_proc) apm_proc->owner = THIS_MODULE; - misc_register(&apm_device); + if (misc_register(&apm_device) != 0) + printk(KERN_INFO "Could not create misc. device for apm\n"); pmu_register_sleep_notifier(&apm_sleep_notifier); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 13b953ae8eb..3d3bf1643e7 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -24,6 +24,7 @@ #include <linux/suspend.h> #include <linux/kthread.h> #include <linux/moduleparam.h> +#include <linux/freezer.h> #include <asm/prom.h> #include <asm/machdep.h> diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index e63ea1c1f3c..c8558d4ed50 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -42,7 +42,7 @@ #include <linux/interrupt.h> #include <linux/device.h> #include <linux/sysdev.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/syscalls.h> #include <linux/cpu.h> #include <asm/prom.h> diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index ab3faa702d5..e947af982f9 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -34,6 +34,7 @@ #include <linux/device.h> #include <linux/platform_device.h> #include <linux/mutex.h> +#include <linux/freezer.h> #include <asm/prom.h> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ed2d4ef27fd..a1086ee8ccc 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -20,6 +20,7 @@ #include <asm/atomic.h> #include <linux/scatterlist.h> #include <asm/page.h> +#include <asm/unaligned.h> #include "dm.h" @@ -85,7 +86,10 @@ struct crypt_config { */ struct crypt_iv_operations *iv_gen_ops; char *iv_mode; - struct crypto_cipher *iv_gen_private; + union { + struct crypto_cipher *essiv_tfm; + int benbi_shift; + } iv_gen_private; sector_t iv_offset; unsigned int iv_size; @@ -101,7 +105,7 @@ struct crypt_config { #define MIN_POOL_PAGES 32 #define MIN_BIO_PAGES 8 -static kmem_cache_t *_crypt_io_pool; +static struct kmem_cache *_crypt_io_pool; /* * Different IV generation algorithms: @@ -113,6 +117,9 @@ static kmem_cache_t *_crypt_io_pool; * encrypted with the bulk cipher using a salt as key. The salt * should be derived from the bulk cipher's key via hashing. * + * benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1 + * (needed for LRW-32-AES and possible other narrow block modes) + * * plumb: unimplemented, see: * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 */ @@ -191,21 +198,61 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } kfree(salt); - cc->iv_gen_private = essiv_tfm; + cc->iv_gen_private.essiv_tfm = essiv_tfm; return 0; } static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - crypto_free_cipher(cc->iv_gen_private); - cc->iv_gen_private = NULL; + crypto_free_cipher(cc->iv_gen_private.essiv_tfm); + cc->iv_gen_private.essiv_tfm = NULL; } static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { memset(iv, 0, cc->iv_size); *(u64 *)iv = cpu_to_le64(sector); - crypto_cipher_encrypt_one(cc->iv_gen_private, iv, iv); + crypto_cipher_encrypt_one(cc->iv_gen_private.essiv_tfm, iv, iv); + return 0; +} + +static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) +{ + unsigned int bs = crypto_blkcipher_blocksize(cc->tfm); + int log = long_log2(bs); + + /* we need to calculate how far we must shift the sector count + * to get the cipher block count, we use this shift in _gen */ + + if (1 << log != bs) { + ti->error = "cypher blocksize is not a power of 2"; + return -EINVAL; + } + + if (log > 9) { + ti->error = "cypher blocksize is > 512"; + return -EINVAL; + } + + cc->iv_gen_private.benbi_shift = 9 - log; + + return 0; +} + +static void crypt_iv_benbi_dtr(struct crypt_config *cc) +{ +} + +static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector) +{ + __be64 val; + + memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ + + val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi_shift) + 1); + put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); + return 0; } @@ -219,13 +266,18 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = { .generator = crypt_iv_essiv_gen }; +static struct crypt_iv_operations crypt_iv_benbi_ops = { + .ctr = crypt_iv_benbi_ctr, + .dtr = crypt_iv_benbi_dtr, + .generator = crypt_iv_benbi_gen +}; static int crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, struct scatterlist *in, unsigned int length, int write, sector_t sector) { - u8 iv[cc->iv_size]; + u8 iv[cc->iv_size] __attribute__ ((aligned(__alignof__(u64)))); struct blkcipher_desc desc = { .tfm = cc->tfm, .info = iv, @@ -768,7 +820,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->tfm = tfm; /* - * Choose ivmode. Valid modes: "plain", "essiv:<esshash>". + * Choose ivmode. Valid modes: "plain", "essiv:<esshash>", "benbi". * See comments at iv code */ @@ -778,6 +830,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops = &crypt_iv_plain_ops; else if (strcmp(ivmode, "essiv") == 0) cc->iv_gen_ops = &crypt_iv_essiv_ops; + else if (strcmp(ivmode, "benbi") == 0) + cc->iv_gen_ops = &crypt_iv_benbi_ops; else { ti->error = "Invalid IV mode"; goto bad2; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e77ee6fd104..cf8bf052138 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -101,7 +101,7 @@ typedef int (*action_fn) (struct pgpath *pgpath); #define MIN_IOS 256 /* Mempool size */ -static kmem_cache_t *_mpio_cache; +static struct kmem_cache *_mpio_cache; struct workqueue_struct *kmultipathd; static void process_queued_ios(struct work_struct *work); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 91c7aa1fed0..b0ce2ce8227 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -88,8 +88,8 @@ struct pending_exception { * Hash table mapping origin volumes to lists of snapshots and * a lock to protect it */ -static kmem_cache_t *exception_cache; -static kmem_cache_t *pending_cache; +static struct kmem_cache *exception_cache; +static struct kmem_cache *pending_cache; static mempool_t *pending_pool; /* @@ -228,7 +228,7 @@ static int init_exception_table(struct exception_table *et, uint32_t size) return 0; } -static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) +static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem) { struct list_head *slot; struct exception *ex, *next; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fc4f743f3b5..7ec1b112a6d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -121,8 +121,8 @@ struct mapped_device { }; #define MIN_IOS 256 -static kmem_cache_t *_io_cache; -static kmem_cache_t *_tio_cache; +static struct kmem_cache *_io_cache; +static struct kmem_cache *_tio_cache; static int __init local_init(void) { diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index b3c01496c73..b46f6c575f7 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -203,7 +203,7 @@ struct kcopyd_job { /* FIXME: this should scale with the number of pages */ #define MIN_JOBS 512 -static kmem_cache_t *_job_cache; +static struct kmem_cache *_job_cache; static mempool_t *_job_pool; /* diff --git a/drivers/md/md.c b/drivers/md/md.c index 8cbf9c9df1c..6c4345bde07 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -39,10 +39,10 @@ #include <linux/raid/bitmap.h> #include <linux/sysctl.h> #include <linux/buffer_head.h> /* for invalidate_bdev */ -#include <linux/suspend.h> #include <linux/poll.h> #include <linux/mutex.h> #include <linux/ctype.h> +#include <linux/freezer.h> #include <linux/init.h> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69c3e201fa3..52914d5cec7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -348,7 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf) static int grow_stripes(raid5_conf_t *conf, int num) { - kmem_cache_t *sc; + struct kmem_cache *sc; int devs = conf->raid_disks; sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); @@ -397,7 +397,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) LIST_HEAD(newstripes); struct disk_info *ndisks; int err = 0; - kmem_cache_t *sc; + struct kmem_cache *sc; int i; if (newsize <= conf->pool_size) diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 206c13e47a0..9123147e376 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -287,7 +287,7 @@ static int cinergyt2_alloc_stream_urbs (struct cinergyt2 *cinergyt2) int i; cinergyt2->streambuf = usb_buffer_alloc(cinergyt2->udev, STREAM_URB_COUNT*STREAM_BUF_SIZE, - SLAB_KERNEL, &cinergyt2->streambuf_dmahandle); + GFP_KERNEL, &cinergyt2->streambuf_dmahandle); if (!cinergyt2->streambuf) { dprintk(1, "failed to alloc consistent stream memory area, bailing out!\n"); return -ENOMEM; diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index a2ab2eebfc6..e85972222ab 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -34,7 +34,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/list.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/jiffies.h> #include <asm/processor.h> diff --git a/drivers/media/dvb/dvb-usb/usb-urb.c b/drivers/media/dvb/dvb-usb/usb-urb.c index 78035ee824c..397f51a7b2a 100644 --- a/drivers/media/dvb/dvb-usb/usb-urb.c +++ b/drivers/media/dvb/dvb-usb/usb-urb.c @@ -116,7 +116,7 @@ static int usb_allocate_stream_buffers(struct usb_data_stream *stream, int num, for (stream->buf_num = 0; stream->buf_num < num; stream->buf_num++) { deb_mem("allocating buffer %d\n",stream->buf_num); if (( stream->buf_list[stream->buf_num] = - usb_buffer_alloc(stream->udev, size, SLAB_ATOMIC, + usb_buffer_alloc(stream->udev, size, GFP_ATOMIC, &stream->dma_addr[stream->buf_num]) ) == NULL) { deb_mem("not enough memory for urb-buffer allocation.\n"); usb_free_stream_buffers(stream); diff --git a/drivers/media/dvb/frontends/l64781.c b/drivers/media/dvb/frontends/l64781.c index f3bc82e44a2..1aeacb1c4af 100644 --- a/drivers/media/dvb/frontends/l64781.c +++ b/drivers/media/dvb/frontends/l64781.c @@ -36,7 +36,7 @@ struct l64781_state { struct dvb_frontend frontend; /* private demodulator data */ - int first:1; + unsigned int first:1; }; #define dprintk(args...) \ diff --git a/drivers/media/dvb/ttusb-dec/ttusb_dec.c b/drivers/media/dvb/ttusb-dec/ttusb_dec.c index 8135f3e76ae..10b121ada83 100644 --- a/drivers/media/dvb/ttusb-dec/ttusb_dec.c +++ b/drivers/media/dvb/ttusb-dec/ttusb_dec.c @@ -1244,7 +1244,7 @@ static int ttusb_dec_init_usb(struct ttusb_dec *dec) return -ENOMEM; } dec->irq_buffer = usb_buffer_alloc(dec->udev,IRQ_PACKET_SIZE, - SLAB_ATOMIC, &dec->irq_dma_handle); + GFP_ATOMIC, &dec->irq_dma_handle); if(!dec->irq_buffer) { return -ENOMEM; } diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig index 6d96b17a7f8..920b63f8cf0 100644 --- a/drivers/media/radio/Kconfig +++ b/drivers/media/radio/Kconfig @@ -173,38 +173,6 @@ config RADIO_MAESTRO To compile this driver as a module, choose M here: the module will be called radio-maestro. -config RADIO_MIROPCM20 - tristate "miroSOUND PCM20 radio" - depends on ISA && VIDEO_V4L1 && SOUND_ACI_MIXER - ---help--- - Choose Y here if you have this FM radio card. You also need to say Y - to "ACI mixer (miroSOUND PCM1-pro/PCM12/PCM20 radio)" (in "Sound") - for this to work. - - In order to control your radio card, you will need to use programs - that are compatible with the Video For Linux API. Information on - this API and pointers to "v4l" programs may be found at - <file:Documentation/video4linux/API.html>. - - To compile this driver as a module, choose M here: the - module will be called miropcm20. - -config RADIO_MIROPCM20_RDS - tristate "miroSOUND PCM20 radio RDS user interface (EXPERIMENTAL)" - depends on RADIO_MIROPCM20 && EXPERIMENTAL - ---help--- - Choose Y here if you want to see RDS/RBDS information like - RadioText, Programme Service name, Clock Time and date, Programme - Type and Traffic Announcement/Programme identification. - - It's not possible to read the raw RDS packets from the device, so - the driver cant provide an V4L interface for this. But the - availability of RDS is reported over V4L by the basic driver - already. Here RDS can be read from files in /dev/v4l/rds. - - To compile this driver as a module, choose M here: the - module will be called miropcm20-rds. - config RADIO_SF16FMI tristate "SF16FMI Radio" depends on ISA && VIDEO_V4L2 diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index cf43df3fe70..e1b56dc13c3 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -56,7 +56,7 @@ #include <media/tvaudio.h> #include <media/msp3400.h> #include <linux/kthread.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include "msp3400-driver.h" /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index fcaef4bf828..d506dfaa45a 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -29,6 +29,7 @@ #include <linux/init.h> #include <linux/smp_lock.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <media/tvaudio.h> #include <media/v4l2-common.h> diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c index f53edf1923b..fcc5467e763 100644 --- a/drivers/media/video/video-buf-dvb.c +++ b/drivers/media/video/video-buf-dvb.c @@ -20,7 +20,7 @@ #include <linux/fs.h> #include <linux/kthread.h> #include <linux/file.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <media/video-buf.h> #include <media/video-buf-dvb.h> diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c index 3c8dc72dc8e..9986de5cb3d 100644 --- a/drivers/media/video/vivi.c +++ b/drivers/media/video/vivi.c @@ -36,6 +36,7 @@ #include <media/v4l2-common.h> #include <linux/kthread.h> #include <linux/highmem.h> +#include <linux/freezer.h> /* Wake up at about 30 fps */ #define WAKE_NUMERATOR 30 diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 051b7c5b8f0..6e068cf1049 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -347,7 +347,7 @@ mpt_reply(MPT_ADAPTER *ioc, u32 pa) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_interrupt - MPT adapter (IOC) specific interrupt handler. * @irq: irq number (not used) * @bus_id: bus identifier cookie == pointer to MPT_ADAPTER structure @@ -387,14 +387,16 @@ mpt_interrupt(int irq, void *bus_id) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_base_reply - MPT base driver's callback routine; all base driver - * "internal" request/reply processing is routed here. - * Currently used for EventNotification and EventAck handling. +/** + * mpt_base_reply - MPT base driver's callback routine * @ioc: Pointer to MPT_ADAPTER structure * @mf: Pointer to original MPT request frame * @reply: Pointer to MPT reply frame (NULL if TurboReply) * + * MPT base driver's callback routine; all base driver + * "internal" request/reply processing is routed here. + * Currently used for EventNotification and EventAck handling. + * * Returns 1 indicating original alloc'd request frame ptr * should be freed, or 0 if it shouldn't. */ @@ -530,7 +532,7 @@ mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply) * @dclass: Protocol driver's class (%MPT_DRIVER_CLASS enum value) * * This routine is called by a protocol-specific driver (SCSI host, - * LAN, SCSI target) to register it's reply callback routine. Each + * LAN, SCSI target) to register its reply callback routine. Each * protocol-specific driver must do this before it will be able to * use any IOC resources, such as obtaining request frames. * @@ -572,7 +574,7 @@ mpt_register(MPT_CALLBACK cbfunc, MPT_DRIVER_CLASS dclass) * mpt_deregister - Deregister a protocol drivers resources. * @cb_idx: previously registered callback handle * - * Each protocol-specific driver should call this routine when it's + * Each protocol-specific driver should call this routine when its * module is unloaded. */ void @@ -617,7 +619,7 @@ mpt_event_register(int cb_idx, MPT_EVHANDLER ev_cbfunc) * * Each protocol-specific driver should call this routine * when it does not (or can no longer) handle events, - * or when it's module is unloaded. + * or when its module is unloaded. */ void mpt_event_deregister(int cb_idx) @@ -656,7 +658,7 @@ mpt_reset_register(int cb_idx, MPT_RESETHANDLER reset_func) * * Each protocol-specific driver should call this routine * when it does not (or can no longer) handle IOC reset handling, - * or when it's module is unloaded. + * or when its module is unloaded. */ void mpt_reset_deregister(int cb_idx) @@ -670,6 +672,8 @@ mpt_reset_deregister(int cb_idx) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_device_driver_register - Register device driver hooks + * @dd_cbfunc: driver callbacks struct + * @cb_idx: MPT protocol driver index */ int mpt_device_driver_register(struct mpt_pci_driver * dd_cbfunc, int cb_idx) @@ -696,6 +700,7 @@ mpt_device_driver_register(struct mpt_pci_driver * dd_cbfunc, int cb_idx) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_device_driver_deregister - DeRegister device driver hooks + * @cb_idx: MPT protocol driver index */ void mpt_device_driver_deregister(int cb_idx) @@ -887,8 +892,7 @@ mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_send_handshake_request - Send MPT request via doorbell - * handshake method. + * mpt_send_handshake_request - Send MPT request via doorbell handshake method. * @handle: Handle of registered MPT protocol driver * @ioc: Pointer to MPT adapter structure * @reqBytes: Size of the request in bytes @@ -981,10 +985,13 @@ mpt_send_handshake_request(int handle, MPT_ADAPTER *ioc, int reqBytes, u32 *req, /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_host_page_access_control - provides mechanism for the host - * driver to control the IOC's Host Page Buffer access. + * mpt_host_page_access_control - control the IOC's Host Page Buffer access * @ioc: Pointer to MPT adapter structure * @access_control_value: define bits below + * @sleepFlag: Specifies whether the process can sleep + * + * Provides mechanism for the host driver to control the IOC's + * Host Page Buffer access. * * Access Control Value - bits[15:12] * 0h Reserved @@ -1022,10 +1029,10 @@ mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int slee /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_host_page_alloc - allocate system memory for the fw - * If we already allocated memory in past, then resend the same pointer. - * ioc@: Pointer to pointer to IOC adapter - * ioc_init@: Pointer to ioc init config page + * @ioc: Pointer to pointer to IOC adapter + * @ioc_init: Pointer to ioc init config page * + * If we already allocated memory in past, then resend the same pointer. * Returns 0 for success, non-zero for failure. */ static int @@ -1091,12 +1098,15 @@ return 0; /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_verify_adapter - Given a unique IOC identifier, set pointer to - * the associated MPT adapter structure. + * mpt_verify_adapter - Given IOC identifier, set pointer to its adapter structure. * @iocid: IOC unique identifier (integer) * @iocpp: Pointer to pointer to IOC adapter * - * Returns iocid and sets iocpp. + * Given a unique IOC identifier, set pointer to the associated MPT + * adapter structure. + * + * Returns iocid and sets iocpp if iocid is found. + * Returns -1 if iocid is not found. */ int mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp) @@ -1115,9 +1125,10 @@ mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_attach - Install a PCI intelligent MPT adapter. * @pdev: Pointer to pci_dev structure + * @id: PCI device ID information * * This routine performs all the steps necessary to bring the IOC of * a MPT adapter to a OPERATIONAL state. This includes registering @@ -1417,10 +1428,9 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_detach - Remove a PCI intelligent MPT adapter. * @pdev: Pointer to pci_dev structure - * */ void @@ -1466,10 +1476,10 @@ mpt_detach(struct pci_dev *pdev) */ #ifdef CONFIG_PM /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_suspend - Fusion MPT base driver suspend routine. - * - * + * @pdev: Pointer to pci_dev structure + * @state: new state to enter */ int mpt_suspend(struct pci_dev *pdev, pm_message_t state) @@ -1505,10 +1515,9 @@ mpt_suspend(struct pci_dev *pdev, pm_message_t state) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_resume - Fusion MPT base driver resume routine. - * - * + * @pdev: Pointer to pci_dev structure */ int mpt_resume(struct pci_dev *pdev) @@ -1566,7 +1575,7 @@ mpt_signal_reset(int index, MPT_ADAPTER *ioc, int reset_phase) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_do_ioc_recovery - Initialize or recover MPT adapter. * @ioc: Pointer to MPT adapter structure * @reason: Event word / reason @@ -1892,13 +1901,15 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_detect_bound_ports - Search for PCI bus/dev_function - * which matches PCI bus/dev_function (+/-1) for newly discovered 929, - * 929X, 1030 or 1035. +/** + * mpt_detect_bound_ports - Search for matching PCI bus/dev_function * @ioc: Pointer to MPT adapter structure * @pdev: Pointer to (struct pci_dev) structure * + * Search for PCI bus/dev_function which matches + * PCI bus/dev_function (+/-1) for newly discovered 929, + * 929X, 1030 or 1035. + * * If match on PCI dev_function +/-1 is found, bind the two MPT adapters * using alt_ioc pointer fields in their %MPT_ADAPTER structures. */ @@ -1945,9 +1956,9 @@ mpt_detect_bound_ports(MPT_ADAPTER *ioc, struct pci_dev *pdev) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_adapter_disable - Disable misbehaving MPT adapter. - * @this: Pointer to MPT adapter structure + * @ioc: Pointer to MPT adapter structure */ static void mpt_adapter_disable(MPT_ADAPTER *ioc) @@ -2046,9 +2057,8 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_adapter_dispose - Free all resources associated with a MPT - * adapter. +/** + * mpt_adapter_dispose - Free all resources associated with an MPT adapter * @ioc: Pointer to MPT adapter structure * * This routine unregisters h/w resources and frees all alloc'd memory @@ -2099,8 +2109,8 @@ mpt_adapter_dispose(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * MptDisplayIocCapabilities - Disply IOC's capacilities. +/** + * MptDisplayIocCapabilities - Disply IOC's capabilities. * @ioc: Pointer to MPT adapter structure */ static void @@ -2142,7 +2152,7 @@ MptDisplayIocCapabilities(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * MakeIocReady - Get IOC to a READY state, using KickStart if needed. * @ioc: Pointer to MPT_ADAPTER structure * @force: Force hard KickStart of IOC @@ -2279,7 +2289,7 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_GetIocState - Get the current state of a MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @cooked: Request raw or cooked IOC state @@ -2304,7 +2314,7 @@ mpt_GetIocState(MPT_ADAPTER *ioc, int cooked) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetIocFacts - Send IOCFacts request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2478,7 +2488,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetPortFacts - Send PortFacts request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @portnum: Port number @@ -2545,7 +2555,7 @@ GetPortFacts(MPT_ADAPTER *ioc, int portnum, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendIocInit - Send IOCInit request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2630,7 +2640,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) } /* No need to byte swap the multibyte fields in the reply - * since we don't even look at it's contents. + * since we don't even look at its contents. */ dhsprintk((MYIOC_s_INFO_FMT "Sending PortEnable (req @ %p)\n", @@ -2672,7 +2682,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendPortEnable - Send PortEnable request to MPT adapter port. * @ioc: Pointer to MPT_ADAPTER structure * @portnum: Port number to enable @@ -2723,9 +2733,13 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag) return rc; } -/* - * ioc: Pointer to MPT_ADAPTER structure - * size - total FW bytes +/** + * mpt_alloc_fw_memory - allocate firmware memory + * @ioc: Pointer to MPT_ADAPTER structure + * @size: total FW bytes + * + * If memory has already been allocated, the same (cached) value + * is returned. */ void mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size) @@ -2742,9 +2756,12 @@ mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size) ioc->alloc_total += size; } } -/* - * If alt_img is NULL, delete from ioc structure. - * Else, delete a secondary image in same format. +/** + * mpt_free_fw_memory - free firmware memory + * @ioc: Pointer to MPT_ADAPTER structure + * + * If alt_img is NULL, delete from ioc structure. + * Else, delete a secondary image in same format. */ void mpt_free_fw_memory(MPT_ADAPTER *ioc) @@ -2763,7 +2780,7 @@ mpt_free_fw_memory(MPT_ADAPTER *ioc) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_do_upload - Construct and Send FWUpload request to MPT adapter port. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2865,10 +2882,10 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_downloadboot - DownloadBoot code * @ioc: Pointer to MPT_ADAPTER structure - * @flag: Specify which part of IOC memory is to be uploaded. + * @pFwHeader: Pointer to firmware header info * @sleepFlag: Specifies whether the process can sleep * * FwDownloadBoot requires Programmed IO access. @@ -3071,7 +3088,7 @@ mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * KickStart - Perform hard reset of MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @force: Force hard reset @@ -3145,12 +3162,12 @@ KickStart(MPT_ADAPTER *ioc, int force, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_diag_reset - Perform hard reset of the adapter. * @ioc: Pointer to MPT_ADAPTER structure * @ignore: Set if to honor and clear to ignore * the reset history bit - * @sleepflag: CAN_SLEEP if called in a non-interrupt thread, + * @sleepFlag: CAN_SLEEP if called in a non-interrupt thread, * else set to NO_SLEEP (use mdelay instead) * * This routine places the adapter in diagnostic mode via the @@ -3436,11 +3453,12 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendIocReset - Send IOCReset request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @reset_type: reset type, expected values are * %MPI_FUNCTION_IOC_MESSAGE_UNIT_RESET or %MPI_FUNCTION_IO_UNIT_RESET + * @sleepFlag: Specifies whether the process can sleep * * Send IOCReset request to the MPT adapter. * @@ -3494,11 +3512,12 @@ SendIocReset(MPT_ADAPTER *ioc, u8 reset_type, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * initChainBuffers - Allocate memory for and initialize - * chain buffers, chain buffer control arrays and spinlock. - * @hd: Pointer to MPT_SCSI_HOST structure - * @init: If set, initialize the spin lock. +/** + * initChainBuffers - Allocate memory for and initialize chain buffers + * @ioc: Pointer to MPT_ADAPTER structure + * + * Allocates memory for and initializes chain buffers, + * chain buffer control arrays and spinlock. */ static int initChainBuffers(MPT_ADAPTER *ioc) @@ -3594,7 +3613,7 @@ initChainBuffers(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * PrimeIocFifos - Initialize IOC request and reply FIFOs. * @ioc: Pointer to MPT_ADAPTER structure * @@ -3891,15 +3910,15 @@ mpt_handshake_req_reply_wait(MPT_ADAPTER *ioc, int reqBytes, u32 *req, } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellAck - Wait for IOC to clear the IOP_DOORBELL_STATUS bit - * in it's IntStatus register. +/** + * WaitForDoorbellAck - Wait for IOC doorbell handshake acknowledge * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep * * This routine waits (up to ~2 seconds max) for IOC doorbell - * handshake ACKnowledge. + * handshake ACKnowledge, indicated by the IOP_DOORBELL_STATUS + * bit in its IntStatus register being clear. * * Returns a negative value on failure, else wait loop count. */ @@ -3942,14 +3961,14 @@ WaitForDoorbellAck(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellInt - Wait for IOC to set the HIS_DOORBELL_INTERRUPT bit - * in it's IntStatus register. +/** + * WaitForDoorbellInt - Wait for IOC to set its doorbell interrupt bit * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep * - * This routine waits (up to ~2 seconds max) for IOC doorbell interrupt. + * This routine waits (up to ~2 seconds max) for IOC doorbell interrupt + * (MPI_HIS_DOORBELL_INTERRUPT) to be set in the IntStatus register. * * Returns a negative value on failure, else wait loop count. */ @@ -3991,8 +4010,8 @@ WaitForDoorbellInt(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellReply - Wait for and capture a IOC handshake reply. +/** + * WaitForDoorbellReply - Wait for and capture an IOC handshake reply. * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep @@ -4077,7 +4096,7 @@ WaitForDoorbellReply(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetLanConfigPages - Fetch LANConfig pages. * @ioc: Pointer to MPT_ADAPTER structure * @@ -4188,12 +4207,9 @@ GetLanConfigPages(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mptbase_sas_persist_operation - Perform operation on SAS Persitent Table +/** + * mptbase_sas_persist_operation - Perform operation on SAS Persistent Table * @ioc: Pointer to MPT_ADAPTER structure - * @sas_address: 64bit SAS Address for operation. - * @target_id: specified target for operation - * @bus: specified bus for operation * @persist_opcode: see below * * MPI_SAS_OP_CLEAR_NOT_PRESENT - Free all persist TargetID mappings for @@ -4202,7 +4218,7 @@ GetLanConfigPages(MPT_ADAPTER *ioc) * * NOTE: Don't use not this function during interrupt time. * - * Returns: 0 for success, non-zero error + * Returns 0 for success, non-zero error */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -4399,7 +4415,7 @@ mptbase_raid_process_event_data(MPT_ADAPTER *ioc, } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetIoUnitPage2 - Retrieve BIOS version and boot order information. * @ioc: Pointer to MPT_ADAPTER structure * @@ -4457,7 +4473,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* mpt_GetScsiPortSettings - read SCSI Port Page 0 and 2 +/** + * mpt_GetScsiPortSettings - read SCSI Port Page 0 and 2 * @ioc: Pointer to a Adapter Strucutre * @portnum: IOC port number * @@ -4644,7 +4661,8 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* mpt_readScsiDevicePageHeaders - save version and length of SDP1 +/** + * mpt_readScsiDevicePageHeaders - save version and length of SDP1 * @ioc: Pointer to a Adapter Strucutre * @portnum: IOC port number * @@ -4996,9 +5014,8 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * SendEventNotification - Send EventNotification (on or off) request - * to MPT adapter. +/** + * SendEventNotification - Send EventNotification (on or off) request to adapter * @ioc: Pointer to MPT_ADAPTER structure * @EvSwitch: Event switch flags */ @@ -5062,8 +5079,8 @@ SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_config - Generic function to issue config message - * @ioc - Pointer to an adapter structure - * @cfg - Pointer to a configuration structure. Struct contains + * @ioc: Pointer to an adapter structure + * @pCfg: Pointer to a configuration structure. Struct contains * action, page address, direction, physical address * and pointer to a configuration page header * Page header is updated. @@ -5188,8 +5205,8 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_timer_expired - Call back for timer process. +/** + * mpt_timer_expired - Callback for timer process. * Used only internal config functionality. * @data: Pointer to MPT_SCSI_HOST recast as an unsigned long */ @@ -5214,12 +5231,12 @@ mpt_timer_expired(unsigned long data) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_ioc_reset - Base cleanup for hard reset * @ioc: Pointer to the adapter structure * @reset_phase: Indicates pre- or post-reset functionality * - * Remark: Free's resources with internally generated commands. + * Remark: Frees resources with internally generated commands. */ static int mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) @@ -5271,7 +5288,7 @@ mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) * procfs (%MPT_PROCFS_MPTBASEDIR/...) support stuff... */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_create - Create %MPT_PROCFS_MPTBASEDIR entries. * * Returns 0 for success, non-zero for failure. @@ -5297,7 +5314,7 @@ procmpt_create(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_destroy - Tear down %MPT_PROCFS_MPTBASEDIR entries. * * Returns 0 for success, non-zero for failure. @@ -5311,16 +5328,16 @@ procmpt_destroy(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * procmpt_summary_read - Handle read request from /proc/mpt/summary - * or from /proc/mpt/iocN/summary. +/** + * procmpt_summary_read - Handle read request of a summary file * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * + * Handles read request from /proc/mpt/summary or /proc/mpt/iocN/summary. * Returns number of characters written to process performing the read. */ static int @@ -5355,12 +5372,12 @@ procmpt_summary_read(char *buf, char **start, off_t offset, int request, int *eo } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_version_read - Handle read request from /proc/mpt/version. * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * @@ -5411,12 +5428,12 @@ procmpt_version_read(char *buf, char **start, off_t offset, int request, int *eo } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_iocinfo_read - Handle read request from /proc/mpt/iocN/info. * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * @@ -5577,16 +5594,17 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_HardResetHandler - Generic reset handler, issue SCSI Task - * Management call based on input arg values. If TaskMgmt fails, - * return associated SCSI request. + * mpt_HardResetHandler - Generic reset handler * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Indicates if sleep or schedule must be called. * + * Issues SCSI Task Management call based on input arg values. + * If TaskMgmt fails, returns associated SCSI request. + * * Remark: _HardResetHandler can be invoked from an interrupt thread (timer) * or a non-interrupt thread. In the former, must not call schedule(). * - * Remark: A return of -1 is a FATAL error case, as it means a + * Note: A return of -1 is a FATAL error case, as it means a * FW reload/initialization failed. * * Returns 0 for SUCCESS or -1 if FAILED. @@ -5935,13 +5953,14 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * ProcessEventNotification - Route a received EventNotificationReply to - * all currently regeistered event handlers. +/** + * ProcessEventNotification - Route EventNotificationReply to all event handlers * @ioc: Pointer to MPT_ADAPTER structure * @pEventReply: Pointer to EventNotification reply frame * @evHandlers: Pointer to integer, number of event handlers * + * Routes a received EventNotificationReply to all currently registered + * event handlers. * Returns sum of event handlers return values. */ static int @@ -6056,7 +6075,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_fc_log_info - Log information returned from Fibre Channel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @log_info: U32 LogInfo reply word from the IOC @@ -6077,7 +6096,7 @@ mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_spi_log_info - Log information returned from SCSI Parallel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @mr: Pointer to MPT reply frame @@ -6200,7 +6219,7 @@ mpt_spi_log_info(MPT_ADAPTER *ioc, u32 log_info) }; /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_sas_log_info - Log information returned from SAS IOC. * @ioc: Pointer to MPT_ADAPTER structure * @log_info: U32 LogInfo reply word from the IOC @@ -6255,7 +6274,7 @@ union loginfo_type { } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_sp_ioc_info - IOC information returned from SCSI Parallel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @ioc_status: U32 IOCStatus word from IOC @@ -6416,7 +6435,7 @@ EXPORT_SYMBOL(mpt_free_fw_memory); EXPORT_SYMBOL(mptbase_sas_persist_operation); /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * fusion_init - Fusion MPT base driver initialization routine. * * Returns 0 for success, non-zero for failure. @@ -6456,7 +6475,7 @@ fusion_init(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * fusion_exit - Perform driver unload cleanup. * * This routine frees all resources associated with each MPT adapter diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index ef2b55e1991..ca2f9107f14 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -1395,8 +1395,7 @@ mptfc_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptfc_init - Register MPT adapter(s) as SCSI host(s) with - * linux scsi mid-layer. + * mptfc_init - Register MPT adapter(s) as SCSI host(s) with SCSI mid-layer. * * Returns 0 for success, non-zero for failure. */ @@ -1440,7 +1439,7 @@ mptfc_init(void) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptfc_remove - Removed fc infrastructure for devices + * mptfc_remove - Remove fc infrastructure for devices * @pdev: Pointer to pci_dev structure * */ diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 30524dc54b1..2c72c36b817 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1230,15 +1230,15 @@ mptscsih_host_info(MPT_ADAPTER *ioc, char *pbuf, off_t offset, int len) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mptscsih_proc_info - Return information about MPT adapter + * @host: scsi host struct + * @buffer: if write, user data; if read, buffer for user + * @start: returns the buffer address + * @offset: if write, 0; if read, the current offset into the buffer from + * the previous read. + * @length: if write, return length; + * @func: write = 1; read = 0 * * (linux scsi_host_template.info routine) - * - * buffer: if write, user data; if read, buffer for user - * length: if write, return length; - * offset: if write, 0; if read, the current offset into the buffer from - * the previous read. - * hostno: scsi host number - * func: if write = 1; if read = 0 */ int mptscsih_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, @@ -1902,8 +1902,7 @@ mptscsih_bus_reset(struct scsi_cmnd * SCpnt) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptscsih_host_reset - Perform a SCSI host adapter RESET! - * new_eh variant + * mptscsih_host_reset - Perform a SCSI host adapter RESET (new_eh variant) * @SCpnt: Pointer to scsi_cmnd structure, IO which reset is due to * * (linux scsi_host_template.eh_host_reset_handler routine) @@ -1949,8 +1948,7 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptscsih_tm_pending_wait - wait for pending task management request to - * complete. + * mptscsih_tm_pending_wait - wait for pending task management request to complete * @hd: Pointer to MPT host structure. * * Returns {SUCCESS,FAILED}. @@ -1982,6 +1980,7 @@ mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd) /** * mptscsih_tm_wait_for_completion - wait for completion of TM task * @hd: Pointer to MPT host structure. + * @timeout: timeout in seconds * * Returns {SUCCESS,FAILED}. */ @@ -3429,8 +3428,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io) /** * mptscsih_synchronize_cache - Send SYNCHRONIZE_CACHE to all disks. * @hd: Pointer to a SCSI HOST structure - * @vtarget: per device private data - * @lun: lun + * @vdevice: virtual target device * * Uses the ISR, but with special processing. * MUST be single-threaded. diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index f422c0d0621..36641da5928 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1100,8 +1100,7 @@ static struct pci_driver mptspi_driver = { /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptspi_init - Register MPT adapter(s) as SCSI host(s) with - * linux scsi mid-layer. + * mptspi_init - Register MPT adapter(s) as SCSI host(s) with SCSI mid-layer. * * Returns 0 for success, non-zero for failure. */ @@ -1135,7 +1134,6 @@ mptspi_init(void) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mptspi_exit - Unregisters MPT adapter(s) - * */ static void __exit mptspi_exit(void) diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c index d96c687aee9..c463dc2efc0 100644 --- a/drivers/message/i2o/bus-osm.c +++ b/drivers/message/i2o/bus-osm.c @@ -56,6 +56,9 @@ static int i2o_bus_scan(struct i2o_device *dev) /** * i2o_bus_store_scan - Scan the I2O Bus Adapter * @d: device which should be scanned + * @attr: device_attribute + * @buf: output buffer + * @count: buffer size * * Returns count. */ diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index ee183053fa2..b9df143e4ff 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -54,8 +54,8 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd, * @dev: I2O device to claim * @drv: I2O driver which wants to claim the device * - * Do the leg work to assign a device to a given OSM. If the claim succeed - * the owner of the rimary. If the attempt fails a negative errno code + * Do the leg work to assign a device to a given OSM. If the claim succeeds, + * the owner is the primary. If the attempt fails a negative errno code * is returned. On success zero is returned. */ int i2o_device_claim(struct i2o_device *dev) @@ -208,24 +208,23 @@ static struct i2o_device *i2o_device_alloc(void) /** * i2o_device_add - allocate a new I2O device and add it to the IOP - * @iop: I2O controller where the device is on + * @c: I2O controller that the device is on * @entry: LCT entry of the I2O device * * Allocate a new I2O device and initialize it with the LCT entry. The * device is appended to the device list of the controller. * - * Returns a pointer to the I2O device on success or negative error code - * on failure. + * Returns zero on success, or a -ve errno. */ -static struct i2o_device *i2o_device_add(struct i2o_controller *c, - i2o_lct_entry * entry) +static int i2o_device_add(struct i2o_controller *c, i2o_lct_entry *entry) { struct i2o_device *i2o_dev, *tmp; + int rc; i2o_dev = i2o_device_alloc(); if (IS_ERR(i2o_dev)) { printk(KERN_ERR "i2o: unable to allocate i2o device\n"); - return i2o_dev; + return PTR_ERR(i2o_dev); } i2o_dev->lct_data = *entry; @@ -236,7 +235,9 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c, i2o_dev->iop = c; i2o_dev->device.parent = &c->device; - device_register(&i2o_dev->device); + rc = device_register(&i2o_dev->device); + if (rc) + goto err; list_add_tail(&i2o_dev->list, &c->devices); @@ -270,12 +271,16 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c, pr_debug("i2o: device %s added\n", i2o_dev->device.bus_id); - return i2o_dev; + return 0; + +err: + kfree(i2o_dev); + return rc; } /** * i2o_device_remove - remove an I2O device from the I2O core - * @dev: I2O device which should be released + * @i2o_dev: I2O device which should be released * * Is used on I2O controller removal or LCT modification, when the device * is removed from the system. Note that the device could still hang diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 7fc7399bd2e..9104b65ff70 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -34,9 +34,7 @@ static spinlock_t i2o_drivers_lock; static struct i2o_driver **i2o_drivers; /** - * i2o_bus_match - Tell if a I2O device class id match the class ids of - * the I2O driver (OSM) - * + * i2o_bus_match - Tell if I2O device class id matches the class ids of the I2O driver (OSM) * @dev: device which should be verified * @drv: the driver to match against * @@ -248,7 +246,7 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m) /** * i2o_driver_notify_controller_add_all - Send notify of added controller - * to all I2O drivers + * @c: newly added controller * * Send notifications to all registered drivers that a new controller was * added. @@ -267,8 +265,8 @@ void i2o_driver_notify_controller_add_all(struct i2o_controller *c) } /** - * i2o_driver_notify_controller_remove_all - Send notify of removed - * controller to all I2O drivers + * i2o_driver_notify_controller_remove_all - Send notify of removed controller + * @c: controller that is being removed * * Send notifications to all registered drivers that a controller was * removed. @@ -287,8 +285,8 @@ void i2o_driver_notify_controller_remove_all(struct i2o_controller *c) } /** - * i2o_driver_notify_device_add_all - Send notify of added device to all - * I2O drivers + * i2o_driver_notify_device_add_all - Send notify of added device + * @i2o_dev: newly added I2O device * * Send notifications to all registered drivers that a device was added. */ @@ -306,8 +304,8 @@ void i2o_driver_notify_device_add_all(struct i2o_device *i2o_dev) } /** - * i2o_driver_notify_device_remove_all - Send notify of removed device to - * all I2O drivers + * i2o_driver_notify_device_remove_all - Send notify of removed device + * @i2o_dev: device that is being removed * * Send notifications to all registered drivers that a device was removed. */ @@ -362,7 +360,7 @@ int __init i2o_driver_init(void) /** * i2o_driver_exit - clean up I2O drivers (OSMs) * - * Unregisters the I2O bus and free driver array. + * Unregisters the I2O bus and frees driver array. */ void __exit i2o_driver_exit(void) { diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index 9e529d8dd5c..902753b2c66 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -94,8 +94,8 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void) }; /** - * i2o_exec_wait_free - Free a i2o_exec_wait struct - * @i2o_exec_wait: I2O wait data which should be cleaned up + * i2o_exec_wait_free - Free an i2o_exec_wait struct + * @wait: I2O wait data which should be cleaned up */ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) { @@ -105,7 +105,7 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) /** * i2o_msg_post_wait_mem - Post and wait a message with DMA buffers * @c: controller - * @m: message to post + * @msg: message to post * @timeout: time in seconds to wait * @dma: i2o_dma struct of the DMA buffer to free on failure * @@ -269,6 +269,7 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m, /** * i2o_exec_show_vendor_id - Displays Vendor ID of controller * @d: device of which the Vendor ID should be displayed + * @attr: device_attribute to display * @buf: buffer into which the Vendor ID should be printed * * Returns number of bytes printed into buffer. @@ -290,6 +291,7 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, /** * i2o_exec_show_product_id - Displays Product ID of controller * @d: device of which the Product ID should be displayed + * @attr: device_attribute to display * @buf: buffer into which the Product ID should be printed * * Returns number of bytes printed into buffer. @@ -365,7 +367,7 @@ static int i2o_exec_remove(struct device *dev) /** * i2o_exec_lct_modified - Called on LCT NOTIFY reply - * @c: I2O controller on which the LCT has modified + * @work: work struct for a specific controller * * This function handles asynchronus LCT NOTIFY replies. It parses the * new LCT and if the buffer for the LCT was to small sends a LCT NOTIFY diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 70ae0025332..da9859f2caf 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -259,7 +259,7 @@ static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id) /** * i2o_block_device_power - Power management for device dev * @dev: I2O device which should receive the power management request - * @operation: Operation which should be send + * @op: Operation to send * * Send a power management request to the device dev. * @@ -315,7 +315,7 @@ static inline struct i2o_block_request *i2o_block_request_alloc(void) * i2o_block_request_free - Frees a I2O block request * @ireq: I2O block request which should be freed * - * Fres the allocated memory (give it back to the request mempool). + * Frees the allocated memory (give it back to the request mempool). */ static inline void i2o_block_request_free(struct i2o_block_request *ireq) { @@ -326,6 +326,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq) * i2o_block_sglist_alloc - Allocate the SG list and map it * @c: I2O controller to which the request belongs * @ireq: I2O block request + * @mptr: message body pointer * * Builds the SG list and map it to be accessable by the controller. * @@ -490,7 +491,7 @@ static void i2o_block_end_request(struct request *req, int uptodate, * i2o_block_reply - Block OSM reply handler. * @c: I2O controller from which the message arrives * @m: message id of reply - * qmsg: the actuall I2O message reply + * @msg: the actual I2O message reply * * This function gets all the message replies. * @@ -602,6 +603,8 @@ static void i2o_block_biosparam(unsigned long capacity, unsigned short *cyls, /** * i2o_block_open - Open the block device + * @inode: inode for block device being opened + * @file: file to open * * Power up the device, mount and lock the media. This function is called, * if the block device is opened for access. @@ -629,6 +632,8 @@ static int i2o_block_open(struct inode *inode, struct file *file) /** * i2o_block_release - Release the I2O block device + * @inode: inode for block device being released + * @file: file to close * * Unlock and unmount the media, and power down the device. Gets called if * the block device is closed. @@ -675,6 +680,8 @@ static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo) /** * i2o_block_ioctl - Issue device specific ioctl calls. + * @inode: inode for block device ioctl + * @file: file for ioctl * @cmd: ioctl command * @arg: arg * @@ -902,7 +909,7 @@ static int i2o_block_transfer(struct request *req) /** * i2o_block_request_fn - request queue handling function - * q: request queue from which the request could be fetched + * @q: request queue from which the request could be fetched * * Takes the next request from the queue, transfers it and if no error * occurs dequeue it from the queue. On arrival of the reply the message diff --git a/drivers/message/i2o/i2o_block.h b/drivers/message/i2o/i2o_block.h index d9fdc95b440..67f921b4419 100644 --- a/drivers/message/i2o/i2o_block.h +++ b/drivers/message/i2o/i2o_block.h @@ -64,7 +64,7 @@ /* I2O Block OSM mempool struct */ struct i2o_block_mempool { - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *pool; }; diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 7d23e082bf2..1de30d71167 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -265,7 +265,11 @@ static int i2o_cfg_swdl(unsigned long arg) return -ENOMEM; } - __copy_from_user(buffer.virt, kxfer.buf, fragsize); + if (__copy_from_user(buffer.virt, kxfer.buf, fragsize)) { + i2o_msg_nop(c, msg); + i2o_dma_free(&c->pdev->dev, &buffer); + return -EFAULT; + } msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7); msg->u.head[1] = @@ -516,7 +520,6 @@ static int i2o_cfg_evt_get(unsigned long arg, struct file *fp) return 0; } -#ifdef CONFIG_I2O_EXT_ADAPTEC #ifdef CONFIG_COMPAT static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, unsigned long arg) @@ -759,6 +762,7 @@ static long i2o_cfg_compat_ioctl(struct file *file, unsigned cmd, #endif +#ifdef CONFIG_I2O_EXT_ADAPTEC static int i2o_cfg_passthru(unsigned long arg) { struct i2o_cmd_passthru __user *cmd = diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c index 3d2e76eea93..a61cb17c5c1 100644 --- a/drivers/message/i2o/i2o_proc.c +++ b/drivers/message/i2o/i2o_proc.c @@ -163,7 +163,7 @@ static int print_serial_number(struct seq_file *seq, u8 * serialno, int max_len) * i2o_get_class_name - do i2o class name lookup * @class: class number * - * Return a descriptive string for an i2o class + * Return a descriptive string for an i2o class. */ static const char *i2o_get_class_name(int class) { diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 6ebf38213f9..1045c8a518b 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -220,7 +220,7 @@ static int i2o_scsi_probe(struct device *dev) u32 id = -1; u64 lun = -1; int channel = -1; - int i; + int i, rc; i2o_shost = i2o_scsi_get_host(c); if (!i2o_shost) @@ -304,14 +304,20 @@ static int i2o_scsi_probe(struct device *dev) return PTR_ERR(scsi_dev); } - sysfs_create_link(&i2o_dev->device.kobj, &scsi_dev->sdev_gendev.kobj, - "scsi"); + rc = sysfs_create_link(&i2o_dev->device.kobj, + &scsi_dev->sdev_gendev.kobj, "scsi"); + if (rc) + goto err; osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %ld\n", i2o_dev->lct_data.tid, channel, le32_to_cpu(id), (long unsigned int)le64_to_cpu(lun)); return 0; + +err: + scsi_remove_device(scsi_dev); + return rc; }; static const char *i2o_scsi_info(struct Scsi_Host *SChost) @@ -405,8 +411,7 @@ static void i2o_scsi_notify_device_add(struct i2o_device *i2o_dev) }; /** - * i2o_scsi_notify_device_remove - Retrieve notifications of removed - * devices + * i2o_scsi_notify_device_remove - Retrieve notifications of removed devices * @i2o_dev: the I2O device which was removed * * If a I2O device is removed, we catch the notification to remove the @@ -426,8 +431,7 @@ static void i2o_scsi_notify_device_remove(struct i2o_device *i2o_dev) }; /** - * i2o_scsi_notify_controller_add - Retrieve notifications of added - * controllers + * i2o_scsi_notify_controller_add - Retrieve notifications of added controllers * @c: the controller which was added * * If a I2O controller is added, we catch the notification to add a @@ -457,8 +461,7 @@ static void i2o_scsi_notify_controller_add(struct i2o_controller *c) }; /** - * i2o_scsi_notify_controller_remove - Retrieve notifications of removed - * controllers + * i2o_scsi_notify_controller_remove - Retrieve notifications of removed controllers * @c: the controller which was removed * * If a I2O controller is removed, we catch the notification to remove the @@ -745,7 +748,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) * @capacity: size in sectors * @ip: geometry array * - * This is anyones guess quite frankly. We use the same rules everyone + * This is anyone's guess quite frankly. We use the same rules everyone * else appears to and hope. It seems to work. */ diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c index 8287f95c8c4..3661e6e065d 100644 --- a/drivers/message/i2o/pci.c +++ b/drivers/message/i2o/pci.c @@ -259,6 +259,7 @@ static irqreturn_t i2o_pci_interrupt(int irq, void *dev_id) /** * i2o_pci_irq_enable - Allocate interrupt for I2O controller + * @c: i2o_controller that the request is for * * Allocate an interrupt for the I2O controller, and activate interrupts * on the I2O controller. @@ -305,7 +306,7 @@ static void i2o_pci_irq_disable(struct i2o_controller *c) /** * i2o_pci_probe - Probe the PCI device for an I2O controller - * @dev: PCI device to test + * @pdev: PCI device to test * @id: id which matched with the PCI device id table * * Probe the PCI device for any device which is a memory of the @@ -447,7 +448,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, /** * i2o_pci_remove - Removes a I2O controller from the system - * pdev: I2O controller which should be removed + * @pdev: I2O controller which should be removed * * Reset the I2O controller, disable interrupts and remove all allocated * resources. diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 82938ad6ddb..ce1a4810821 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -28,7 +28,7 @@ #include <linux/string.h> #include <linux/input.h> #include <linux/device.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/slab.h> #include <linux/kthread.h> diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index ee326136d03..d61df5c3ac3 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -219,8 +219,9 @@ static int tifm_device_remove(struct device *dev) struct tifm_driver *drv = fm_dev->drv; if (drv) { - if (drv->remove) drv->remove(fm_dev); - fm_dev->drv = 0; + if (drv->remove) + drv->remove(fm_dev); + fm_dev->drv = NULL; } put_device(dev); diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ef4a731ca5c..334e078ffaf 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -451,7 +451,7 @@ static int __devinit m25p_probe(struct spi_device *spi) return -ENODEV; } - flash = kzalloc(sizeof *flash, SLAB_KERNEL); + flash = kzalloc(sizeof *flash, GFP_KERNEL); if (!flash) return -ENOMEM; diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c index 11d170afa9c..06e33786078 100644 --- a/drivers/net/3c501.c +++ b/drivers/net/3c501.c @@ -922,7 +922,7 @@ int __init init_module(void) * and then free up the resources we took when the card was found. */ -void cleanup_module(void) +void __exit cleanup_module(void) { struct net_device *dev = dev_3c501; unregister_netdev(dev); diff --git a/drivers/net/3c503.c b/drivers/net/3c503.c index a34b2206132..7e34c4f07b7 100644 --- a/drivers/net/3c503.c +++ b/drivers/net/3c503.c @@ -726,7 +726,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c index 458cb9cbe91..702bfb2a5e9 100644 --- a/drivers/net/3c505.c +++ b/drivers/net/3c505.c @@ -1670,7 +1670,7 @@ int __init init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c index aa43563610a..54e1d5aebed 100644 --- a/drivers/net/3c507.c +++ b/drivers/net/3c507.c @@ -940,7 +940,7 @@ int __init init_module(void) return IS_ERR(dev_3c507) ? PTR_ERR(dev_3c507) : 0; } -void +void __exit cleanup_module(void) { struct net_device *dev = dev_3c507; diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c index 91849469b4f..17d61eb0a7e 100644 --- a/drivers/net/3c523.c +++ b/drivers/net/3c523.c @@ -1302,7 +1302,7 @@ int __init init_module(void) } else return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; for (this_dev=0; this_dev<MAX_3C523_CARDS; this_dev++) { diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index f4aca5386ad..6c7437e60bd 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -1659,7 +1659,7 @@ int __init init_module(void) * transmit operations are allowed to start scribbling into memory. */ -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(this_device); cleanup_card(this_device); diff --git a/drivers/net/ac3200.c b/drivers/net/ac3200.c index 0dca8bb9d2c..c01f87f5bed 100644 --- a/drivers/net/ac3200.c +++ b/drivers/net/ac3200.c @@ -405,7 +405,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/apne.c b/drivers/net/apne.c index 9164d8cd670..d4e40816907 100644 --- a/drivers/net/apne.c +++ b/drivers/net/apne.c @@ -568,7 +568,7 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id) #ifdef MODULE static struct net_device *apne_dev; -int init_module(void) +int __init init_module(void) { apne_dev = apne_probe(-1); if (IS_ERR(apne_dev)) @@ -576,7 +576,7 @@ int init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(apne_dev); diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index cc1a27ed197..dba5e516545 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -1041,7 +1041,7 @@ int __init init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(cops_dev); cleanup_card(cops_dev); diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c index b54b857e357..fada15d959d 100644 --- a/drivers/net/arm/at91_ether.c +++ b/drivers/net/arm/at91_ether.c @@ -41,9 +41,6 @@ #define DRV_NAME "at91_ether" #define DRV_VERSION "1.0" -static struct net_device *at91_dev; - -static struct timer_list check_timer; #define LINK_POLL_INTERVAL (HZ) /* ..................................................................... */ @@ -146,7 +143,7 @@ static void read_phy(unsigned char phy_addr, unsigned char address, unsigned int */ static void update_linkspeed(struct net_device *dev, int silent) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned int bmsr, bmcr, lpa, mac_cfg; unsigned int speed, duplex; @@ -199,7 +196,7 @@ static void update_linkspeed(struct net_device *dev, int silent) static irqreturn_t at91ether_phy_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *) dev_id; - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned int phy; /* @@ -242,7 +239,7 @@ done: */ static void enable_phyirq(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned int dsintr, irq_number; int status; @@ -252,8 +249,7 @@ static void enable_phyirq(struct net_device *dev) * PHY doesn't have an IRQ pin (RTL8201, DP83847, AC101L), * or board does not have it connected. */ - check_timer.expires = jiffies + LINK_POLL_INTERVAL; - add_timer(&check_timer); + mod_timer(&lp->check_timer, jiffies + LINK_POLL_INTERVAL); return; } @@ -294,13 +290,13 @@ static void enable_phyirq(struct net_device *dev) */ static void disable_phyirq(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned int dsintr; unsigned int irq_number; irq_number = lp->board_data.phy_irq_pin; if (!irq_number) { - del_timer_sync(&check_timer); + del_timer_sync(&lp->check_timer); return; } @@ -340,7 +336,7 @@ static void disable_phyirq(struct net_device *dev) #if 0 static void reset_phy(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned int bmcr; spin_lock_irq(&lp->lock); @@ -362,13 +358,13 @@ static void reset_phy(struct net_device *dev) static void at91ether_check_link(unsigned long dev_id) { struct net_device *dev = (struct net_device *) dev_id; + struct at91_private *lp = netdev_priv(dev); enable_mdi(); update_linkspeed(dev, 1); disable_mdi(); - check_timer.expires = jiffies + LINK_POLL_INTERVAL; - add_timer(&check_timer); + mod_timer(&lp->check_timer, jiffies + LINK_POLL_INTERVAL); } /* ......................... ADDRESS MANAGEMENT ........................ */ @@ -590,7 +586,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int at91ether_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); int ret; spin_lock_irq(&lp->lock); @@ -611,7 +607,7 @@ static int at91ether_get_settings(struct net_device *dev, struct ethtool_cmd *cm static int at91ether_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); int ret; spin_lock_irq(&lp->lock); @@ -627,7 +623,7 @@ static int at91ether_set_settings(struct net_device *dev, struct ethtool_cmd *cm static int at91ether_nwayreset(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); int ret; spin_lock_irq(&lp->lock); @@ -658,7 +654,7 @@ static const struct ethtool_ops at91ether_ethtool_ops = { static int at91ether_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); int res; if (!netif_running(dev)) @@ -680,7 +676,7 @@ static int at91ether_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) */ static void at91ether_start(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); struct recv_desc_bufs *dlist, *dlist_phys; int i; unsigned long ctl; @@ -712,7 +708,7 @@ static void at91ether_start(struct net_device *dev) */ static int at91ether_open(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned long ctl; if (!is_valid_ether_addr(dev->dev_addr)) @@ -752,7 +748,7 @@ static int at91ether_open(struct net_device *dev) */ static int at91ether_close(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned long ctl; /* Disable Receiver and Transmitter */ @@ -779,7 +775,7 @@ static int at91ether_close(struct net_device *dev) */ static int at91ether_tx(struct sk_buff *skb, struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); if (at91_emac_read(AT91_EMAC_TSR) & AT91_EMAC_TSR_BNQ) { netif_stop_queue(dev); @@ -811,7 +807,7 @@ static int at91ether_tx(struct sk_buff *skb, struct net_device *dev) */ static struct net_device_stats *at91ether_stats(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); int ale, lenerr, seqe, lcol, ecol; if (netif_running(dev)) { @@ -847,7 +843,7 @@ static struct net_device_stats *at91ether_stats(struct net_device *dev) */ static void at91ether_rx(struct net_device *dev) { - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); struct recv_desc_bufs *dlist; unsigned char *p_recv; struct sk_buff *skb; @@ -857,14 +853,13 @@ static void at91ether_rx(struct net_device *dev) while (dlist->descriptors[lp->rxBuffIndex].addr & EMAC_DESC_DONE) { p_recv = dlist->recv_buf[lp->rxBuffIndex]; pktlen = dlist->descriptors[lp->rxBuffIndex].size & 0x7ff; /* Length of frame including FCS */ - skb = alloc_skb(pktlen + 2, GFP_ATOMIC); + skb = dev_alloc_skb(pktlen + 2); if (skb != NULL) { skb_reserve(skb, 2); memcpy(skb_put(skb, pktlen), p_recv, pktlen); skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); - skb->len = pktlen; dev->last_rx = jiffies; lp->stats.rx_bytes += pktlen; netif_rx(skb); @@ -891,7 +886,7 @@ static void at91ether_rx(struct net_device *dev) static irqreturn_t at91ether_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *) dev_id; - struct at91_private *lp = (struct at91_private *) dev->priv; + struct at91_private *lp = netdev_priv(dev); unsigned long intstatus, ctl; /* MAC Interrupt Status register indicates what interrupts are pending. @@ -927,6 +922,17 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void at91ether_poll_controller(struct net_device *dev) +{ + unsigned long flags; + + local_irq_save(flags); + at91ether_interrupt(dev->irq, dev); + local_irq_restore(flags); +} +#endif + /* * Initialize the ethernet interface */ @@ -939,9 +945,6 @@ static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_add unsigned int val; int res; - if (at91_dev) /* already initialized */ - return 0; - dev = alloc_etherdev(sizeof(struct at91_private)); if (!dev) return -ENOMEM; @@ -957,7 +960,7 @@ static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_add } /* Allocate memory for DMA Receive descriptors */ - lp = (struct at91_private *)dev->priv; + lp = netdev_priv(dev); lp->dlist = (struct recv_desc_bufs *) dma_alloc_coherent(NULL, sizeof(struct recv_desc_bufs), (dma_addr_t *) &lp->dlist_phys, GFP_KERNEL); if (lp->dlist == NULL) { free_irq(dev->irq, dev); @@ -979,6 +982,9 @@ static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_add dev->set_mac_address = set_mac_address; dev->ethtool_ops = &at91ether_ethtool_ops; dev->do_ioctl = at91ether_ioctl; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = at91ether_poll_controller; +#endif SET_NETDEV_DEV(dev, &pdev->dev); @@ -1024,7 +1030,6 @@ static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_add dma_free_coherent(NULL, sizeof(struct recv_desc_bufs), lp->dlist, (dma_addr_t)lp->dlist_phys); return res; } - at91_dev = dev; /* Determine current link speed */ spin_lock_irq(&lp->lock); @@ -1036,9 +1041,9 @@ static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_add /* If board has no PHY IRQ, use a timer to poll the PHY */ if (!lp->board_data.phy_irq_pin) { - init_timer(&check_timer); - check_timer.data = (unsigned long)dev; - check_timer.function = at91ether_check_link; + init_timer(&lp->check_timer); + lp->check_timer.data = (unsigned long)dev; + lp->check_timer.function = at91ether_check_link; } /* Display ethernet banner */ @@ -1115,15 +1120,16 @@ static int __init at91ether_probe(struct platform_device *pdev) static int __devexit at91ether_remove(struct platform_device *pdev) { - struct at91_private *lp = (struct at91_private *) at91_dev->priv; + struct net_device *dev = platform_get_drvdata(pdev); + struct at91_private *lp = netdev_priv(dev); - unregister_netdev(at91_dev); - free_irq(at91_dev->irq, at91_dev); + unregister_netdev(dev); + free_irq(dev->irq, dev); dma_free_coherent(NULL, sizeof(struct recv_desc_bufs), lp->dlist, (dma_addr_t)lp->dlist_phys); clk_put(lp->ether_clk); - free_netdev(at91_dev); - at91_dev = NULL; + platform_set_drvdata(pdev, NULL); + free_netdev(dev); return 0; } @@ -1131,8 +1137,8 @@ static int __devexit at91ether_remove(struct platform_device *pdev) static int at91ether_suspend(struct platform_device *pdev, pm_message_t mesg) { - struct at91_private *lp = (struct at91_private *) at91_dev->priv; struct net_device *net_dev = platform_get_drvdata(pdev); + struct at91_private *lp = netdev_priv(net_dev); int phy_irq = lp->board_data.phy_irq_pin; if (netif_running(net_dev)) { @@ -1149,8 +1155,8 @@ static int at91ether_suspend(struct platform_device *pdev, pm_message_t mesg) static int at91ether_resume(struct platform_device *pdev) { - struct at91_private *lp = (struct at91_private *) at91_dev->priv; struct net_device *net_dev = platform_get_drvdata(pdev); + struct at91_private *lp = netdev_priv(net_dev); int phy_irq = lp->board_data.phy_irq_pin; if (netif_running(net_dev)) { diff --git a/drivers/net/arm/at91_ether.h b/drivers/net/arm/at91_ether.h index d1e72e02be3..b6b665de2ea 100644 --- a/drivers/net/arm/at91_ether.h +++ b/drivers/net/arm/at91_ether.h @@ -87,6 +87,7 @@ struct at91_private spinlock_t lock; /* lock for MDI interface */ short phy_media; /* media interface type */ unsigned short phy_address; /* 5-bit MDI address of PHY (0..31) */ + struct timer_list check_timer; /* Poll link status */ /* Transmit */ struct sk_buff *skb; /* holds skb until xmit interrupt completes */ diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c index f3478a30e77..d6da3ce9ad7 100644 --- a/drivers/net/arm/ether1.c +++ b/drivers/net/arm/ether1.c @@ -254,7 +254,7 @@ ether1_readbuffer (struct net_device *dev, void *data, unsigned int start, unsig } while (thislen); } -static int __init +static int __devinit ether1_ramtest(struct net_device *dev, unsigned char byte) { unsigned char *buffer = kmalloc (BUFFER_SIZE, GFP_KERNEL); @@ -308,7 +308,7 @@ ether1_reset (struct net_device *dev) return BUS_16; } -static int __init +static int __devinit ether1_init_2(struct net_device *dev) { int i; @@ -986,7 +986,7 @@ ether1_setmulticastlist (struct net_device *dev) /* ------------------------------------------------------------------------- */ -static void __init ether1_banner(void) +static void __devinit ether1_banner(void) { static unsigned int version_printed = 0; diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c index 84686c8a5bc..4fc234785d5 100644 --- a/drivers/net/arm/ether3.c +++ b/drivers/net/arm/ether3.c @@ -198,7 +198,7 @@ static inline void ether3_ledon(struct net_device *dev) * Read the ethernet address string from the on board rom. * This is an ascii string!!! */ -static int __init +static int __devinit ether3_addr(char *addr, struct expansion_card *ec) { struct in_chunk_dir cd; @@ -223,7 +223,7 @@ ether3_addr(char *addr, struct expansion_card *ec) /* --------------------------------------------------------------------------- */ -static int __init +static int __devinit ether3_ramtest(struct net_device *dev, unsigned char byte) { unsigned char *buffer = kmalloc(RX_END, GFP_KERNEL); @@ -272,7 +272,7 @@ ether3_ramtest(struct net_device *dev, unsigned char byte) /* ------------------------------------------------------------------------------- */ -static int __init ether3_init_2(struct net_device *dev) +static int __devinit ether3_init_2(struct net_device *dev) { int i; @@ -765,7 +765,7 @@ static void ether3_tx(struct net_device *dev) } } -static void __init ether3_banner(void) +static void __devinit ether3_banner(void) { static unsigned version_printed = 0; diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c index 8620a5b470f..56ae8babd91 100644 --- a/drivers/net/at1700.c +++ b/drivers/net/at1700.c @@ -908,7 +908,7 @@ int __init init_module(void) return 0; } -void +void __exit cleanup_module(void) { unregister_netdev(dev_at1700); diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c index d79489e4624..7e37ac86a69 100644 --- a/drivers/net/atarilance.c +++ b/drivers/net/atarilance.c @@ -1179,7 +1179,7 @@ static int lance_set_mac_address( struct net_device *dev, void *addr ) #ifdef MODULE static struct net_device *atarilance_dev; -int init_module(void) +int __init init_module(void) { atarilance_dev = atarilance_probe(-1); if (IS_ERR(atarilance_dev)) @@ -1187,7 +1187,7 @@ int init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(atarilance_dev); free_irq(atarilance_dev->irq, atarilance_dev); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 488d8ed9e74..6482aed4bb7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3684,7 +3684,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd mii->val_out = 0; read_lock_bh(&bond->lock); read_lock(&bond->curr_slave_lock); - if (bond->curr_active_slave) { + if (netif_carrier_ok(bond->dev)) { mii->val_out = BMSR_LSTATUS; } read_unlock(&bond->curr_slave_lock); diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index dec70c2b374..4612f71a710 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -1974,7 +1974,7 @@ out: return ret; } -void +void __exit cleanup_module(void) { unregister_netdev(dev_cs89x0); diff --git a/drivers/net/de600.c b/drivers/net/de600.c index 690bb40b353..8396e411f1c 100644 --- a/drivers/net/de600.c +++ b/drivers/net/de600.c @@ -43,7 +43,6 @@ static const char version[] = "de600.c: $Revision: 1.41-2.5 $, Bjorn Ekwall (bj * modify the following "#define": (see <asm/io.h> for more info) #define REALLY_SLOW_IO */ -#define SLOW_IO_BY_JUMPING /* Looks "better" than dummy write to port 0x80 :-) */ /* use 0 for production, 1 for verification, >2 for debug */ #ifdef DE600_DEBUG diff --git a/drivers/net/declance.c b/drivers/net/declance.c index 00e2a8a134d..4ae0fed7122 100644 --- a/drivers/net/declance.c +++ b/drivers/net/declance.c @@ -40,6 +40,10 @@ * * v0.009: Module support fixes, multiple interfaces support, various * bits. macro + * + * v0.010: Fixes for the PMAD mapping of the LANCE buffer and for the + * PMAX requirement to only use halfword accesses to the + * buffer. macro */ #include <linux/crc32.h> @@ -54,6 +58,7 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/types.h> #include <asm/addrspace.h> #include <asm/system.h> @@ -67,7 +72,7 @@ #include <asm/dec/tc.h> static char version[] __devinitdata = -"declance.c: v0.009 by Linux MIPS DECstation task force\n"; +"declance.c: v0.010 by Linux MIPS DECstation task force\n"; MODULE_AUTHOR("Linux MIPS DECstation task force"); MODULE_DESCRIPTION("DEC LANCE (DECstation onboard, PMAD-xx) driver"); @@ -110,24 +115,25 @@ MODULE_LICENSE("GPL"); #define LE_C3_BCON 0x1 /* Byte control */ /* Receive message descriptor 1 */ -#define LE_R1_OWN 0x80 /* Who owns the entry */ -#define LE_R1_ERR 0x40 /* Error: if FRA, OFL, CRC or BUF is set */ -#define LE_R1_FRA 0x20 /* FRA: Frame error */ -#define LE_R1_OFL 0x10 /* OFL: Frame overflow */ -#define LE_R1_CRC 0x08 /* CRC error */ -#define LE_R1_BUF 0x04 /* BUF: Buffer error */ -#define LE_R1_SOP 0x02 /* Start of packet */ -#define LE_R1_EOP 0x01 /* End of packet */ -#define LE_R1_POK 0x03 /* Packet is complete: SOP + EOP */ - -#define LE_T1_OWN 0x80 /* Lance owns the packet */ -#define LE_T1_ERR 0x40 /* Error summary */ -#define LE_T1_EMORE 0x10 /* Error: more than one retry needed */ -#define LE_T1_EONE 0x08 /* Error: one retry needed */ -#define LE_T1_EDEF 0x04 /* Error: deferred */ -#define LE_T1_SOP 0x02 /* Start of packet */ -#define LE_T1_EOP 0x01 /* End of packet */ -#define LE_T1_POK 0x03 /* Packet is complete: SOP + EOP */ +#define LE_R1_OWN 0x8000 /* Who owns the entry */ +#define LE_R1_ERR 0x4000 /* Error: if FRA, OFL, CRC or BUF is set */ +#define LE_R1_FRA 0x2000 /* FRA: Frame error */ +#define LE_R1_OFL 0x1000 /* OFL: Frame overflow */ +#define LE_R1_CRC 0x0800 /* CRC error */ +#define LE_R1_BUF 0x0400 /* BUF: Buffer error */ +#define LE_R1_SOP 0x0200 /* Start of packet */ +#define LE_R1_EOP 0x0100 /* End of packet */ +#define LE_R1_POK 0x0300 /* Packet is complete: SOP + EOP */ + +/* Transmit message descriptor 1 */ +#define LE_T1_OWN 0x8000 /* Lance owns the packet */ +#define LE_T1_ERR 0x4000 /* Error summary */ +#define LE_T1_EMORE 0x1000 /* Error: more than one retry needed */ +#define LE_T1_EONE 0x0800 /* Error: one retry needed */ +#define LE_T1_EDEF 0x0400 /* Error: deferred */ +#define LE_T1_SOP 0x0200 /* Start of packet */ +#define LE_T1_EOP 0x0100 /* End of packet */ +#define LE_T1_POK 0x0300 /* Packet is complete: SOP + EOP */ #define LE_T3_BUF 0x8000 /* Buffer error */ #define LE_T3_UFL 0x4000 /* Error underflow */ @@ -156,69 +162,57 @@ MODULE_LICENSE("GPL"); #undef TEST_HITS #define ZERO 0 -/* The DS2000/3000 have a linear 64 KB buffer. - - * The PMAD-AA has 128 kb buffer on-board. +/* + * The DS2100/3100 have a linear 64 kB buffer which supports halfword + * accesses only. Each halfword of the buffer is word-aligned in the + * CPU address space. * - * The IOASIC LANCE devices use a shared memory region. This region as seen - * from the CPU is (max) 128 KB long and has to be on an 128 KB boundary. - * The LANCE sees this as a 64 KB long continuous memory region. + * The PMAD-AA has a 128 kB buffer on-board. * - * The LANCE's DMA address is used as an index in this buffer and DMA takes - * place in bursts of eight 16-Bit words which are packed into four 32-Bit words - * by the IOASIC. This leads to a strange padding: 16 bytes of valid data followed - * by a 16 byte gap :-(. + * The IOASIC LANCE devices use a shared memory region. This region + * as seen from the CPU is (max) 128 kB long and has to be on an 128 kB + * boundary. The LANCE sees this as a 64 kB long continuous memory + * region. + * + * The LANCE's DMA address is used as an index in this buffer and DMA + * takes place in bursts of eight 16-bit words which are packed into + * four 32-bit words by the IOASIC. This leads to a strange padding: + * 16 bytes of valid data followed by a 16 byte gap :-(. */ struct lance_rx_desc { unsigned short rmd0; /* low address of packet */ - short gap0; - unsigned char rmd1_hadr; /* high address of packet */ - unsigned char rmd1_bits; /* descriptor bits */ - short gap1; + unsigned short rmd1; /* high address of packet + and descriptor bits */ short length; /* 2s complement (negative!) of buffer length */ - short gap2; unsigned short mblength; /* actual number of bytes received */ - short gap3; }; struct lance_tx_desc { unsigned short tmd0; /* low address of packet */ - short gap0; - unsigned char tmd1_hadr; /* high address of packet */ - unsigned char tmd1_bits; /* descriptor bits */ - short gap1; + unsigned short tmd1; /* high address of packet + and descriptor bits */ short length; /* 2s complement (negative!) of buffer length */ - short gap2; unsigned short misc; - short gap3; }; /* First part of the LANCE initialization block, described in databook. */ struct lance_init_block { unsigned short mode; /* pre-set mode (reg. 15) */ - short gap0; - unsigned char phys_addr[12]; /* physical ethernet address - only 0, 1, 4, 5, 8, 9 are valid - 2, 3, 6, 7, 10, 11 are gaps */ - unsigned short filter[8]; /* multicast filter - only 0, 2, 4, 6 are valid - 1, 3, 5, 7 are gaps */ + unsigned short phys_addr[3]; /* physical ethernet address */ + unsigned short filter[4]; /* multicast filter */ /* Receive and transmit ring base, along with extra bits. */ unsigned short rx_ptr; /* receive descriptor addr */ - short gap1; unsigned short rx_len; /* receive len and high addr */ - short gap2; unsigned short tx_ptr; /* transmit descriptor addr */ - short gap3; unsigned short tx_len; /* transmit len and high addr */ - short gap4; - short gap5[8]; + + short gap[4]; /* The buffer descriptors */ struct lance_rx_desc brx_ring[RX_RING_SIZE]; @@ -226,15 +220,28 @@ struct lance_init_block { }; #define BUF_OFFSET_CPU sizeof(struct lance_init_block) -#define BUF_OFFSET_LNC (sizeof(struct lance_init_block)>>1) +#define BUF_OFFSET_LNC sizeof(struct lance_init_block) -#define libdesc_offset(rt, elem) \ -((__u32)(((unsigned long)(&(((struct lance_init_block *)0)->rt[elem]))))) +#define shift_off(off, type) \ + (type == ASIC_LANCE || type == PMAX_LANCE ? off << 1 : off) -/* - * This works *only* for the ring descriptors - */ -#define LANCE_ADDR(x) (CPHYSADDR(x) >> 1) +#define lib_off(rt, type) \ + shift_off(offsetof(struct lance_init_block, rt), type) + +#define lib_ptr(ib, rt, type) \ + ((volatile u16 *)((u8 *)(ib) + lib_off(rt, type))) + +#define rds_off(rt, type) \ + shift_off(offsetof(struct lance_rx_desc, rt), type) + +#define rds_ptr(rd, rt, type) \ + ((volatile u16 *)((u8 *)(rd) + rds_off(rt, type))) + +#define tds_off(rt, type) \ + shift_off(offsetof(struct lance_tx_desc, rt), type) + +#define tds_ptr(td, rt, type) \ + ((volatile u16 *)((u8 *)(td) + tds_off(rt, type))) struct lance_private { struct net_device *next; @@ -242,7 +249,6 @@ struct lance_private { int slot; int dma_irq; volatile struct lance_regs *ll; - volatile struct lance_init_block *init_block; spinlock_t lock; @@ -260,8 +266,8 @@ struct lance_private { char *tx_buf_ptr_cpu[TX_RING_SIZE]; /* Pointers to the ring buffers as seen from the LANCE */ - char *rx_buf_ptr_lnc[RX_RING_SIZE]; - char *tx_buf_ptr_lnc[TX_RING_SIZE]; + uint rx_buf_ptr_lnc[RX_RING_SIZE]; + uint tx_buf_ptr_lnc[TX_RING_SIZE]; }; #define TX_BUFFS_AVAIL ((lp->tx_old<=lp->tx_new)?\ @@ -294,7 +300,7 @@ static inline void writereg(volatile unsigned short *regptr, short value) static void load_csrs(struct lance_private *lp) { volatile struct lance_regs *ll = lp->ll; - int leptr; + uint leptr; /* The address space as seen from the LANCE * begins at address 0. HK @@ -316,12 +322,14 @@ static void load_csrs(struct lance_private *lp) * Our specialized copy routines * */ -void cp_to_buf(const int type, void *to, const void *from, int len) +static void cp_to_buf(const int type, void *to, const void *from, int len) { unsigned short *tp, *fp, clen; unsigned char *rtp, *rfp; - if (type == PMAX_LANCE) { + if (type == PMAD_LANCE) { + memcpy(to, from, len); + } else if (type == PMAX_LANCE) { clen = len >> 1; tp = (unsigned short *) to; fp = (unsigned short *) from; @@ -370,12 +378,14 @@ void cp_to_buf(const int type, void *to, const void *from, int len) iob(); } -void cp_from_buf(const int type, void *to, const void *from, int len) +static void cp_from_buf(const int type, void *to, const void *from, int len) { unsigned short *tp, *fp, clen; unsigned char *rtp, *rfp; - if (type == PMAX_LANCE) { + if (type == PMAD_LANCE) { + memcpy(to, from, len); + } else if (type == PMAX_LANCE) { clen = len >> 1; tp = (unsigned short *) to; fp = (unsigned short *) from; @@ -431,12 +441,10 @@ void cp_from_buf(const int type, void *to, const void *from, int len) static void lance_init_ring(struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); - volatile struct lance_init_block *ib; - int leptr; + volatile u16 *ib = (volatile u16 *)dev->mem_start; + uint leptr; int i; - ib = (struct lance_init_block *) (dev->mem_start); - /* Lock out other processes while setting up hardware */ netif_stop_queue(dev); lp->rx_new = lp->tx_new = 0; @@ -445,55 +453,64 @@ static void lance_init_ring(struct net_device *dev) /* Copy the ethernet address to the lance init block. * XXX bit 0 of the physical address registers has to be zero */ - ib->phys_addr[0] = dev->dev_addr[0]; - ib->phys_addr[1] = dev->dev_addr[1]; - ib->phys_addr[4] = dev->dev_addr[2]; - ib->phys_addr[5] = dev->dev_addr[3]; - ib->phys_addr[8] = dev->dev_addr[4]; - ib->phys_addr[9] = dev->dev_addr[5]; + *lib_ptr(ib, phys_addr[0], lp->type) = (dev->dev_addr[1] << 8) | + dev->dev_addr[0]; + *lib_ptr(ib, phys_addr[1], lp->type) = (dev->dev_addr[3] << 8) | + dev->dev_addr[2]; + *lib_ptr(ib, phys_addr[2], lp->type) = (dev->dev_addr[5] << 8) | + dev->dev_addr[4]; /* Setup the initialization block */ /* Setup rx descriptor pointer */ - leptr = LANCE_ADDR(libdesc_offset(brx_ring, 0)); - ib->rx_len = (LANCE_LOG_RX_BUFFERS << 13) | (leptr >> 16); - ib->rx_ptr = leptr; + leptr = offsetof(struct lance_init_block, brx_ring); + *lib_ptr(ib, rx_len, lp->type) = (LANCE_LOG_RX_BUFFERS << 13) | + (leptr >> 16); + *lib_ptr(ib, rx_ptr, lp->type) = leptr; if (ZERO) - printk("RX ptr: %8.8x(%8.8x)\n", leptr, libdesc_offset(brx_ring, 0)); + printk("RX ptr: %8.8x(%8.8x)\n", + leptr, lib_off(brx_ring, lp->type)); /* Setup tx descriptor pointer */ - leptr = LANCE_ADDR(libdesc_offset(btx_ring, 0)); - ib->tx_len = (LANCE_LOG_TX_BUFFERS << 13) | (leptr >> 16); - ib->tx_ptr = leptr; + leptr = offsetof(struct lance_init_block, btx_ring); + *lib_ptr(ib, tx_len, lp->type) = (LANCE_LOG_TX_BUFFERS << 13) | + (leptr >> 16); + *lib_ptr(ib, tx_ptr, lp->type) = leptr; if (ZERO) - printk("TX ptr: %8.8x(%8.8x)\n", leptr, libdesc_offset(btx_ring, 0)); + printk("TX ptr: %8.8x(%8.8x)\n", + leptr, lib_off(btx_ring, lp->type)); if (ZERO) printk("TX rings:\n"); /* Setup the Tx ring entries */ for (i = 0; i < TX_RING_SIZE; i++) { - leptr = (int) lp->tx_buf_ptr_lnc[i]; - ib->btx_ring[i].tmd0 = leptr; - ib->btx_ring[i].tmd1_hadr = leptr >> 16; - ib->btx_ring[i].tmd1_bits = 0; - ib->btx_ring[i].length = 0xf000; /* The ones required by tmd2 */ - ib->btx_ring[i].misc = 0; + leptr = lp->tx_buf_ptr_lnc[i]; + *lib_ptr(ib, btx_ring[i].tmd0, lp->type) = leptr; + *lib_ptr(ib, btx_ring[i].tmd1, lp->type) = (leptr >> 16) & + 0xff; + *lib_ptr(ib, btx_ring[i].length, lp->type) = 0xf000; + /* The ones required by tmd2 */ + *lib_ptr(ib, btx_ring[i].misc, lp->type) = 0; if (i < 3 && ZERO) - printk("%d: 0x%8.8x(0x%8.8x)\n", i, leptr, (int) lp->tx_buf_ptr_cpu[i]); + printk("%d: 0x%8.8x(0x%8.8x)\n", + i, leptr, (uint)lp->tx_buf_ptr_cpu[i]); } /* Setup the Rx ring entries */ if (ZERO) printk("RX rings:\n"); for (i = 0; i < RX_RING_SIZE; i++) { - leptr = (int) lp->rx_buf_ptr_lnc[i]; - ib->brx_ring[i].rmd0 = leptr; - ib->brx_ring[i].rmd1_hadr = leptr >> 16; - ib->brx_ring[i].rmd1_bits = LE_R1_OWN; - ib->brx_ring[i].length = -RX_BUFF_SIZE | 0xf000; - ib->brx_ring[i].mblength = 0; + leptr = lp->rx_buf_ptr_lnc[i]; + *lib_ptr(ib, brx_ring[i].rmd0, lp->type) = leptr; + *lib_ptr(ib, brx_ring[i].rmd1, lp->type) = ((leptr >> 16) & + 0xff) | + LE_R1_OWN; + *lib_ptr(ib, brx_ring[i].length, lp->type) = -RX_BUFF_SIZE | + 0xf000; + *lib_ptr(ib, brx_ring[i].mblength, lp->type) = 0; if (i < 3 && ZERO) - printk("%d: 0x%8.8x(0x%8.8x)\n", i, leptr, (int) lp->rx_buf_ptr_cpu[i]); + printk("%d: 0x%8.8x(0x%8.8x)\n", + i, leptr, (uint)lp->rx_buf_ptr_cpu[i]); } iob(); } @@ -511,11 +528,13 @@ static int init_restart_lance(struct lance_private *lp) udelay(10); } if ((i == 100) || (ll->rdp & LE_C0_ERR)) { - printk("LANCE unopened after %d ticks, csr0=%4.4x.\n", i, ll->rdp); + printk("LANCE unopened after %d ticks, csr0=%4.4x.\n", + i, ll->rdp); return -1; } if ((ll->rdp & LE_C0_ERR)) { - printk("LANCE unopened after %d ticks, csr0=%4.4x.\n", i, ll->rdp); + printk("LANCE unopened after %d ticks, csr0=%4.4x.\n", + i, ll->rdp); return -1; } writereg(&ll->rdp, LE_C0_IDON); @@ -528,12 +547,11 @@ static int init_restart_lance(struct lance_private *lp) static int lance_rx(struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); - volatile struct lance_init_block *ib; - volatile struct lance_rx_desc *rd = 0; - unsigned char bits; - int len = 0; - struct sk_buff *skb = 0; - ib = (struct lance_init_block *) (dev->mem_start); + volatile u16 *ib = (volatile u16 *)dev->mem_start; + volatile u16 *rd; + unsigned short bits; + int entry, len; + struct sk_buff *skb; #ifdef TEST_HITS { @@ -542,19 +560,22 @@ static int lance_rx(struct net_device *dev) printk("["); for (i = 0; i < RX_RING_SIZE; i++) { if (i == lp->rx_new) - printk("%s", ib->brx_ring[i].rmd1_bits & + printk("%s", *lib_ptr(ib, brx_ring[i].rmd1, + lp->type) & LE_R1_OWN ? "_" : "X"); else - printk("%s", ib->brx_ring[i].rmd1_bits & + printk("%s", *lib_ptr(ib, brx_ring[i].rmd1, + lp->type) & LE_R1_OWN ? "." : "1"); } printk("]"); } #endif - for (rd = &ib->brx_ring[lp->rx_new]; - !((bits = rd->rmd1_bits) & LE_R1_OWN); - rd = &ib->brx_ring[lp->rx_new]) { + for (rd = lib_ptr(ib, brx_ring[lp->rx_new], lp->type); + !((bits = *rds_ptr(rd, rmd1, lp->type)) & LE_R1_OWN); + rd = lib_ptr(ib, brx_ring[lp->rx_new], lp->type)) { + entry = lp->rx_new; /* We got an incomplete frame? */ if ((bits & LE_R1_POK) != LE_R1_POK) { @@ -575,16 +596,18 @@ static int lance_rx(struct net_device *dev) if (bits & LE_R1_EOP) lp->stats.rx_errors++; } else { - len = (rd->mblength & 0xfff) - 4; + len = (*rds_ptr(rd, mblength, lp->type) & 0xfff) - 4; skb = dev_alloc_skb(len + 2); if (skb == 0) { printk("%s: Memory squeeze, deferring packet.\n", dev->name); lp->stats.rx_dropped++; - rd->mblength = 0; - rd->rmd1_bits = LE_R1_OWN; - lp->rx_new = (lp->rx_new + 1) & RX_RING_MOD_MASK; + *rds_ptr(rd, mblength, lp->type) = 0; + *rds_ptr(rd, rmd1, lp->type) = + ((lp->rx_buf_ptr_lnc[entry] >> 16) & + 0xff) | LE_R1_OWN; + lp->rx_new = (entry + 1) & RX_RING_MOD_MASK; return 0; } lp->stats.rx_bytes += len; @@ -594,8 +617,7 @@ static int lance_rx(struct net_device *dev) skb_put(skb, len); /* make room */ cp_from_buf(lp->type, skb->data, - (char *)lp->rx_buf_ptr_cpu[lp->rx_new], - len); + (char *)lp->rx_buf_ptr_cpu[entry], len); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); @@ -604,10 +626,11 @@ static int lance_rx(struct net_device *dev) } /* Return the packet to the pool */ - rd->mblength = 0; - rd->length = -RX_BUFF_SIZE | 0xf000; - rd->rmd1_bits = LE_R1_OWN; - lp->rx_new = (lp->rx_new + 1) & RX_RING_MOD_MASK; + *rds_ptr(rd, mblength, lp->type) = 0; + *rds_ptr(rd, length, lp->type) = -RX_BUFF_SIZE | 0xf000; + *rds_ptr(rd, rmd1, lp->type) = + ((lp->rx_buf_ptr_lnc[entry] >> 16) & 0xff) | LE_R1_OWN; + lp->rx_new = (entry + 1) & RX_RING_MOD_MASK; } return 0; } @@ -615,24 +638,24 @@ static int lance_rx(struct net_device *dev) static void lance_tx(struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); - volatile struct lance_init_block *ib; + volatile u16 *ib = (volatile u16 *)dev->mem_start; volatile struct lance_regs *ll = lp->ll; - volatile struct lance_tx_desc *td; + volatile u16 *td; int i, j; int status; - ib = (struct lance_init_block *) (dev->mem_start); + j = lp->tx_old; spin_lock(&lp->lock); for (i = j; i != lp->tx_new; i = j) { - td = &ib->btx_ring[i]; + td = lib_ptr(ib, btx_ring[i], lp->type); /* If we hit a packet not owned by us, stop */ - if (td->tmd1_bits & LE_T1_OWN) + if (*tds_ptr(td, tmd1, lp->type) & LE_T1_OWN) break; - if (td->tmd1_bits & LE_T1_ERR) { - status = td->misc; + if (*tds_ptr(td, tmd1, lp->type) & LE_T1_ERR) { + status = *tds_ptr(td, misc, lp->type); lp->stats.tx_errors++; if (status & LE_T3_RTY) @@ -667,18 +690,19 @@ static void lance_tx(struct net_device *dev) init_restart_lance(lp); goto out; } - } else if ((td->tmd1_bits & LE_T1_POK) == LE_T1_POK) { + } else if ((*tds_ptr(td, tmd1, lp->type) & LE_T1_POK) == + LE_T1_POK) { /* * So we don't count the packet more than once. */ - td->tmd1_bits &= ~(LE_T1_POK); + *tds_ptr(td, tmd1, lp->type) &= ~(LE_T1_POK); /* One collision before packet was sent. */ - if (td->tmd1_bits & LE_T1_EONE) + if (*tds_ptr(td, tmd1, lp->type) & LE_T1_EONE) lp->stats.collisions++; /* More than one collision, be optimistic. */ - if (td->tmd1_bits & LE_T1_EMORE) + if (*tds_ptr(td, tmd1, lp->type) & LE_T1_EMORE) lp->stats.collisions += 2; lp->stats.tx_packets++; @@ -752,7 +776,7 @@ struct net_device *last_dev = 0; static int lance_open(struct net_device *dev) { - volatile struct lance_init_block *ib = (struct lance_init_block *) (dev->mem_start); + volatile u16 *ib = (volatile u16 *)dev->mem_start; struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; int status = 0; @@ -769,11 +793,11 @@ static int lance_open(struct net_device *dev) * * BTW it is common bug in all lance drivers! --ANK */ - ib->mode = 0; - ib->filter [0] = 0; - ib->filter [2] = 0; - ib->filter [4] = 0; - ib->filter [6] = 0; + *lib_ptr(ib, mode, lp->type) = 0; + *lib_ptr(ib, filter[0], lp->type) = 0; + *lib_ptr(ib, filter[1], lp->type) = 0; + *lib_ptr(ib, filter[2], lp->type) = 0; + *lib_ptr(ib, filter[3], lp->type) = 0; lance_init_ring(dev); load_csrs(lp); @@ -874,12 +898,10 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; - volatile struct lance_init_block *ib = (struct lance_init_block *) (dev->mem_start); - int entry, skblen, len; + volatile u16 *ib = (volatile u16 *)dev->mem_start; + int entry, len; - skblen = skb->len; - - len = skblen; + len = skb->len; if (len < ETH_ZLEN) { if (skb_padto(skb, ETH_ZLEN)) @@ -889,23 +911,17 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) lp->stats.tx_bytes += len; - entry = lp->tx_new & TX_RING_MOD_MASK; - ib->btx_ring[entry].length = (-len); - ib->btx_ring[entry].misc = 0; - - cp_to_buf(lp->type, (char *)lp->tx_buf_ptr_cpu[entry], skb->data, - skblen); + entry = lp->tx_new; + *lib_ptr(ib, btx_ring[entry].length, lp->type) = (-len); + *lib_ptr(ib, btx_ring[entry].misc, lp->type) = 0; - /* Clear the slack of the packet, do I need this? */ - /* For a firewall it's a good idea - AC */ -/* - if (len != skblen) - memset ((char *) &ib->tx_buf [entry][skblen], 0, (len - skblen) << 1); - */ + cp_to_buf(lp->type, (char *)lp->tx_buf_ptr_cpu[entry], skb->data, len); /* Now, give the packet to the lance */ - ib->btx_ring[entry].tmd1_bits = (LE_T1_POK | LE_T1_OWN); - lp->tx_new = (lp->tx_new + 1) & TX_RING_MOD_MASK; + *lib_ptr(ib, btx_ring[entry].tmd1, lp->type) = + ((lp->tx_buf_ptr_lnc[entry] >> 16) & 0xff) | + (LE_T1_POK | LE_T1_OWN); + lp->tx_new = (entry + 1) & TX_RING_MOD_MASK; if (TX_BUFFS_AVAIL <= 0) netif_stop_queue(dev); @@ -930,8 +946,8 @@ static struct net_device_stats *lance_get_stats(struct net_device *dev) static void lance_load_multicast(struct net_device *dev) { - volatile struct lance_init_block *ib = (struct lance_init_block *) (dev->mem_start); - volatile u16 *mcast_table = (u16 *) & ib->filter; + struct lance_private *lp = netdev_priv(dev); + volatile u16 *ib = (volatile u16 *)dev->mem_start; struct dev_mc_list *dmi = dev->mc_list; char *addrs; int i; @@ -939,17 +955,17 @@ static void lance_load_multicast(struct net_device *dev) /* set all multicast bits */ if (dev->flags & IFF_ALLMULTI) { - ib->filter[0] = 0xffff; - ib->filter[2] = 0xffff; - ib->filter[4] = 0xffff; - ib->filter[6] = 0xffff; + *lib_ptr(ib, filter[0], lp->type) = 0xffff; + *lib_ptr(ib, filter[1], lp->type) = 0xffff; + *lib_ptr(ib, filter[2], lp->type) = 0xffff; + *lib_ptr(ib, filter[3], lp->type) = 0xffff; return; } /* clear the multicast filter */ - ib->filter[0] = 0; - ib->filter[2] = 0; - ib->filter[4] = 0; - ib->filter[6] = 0; + *lib_ptr(ib, filter[0], lp->type) = 0; + *lib_ptr(ib, filter[1], lp->type) = 0; + *lib_ptr(ib, filter[2], lp->type) = 0; + *lib_ptr(ib, filter[3], lp->type) = 0; /* Add addresses */ for (i = 0; i < dev->mc_count; i++) { @@ -962,7 +978,7 @@ static void lance_load_multicast(struct net_device *dev) crc = ether_crc_le(ETH_ALEN, addrs); crc = crc >> 26; - mcast_table[2 * (crc >> 4)] |= 1 << (crc & 0xf); + *lib_ptr(ib, filter[crc >> 4], lp->type) |= 1 << (crc & 0xf); } return; } @@ -970,11 +986,9 @@ static void lance_load_multicast(struct net_device *dev) static void lance_set_multicast(struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); - volatile struct lance_init_block *ib; + volatile u16 *ib = (volatile u16 *)dev->mem_start; volatile struct lance_regs *ll = lp->ll; - ib = (struct lance_init_block *) (dev->mem_start); - if (!netif_running(dev)) return; @@ -992,9 +1006,9 @@ static void lance_set_multicast(struct net_device *dev) lance_init_ring(dev); if (dev->flags & IFF_PROMISC) { - ib->mode |= LE_MO_PROM; + *lib_ptr(ib, mode, lp->type) |= LE_MO_PROM; } else { - ib->mode &= ~LE_MO_PROM; + *lib_ptr(ib, mode, lp->type) &= ~LE_MO_PROM; lance_load_multicast(dev); } load_csrs(lp); @@ -1051,7 +1065,6 @@ static int __init dec_lance_init(const int type, const int slot) lp->type = type; lp->slot = slot; switch (type) { -#ifdef CONFIG_TC case ASIC_LANCE: dev->base_addr = CKSEG1ADDR(dec_kn_slot_base + IOASIC_LANCE); @@ -1073,20 +1086,20 @@ static int __init dec_lance_init(const int type, const int slot) */ for (i = 0; i < RX_RING_SIZE; i++) { lp->rx_buf_ptr_cpu[i] = - (char *)(dev->mem_start + BUF_OFFSET_CPU + + (char *)(dev->mem_start + 2 * BUF_OFFSET_CPU + 2 * i * RX_BUFF_SIZE); lp->rx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + i * RX_BUFF_SIZE); + (BUF_OFFSET_LNC + i * RX_BUFF_SIZE); } for (i = 0; i < TX_RING_SIZE; i++) { lp->tx_buf_ptr_cpu[i] = - (char *)(dev->mem_start + BUF_OFFSET_CPU + + (char *)(dev->mem_start + 2 * BUF_OFFSET_CPU + 2 * RX_RING_SIZE * RX_BUFF_SIZE + 2 * i * TX_BUFF_SIZE); lp->tx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + - RX_RING_SIZE * RX_BUFF_SIZE + - i * TX_BUFF_SIZE); + (BUF_OFFSET_LNC + + RX_RING_SIZE * RX_BUFF_SIZE + + i * TX_BUFF_SIZE); } /* Setup I/O ASIC LANCE DMA. */ @@ -1095,11 +1108,12 @@ static int __init dec_lance_init(const int type, const int slot) CPHYSADDR(dev->mem_start) << 3); break; - +#ifdef CONFIG_TC case PMAD_LANCE: claim_tc_card(slot); dev->mem_start = CKSEG1ADDR(get_tc_base_addr(slot)); + dev->mem_end = dev->mem_start + 0x100000; dev->base_addr = dev->mem_start + 0x100000; dev->irq = get_tc_irq_nr(slot); esar_base = dev->mem_start + 0x1c0002; @@ -1110,7 +1124,7 @@ static int __init dec_lance_init(const int type, const int slot) (char *)(dev->mem_start + BUF_OFFSET_CPU + i * RX_BUFF_SIZE); lp->rx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + i * RX_BUFF_SIZE); + (BUF_OFFSET_LNC + i * RX_BUFF_SIZE); } for (i = 0; i < TX_RING_SIZE; i++) { lp->tx_buf_ptr_cpu[i] = @@ -1118,18 +1132,18 @@ static int __init dec_lance_init(const int type, const int slot) RX_RING_SIZE * RX_BUFF_SIZE + i * TX_BUFF_SIZE); lp->tx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + - RX_RING_SIZE * RX_BUFF_SIZE + - i * TX_BUFF_SIZE); + (BUF_OFFSET_LNC + + RX_RING_SIZE * RX_BUFF_SIZE + + i * TX_BUFF_SIZE); } break; #endif - case PMAX_LANCE: dev->irq = dec_interrupt[DEC_IRQ_LANCE]; dev->base_addr = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE); dev->mem_start = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE_MEM); + dev->mem_end = dev->mem_start + KN01_SLOT_SIZE; esar_base = CKSEG1ADDR(KN01_SLOT_BASE + KN01_ESAR + 1); lp->dma_irq = -1; @@ -1138,20 +1152,20 @@ static int __init dec_lance_init(const int type, const int slot) */ for (i = 0; i < RX_RING_SIZE; i++) { lp->rx_buf_ptr_cpu[i] = - (char *)(dev->mem_start + BUF_OFFSET_CPU + + (char *)(dev->mem_start + 2 * BUF_OFFSET_CPU + 2 * i * RX_BUFF_SIZE); lp->rx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + i * RX_BUFF_SIZE); + (BUF_OFFSET_LNC + i * RX_BUFF_SIZE); } for (i = 0; i < TX_RING_SIZE; i++) { lp->tx_buf_ptr_cpu[i] = - (char *)(dev->mem_start + BUF_OFFSET_CPU + + (char *)(dev->mem_start + 2 * BUF_OFFSET_CPU + 2 * RX_RING_SIZE * RX_BUFF_SIZE + 2 * i * TX_BUFF_SIZE); lp->tx_buf_ptr_lnc[i] = - (char *)(BUF_OFFSET_LNC + - RX_RING_SIZE * RX_BUFF_SIZE + - i * TX_BUFF_SIZE); + (BUF_OFFSET_LNC + + RX_RING_SIZE * RX_BUFF_SIZE + + i * TX_BUFF_SIZE); } break; @@ -1279,10 +1293,8 @@ static int __init dec_lance_probe(void) /* Then handle onboard devices. */ if (dec_interrupt[DEC_IRQ_LANCE] >= 0) { if (dec_interrupt[DEC_IRQ_LANCE_MERR] >= 0) { -#ifdef CONFIG_TC if (dec_lance_init(ASIC_LANCE, -1) >= 0) count++; -#endif } else if (!TURBOCHANNEL) { if (dec_lance_init(PMAX_LANCE, -1) >= 0) count++; diff --git a/drivers/net/e2100.c b/drivers/net/e2100.c index d39e8480ca5..c62d9c6363c 100644 --- a/drivers/net/e2100.c +++ b/drivers/net/e2100.c @@ -463,7 +463,7 @@ static void cleanup_card(struct net_device *dev) release_region(dev->base_addr, E21_IO_EXTENT); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index a4eb0dc99ec..b4463094c93 100644 --- a/drivers/net/eepro.c +++ b/drivers/net/eepro.c @@ -1827,7 +1827,7 @@ int __init init_module(void) return n_eepro ? 0 : -ENODEV; } -void +void __exit cleanup_module(void) { int i; diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index e14be020e56..4a50fcb5ad6 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1719,7 +1719,7 @@ int __init init_module(void) return -ENXIO; } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/es3210.c b/drivers/net/es3210.c index fd7b32a24ea..2d2ea94a00b 100644 --- a/drivers/net/es3210.c +++ b/drivers/net/es3210.c @@ -455,7 +455,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index b7b8bc2a630..93283e386f3 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -1475,7 +1475,7 @@ int __init init_module(void) return -ENXIO; } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/hp-plus.c b/drivers/net/hp-plus.c index 6abcfd2a4b2..99a36cc3f8d 100644 --- a/drivers/net/hp-plus.c +++ b/drivers/net/hp-plus.c @@ -482,7 +482,7 @@ static void cleanup_card(struct net_device *dev) release_region(dev->base_addr - NIC_OFFSET, HP_IO_EXTENT); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/hp.c b/drivers/net/hp.c index 29470970aa2..635b13c2e2a 100644 --- a/drivers/net/hp.c +++ b/drivers/net/hp.c @@ -444,7 +444,7 @@ static void cleanup_card(struct net_device *dev) release_region(dev->base_addr - NIC_OFFSET, HP_IO_EXTENT); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index f9a1c88a428..9137e239fac 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -704,9 +704,9 @@ static int pxa_irda_stop(struct net_device *dev) return 0; } -static int pxa_irda_suspend(struct device *_dev, pm_message_t state) +static int pxa_irda_suspend(struct platform_device *_dev, pm_message_t state) { - struct net_device *dev = dev_get_drvdata(_dev); + struct net_device *dev = platform_get_drvdata(_dev); struct pxa_irda *si; if (dev && netif_running(dev)) { @@ -718,9 +718,9 @@ static int pxa_irda_suspend(struct device *_dev, pm_message_t state) return 0; } -static int pxa_irda_resume(struct device *_dev) +static int pxa_irda_resume(struct platform_device *_dev) { - struct net_device *dev = dev_get_drvdata(_dev); + struct net_device *dev = platform_get_drvdata(_dev); struct pxa_irda *si; if (dev && netif_running(dev)) { @@ -746,9 +746,8 @@ static int pxa_irda_init_iobuf(iobuff_t *io, int size) return io->head ? 0 : -ENOMEM; } -static int pxa_irda_probe(struct device *_dev) +static int pxa_irda_probe(struct platform_device *pdev) { - struct platform_device *pdev = to_platform_device(_dev); struct net_device *dev; struct pxa_irda *si; unsigned int baudrate_mask; @@ -822,9 +821,9 @@ err_mem_1: return err; } -static int pxa_irda_remove(struct device *_dev) +static int pxa_irda_remove(struct platform_device *_dev) { - struct net_device *dev = dev_get_drvdata(_dev); + struct net_device *dev = platform_get_drvdata(_dev); if (dev) { struct pxa_irda *si = netdev_priv(dev); @@ -840,9 +839,10 @@ static int pxa_irda_remove(struct device *_dev) return 0; } -static struct device_driver pxa_ir_driver = { - .name = "pxa2xx-ir", - .bus = &platform_bus_type, +static struct platform_driver pxa_ir_driver = { + .driver = { + .name = "pxa2xx-ir", + }, .probe = pxa_irda_probe, .remove = pxa_irda_remove, .suspend = pxa_irda_suspend, @@ -851,12 +851,12 @@ static struct device_driver pxa_ir_driver = { static int __init pxa_irda_init(void) { - return driver_register(&pxa_ir_driver); + return platform_driver_register(&pxa_ir_driver); } static void __exit pxa_irda_exit(void) { - driver_unregister(&pxa_ir_driver); + platform_driver_unregister(&pxa_ir_driver); } module_init(pxa_irda_init); diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 3b4c4787593..c14a74634fd 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -50,6 +50,7 @@ #include <linux/usb.h> #include <linux/crc32.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> #include <net/irda/irda_device.h> diff --git a/drivers/net/lance.c b/drivers/net/lance.c index 4256c13c73c..a3843320dbe 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -368,7 +368,7 @@ static void cleanup_card(struct net_device *dev) kfree(lp); } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index f4d815bca64..ea392f2a5aa 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -119,14 +119,14 @@ #define DEB(x,y) if (i596_debug & (x)) { y; } -#define CHECK_WBACK(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_TO_DEVICE); } while (0) +#define CHECK_WBACK(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_TO_DEVICE); } while (0) -#define CHECK_INV(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_FROM_DEVICE); } while(0) +#define CHECK_INV(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_FROM_DEVICE); } while(0) -#define CHECK_WBACK_INV(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_BIDIRECTIONAL); } while (0) +#define CHECK_WBACK_INV(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_BIDIRECTIONAL); } while (0) #define PA_I82596_RESET 0 /* Offsets relative to LASI-LAN-Addr.*/ @@ -449,10 +449,10 @@ static inline void MPU_PORT(struct net_device *dev, int c, dma_addr_t x) static inline int wait_istat(struct net_device *dev, struct i596_private *lp, int delcnt, char *str) { - CHECK_INV(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_INV(lp, &(lp->iscp), sizeof(struct i596_iscp)); while (--delcnt && lp->iscp.stat) { udelay(10); - CHECK_INV(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_INV(lp, &(lp->iscp), sizeof(struct i596_iscp)); } if (!delcnt) { printk("%s: %s, iscp.stat %04x, didn't clear\n", @@ -466,10 +466,10 @@ static inline int wait_istat(struct net_device *dev, struct i596_private *lp, in static inline int wait_cmd(struct net_device *dev, struct i596_private *lp, int delcnt, char *str) { - CHECK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); while (--delcnt && lp->scb.command) { udelay(10); - CHECK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); } if (!delcnt) { printk("%s: %s, status %4.4x, cmd %4.4x.\n", @@ -522,7 +522,7 @@ static void i596_display_data(struct net_device *dev) rbd, rbd->count, rbd->b_next, rbd->b_data, rbd->size); rbd = rbd->v_next; } while (rbd != lp->rbd_head); - CHECK_INV(lp, sizeof(struct i596_private)); + CHECK_INV(lp, lp, sizeof(struct i596_private)); } @@ -592,7 +592,7 @@ static inline void init_rx_bufs(struct net_device *dev) rfd->b_next = WSWAPrfd(virt_to_dma(lp,lp->rfds)); rfd->cmd = CMD_EOL|CMD_FLEX; - CHECK_WBACK_INV(lp, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, lp, sizeof(struct i596_private)); } static inline void remove_rx_bufs(struct net_device *dev) @@ -629,7 +629,7 @@ static void rebuild_rx_bufs(struct net_device *dev) lp->rbd_head = lp->rbds; lp->rfds[0].rbd = WSWAPrbd(virt_to_dma(lp,lp->rbds)); - CHECK_WBACK_INV(lp, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, lp, sizeof(struct i596_private)); } @@ -663,8 +663,8 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk("%s: starting i82596.\n", dev->name)); - CHECK_WBACK(&(lp->scp), sizeof(struct i596_scp)); - CHECK_WBACK(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_WBACK(lp, &(lp->scp), sizeof(struct i596_scp)); + CHECK_WBACK(lp, &(lp->iscp), sizeof(struct i596_iscp)); MPU_PORT(dev, PORT_ALTSCP, virt_to_dma(lp,&lp->scp)); @@ -678,25 +678,25 @@ static int init_i596_mem(struct net_device *dev) rebuild_rx_bufs(dev); lp->scb.command = 0; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); enable_irq(dev->irq); /* enable IRQs from LAN */ DEB(DEB_INIT, printk("%s: queuing CmdConfigure\n", dev->name)); memcpy(lp->cf_cmd.i596_config, init_setup, 14); lp->cf_cmd.cmd.command = CmdConfigure; - CHECK_WBACK(&(lp->cf_cmd), sizeof(struct cf_cmd)); + CHECK_WBACK(lp, &(lp->cf_cmd), sizeof(struct cf_cmd)); i596_add_cmd(dev, &lp->cf_cmd.cmd); DEB(DEB_INIT, printk("%s: queuing CmdSASetup\n", dev->name)); memcpy(lp->sa_cmd.eth_addr, dev->dev_addr, 6); lp->sa_cmd.cmd.command = CmdSASetup; - CHECK_WBACK(&(lp->sa_cmd), sizeof(struct sa_cmd)); + CHECK_WBACK(lp, &(lp->sa_cmd), sizeof(struct sa_cmd)); i596_add_cmd(dev, &lp->sa_cmd.cmd); DEB(DEB_INIT, printk("%s: queuing CmdTDR\n", dev->name)); lp->tdr_cmd.cmd.command = CmdTDR; - CHECK_WBACK(&(lp->tdr_cmd), sizeof(struct tdr_cmd)); + CHECK_WBACK(lp, &(lp->tdr_cmd), sizeof(struct tdr_cmd)); i596_add_cmd(dev, &lp->tdr_cmd.cmd); spin_lock_irqsave (&lp->lock, flags); @@ -708,7 +708,7 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk("%s: Issuing RX_START\n", dev->name)); lp->scb.command = RX_START; lp->scb.rfd = WSWAPrfd(virt_to_dma(lp,lp->rfds)); - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); @@ -740,13 +740,13 @@ static inline int i596_rx(struct net_device *dev) rfd = lp->rfd_head; /* Ref next frame to check */ - CHECK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_INV(lp, rfd, sizeof(struct i596_rfd)); while ((rfd->stat) & STAT_C) { /* Loop while complete frames */ if (rfd->rbd == I596_NULL) rbd = NULL; else if (rfd->rbd == lp->rbd_head->b_addr) { rbd = lp->rbd_head; - CHECK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_INV(lp, rbd, sizeof(struct i596_rbd)); } else { printk("%s: rbd chain broken!\n", dev->name); @@ -790,7 +790,7 @@ static inline int i596_rx(struct net_device *dev) dma_addr = dma_map_single(lp->dev, newskb->data, PKT_BUF_SZ, DMA_FROM_DEVICE); rbd->v_data = newskb->data; rbd->b_data = WSWAPchar(dma_addr); - CHECK_WBACK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_WBACK_INV(lp, rbd, sizeof(struct i596_rbd)); } else skb = dev_alloc_skb(pkt_len + 2); @@ -842,7 +842,7 @@ memory_squeeze: if (rbd != NULL && (rbd->count & 0x4000)) { rbd->count = 0; lp->rbd_head = rbd->v_next; - CHECK_WBACK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_WBACK_INV(lp, rbd, sizeof(struct i596_rbd)); } /* Tidy the frame descriptor, marking it as end of list */ @@ -860,10 +860,10 @@ memory_squeeze: lp->scb.rfd = rfd->b_next; lp->rfd_head = rfd->v_next; - CHECK_WBACK_INV(rfd->v_prev, sizeof(struct i596_rfd)); - CHECK_WBACK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_WBACK_INV(lp, rfd->v_prev, sizeof(struct i596_rfd)); + CHECK_WBACK_INV(lp, rfd, sizeof(struct i596_rfd)); rfd = lp->rfd_head; - CHECK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_INV(lp, rfd, sizeof(struct i596_rfd)); } DEB(DEB_RXFRAME, printk("frames %d\n", frames)); @@ -902,12 +902,12 @@ static inline void i596_cleanup_cmd(struct net_device *dev, struct i596_private ptr->v_next = NULL; ptr->b_next = I596_NULL; } - CHECK_WBACK_INV(ptr, sizeof(struct i596_cmd)); + CHECK_WBACK_INV(lp, ptr, sizeof(struct i596_cmd)); } wait_cmd(dev, lp, 100, "i596_cleanup_cmd timed out"); lp->scb.cmd = I596_NULL; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); } @@ -925,7 +925,7 @@ static inline void i596_reset(struct net_device *dev, struct i596_private *lp) /* FIXME: this command might cause an lpmc */ lp->scb.command = CUC_ABORT | RX_ABORT; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); /* wait for shutdown */ @@ -951,20 +951,20 @@ static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd) cmd->command |= (CMD_EOL | CMD_INTR); cmd->v_next = NULL; cmd->b_next = I596_NULL; - CHECK_WBACK(cmd, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, cmd, sizeof(struct i596_cmd)); spin_lock_irqsave (&lp->lock, flags); if (lp->cmd_head != NULL) { lp->cmd_tail->v_next = cmd; lp->cmd_tail->b_next = WSWAPcmd(virt_to_dma(lp,&cmd->status)); - CHECK_WBACK(lp->cmd_tail, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, lp->cmd_tail, sizeof(struct i596_cmd)); } else { lp->cmd_head = cmd; wait_cmd(dev, lp, 100, "i596_add_cmd timed out"); lp->scb.cmd = WSWAPcmd(virt_to_dma(lp,&cmd->status)); lp->scb.command = CUC_START; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); } lp->cmd_tail = cmd; @@ -998,12 +998,12 @@ static int i596_test(struct net_device *dev) data = virt_to_dma(lp,tint); tint[1] = -1; - CHECK_WBACK(tint,PAGE_SIZE); + CHECK_WBACK(lp, tint, PAGE_SIZE); MPU_PORT(dev, 1, data); for(data = 1000000; data; data--) { - CHECK_INV(tint,PAGE_SIZE); + CHECK_INV(lp, tint, PAGE_SIZE); if(tint[1] != -1) break; @@ -1061,7 +1061,7 @@ static void i596_tx_timeout (struct net_device *dev) /* Issue a channel attention signal */ DEB(DEB_ERRORS, printk("Kicking board.\n")); lp->scb.command = CUC_START | RX_START; - CHECK_WBACK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); CA (dev); lp->last_restart = lp->stats.tx_packets; } @@ -1118,8 +1118,8 @@ static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) tbd->data = WSWAPchar(tx_cmd->dma_addr); DEB(DEB_TXADDR,print_eth(skb->data, "tx-queued")); - CHECK_WBACK_INV(tx_cmd, sizeof(struct tx_cmd)); - CHECK_WBACK_INV(tbd, sizeof(struct i596_tbd)); + CHECK_WBACK_INV(lp, tx_cmd, sizeof(struct tx_cmd)); + CHECK_WBACK_INV(lp, tbd, sizeof(struct i596_tbd)); i596_add_cmd(dev, &tx_cmd->cmd); lp->stats.tx_packets++; @@ -1228,7 +1228,7 @@ static int __devinit i82596_probe(struct net_device *dev, lp->dma_addr = dma_addr; lp->dev = gen_dev; - CHECK_WBACK_INV(dev->mem_start, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, dev->mem_start, sizeof(struct i596_private)); i = register_netdev(dev); if (i) { @@ -1295,7 +1295,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) DEB(DEB_INTS, printk("%s: i596 interrupt command unit inactive %x.\n", dev->name, status & 0x0700)); while (lp->cmd_head != NULL) { - CHECK_INV(lp->cmd_head, sizeof(struct i596_cmd)); + CHECK_INV(lp, lp->cmd_head, sizeof(struct i596_cmd)); if (!(lp->cmd_head->status & STAT_C)) break; @@ -1358,7 +1358,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } ptr->v_next = NULL; ptr->b_next = I596_NULL; - CHECK_WBACK(ptr, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, ptr, sizeof(struct i596_cmd)); lp->last_cmd = jiffies; } @@ -1372,13 +1372,13 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) ptr->command &= 0x1fff; ptr = ptr->v_next; - CHECK_WBACK_INV(prev, sizeof(struct i596_cmd)); + CHECK_WBACK_INV(lp, prev, sizeof(struct i596_cmd)); } if ((lp->cmd_head != NULL)) ack_cmd |= CUC_START; lp->scb.cmd = WSWAPcmd(virt_to_dma(lp,&lp->cmd_head->status)); - CHECK_WBACK_INV(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK_INV(lp, &lp->scb, sizeof(struct i596_scb)); } if ((status & 0x1000) || (status & 0x4000)) { if ((status & 0x4000)) @@ -1397,7 +1397,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } wait_cmd(dev, lp, 100, "i596 interrupt, timeout"); lp->scb.command = ack_cmd; - CHECK_WBACK(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK(lp, &lp->scb, sizeof(struct i596_scb)); /* DANGER: I suspect that some kind of interrupt acknowledgement aside from acking the 82596 might be needed @@ -1426,7 +1426,7 @@ static int i596_close(struct net_device *dev) wait_cmd(dev, lp, 100, "close1 timed out"); lp->scb.command = CUC_ABORT | RX_ABORT; - CHECK_WBACK(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK(lp, &lp->scb, sizeof(struct i596_scb)); CA(dev); @@ -1486,7 +1486,7 @@ static void set_multicast_list(struct net_device *dev) dev->name); else { lp->cf_cmd.cmd.command = CmdConfigure; - CHECK_WBACK_INV(&lp->cf_cmd, sizeof(struct cf_cmd)); + CHECK_WBACK_INV(lp, &lp->cf_cmd, sizeof(struct cf_cmd)); i596_add_cmd(dev, &lp->cf_cmd.cmd); } } @@ -1514,7 +1514,7 @@ static void set_multicast_list(struct net_device *dev) DEB(DEB_MULTI, printk("%s: Adding address %02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, cp[0],cp[1],cp[2],cp[3],cp[4],cp[5])); } - CHECK_WBACK_INV(&lp->mc_cmd, sizeof(struct mc_cmd)); + CHECK_WBACK_INV(lp, &lp->mc_cmd, sizeof(struct mc_cmd)); i596_add_cmd(dev, &cmd->cmd); } } diff --git a/drivers/net/lne390.c b/drivers/net/lne390.c index 5795ee11620..0a08d0c4e7b 100644 --- a/drivers/net/lne390.c +++ b/drivers/net/lne390.c @@ -440,7 +440,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index d9f48bb04b0..c41ae4286ee 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1100,7 +1100,7 @@ static void eth_tx_fill_frag_descs(struct mv643xx_private *mp, ETH_TX_ENABLE_INTERRUPT; mp->tx_skb[tx_index] = skb; } else - mp->tx_skb[tx_index] = 0; + mp->tx_skb[tx_index] = NULL; desc = &mp->p_tx_desc_area[tx_index]; desc->l4i_chk = 0; @@ -1136,7 +1136,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, eth_tx_fill_frag_descs(mp, skb); length = skb_headlen(skb); - mp->tx_skb[tx_index] = 0; + mp->tx_skb[tx_index] = NULL; } else { cmd_sts |= ETH_ZERO_PADDING | ETH_TX_LAST_DESC | diff --git a/drivers/net/mvme147.c b/drivers/net/mvme147.c index 56a82d8ee8f..e246d00bba6 100644 --- a/drivers/net/mvme147.c +++ b/drivers/net/mvme147.c @@ -184,7 +184,7 @@ static int m147lance_close(struct net_device *dev) MODULE_LICENSE("GPL"); static struct net_device *dev_mvme147_lance; -int init_module(void) +int __init init_module(void) { dev_mvme147_lance = mvme147lance_probe(-1); if (IS_ERR(dev_mvme147_lance)) @@ -192,7 +192,7 @@ int init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { struct m147lance_private *lp = dev_mvme147_lance->priv; unregister_netdev(dev_mvme147_lance); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 38df4280238..81f127a78af 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -89,7 +89,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MYRI10GE_EEPROM_STRINGS_SIZE 256 #define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2) -#define MYRI10GE_NO_CONFIRM_DATA 0xffffffff +#define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff) #define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff struct myri10ge_rx_buffer_state { @@ -156,8 +156,8 @@ struct myri10ge_priv { int sram_size; unsigned long board_span; unsigned long iomem_base; - u32 __iomem *irq_claim; - u32 __iomem *irq_deassert; + __be32 __iomem *irq_claim; + __be32 __iomem *irq_deassert; char *mac_addr_string; struct mcp_cmd_response *cmd; dma_addr_t cmd_bus; @@ -165,10 +165,10 @@ struct myri10ge_priv { dma_addr_t fw_stats_bus; struct pci_dev *pdev; int msi_enabled; - unsigned int link_state; + __be32 link_state; unsigned int rdma_tags_available; int intr_coal_delay; - u32 __iomem *intr_coal_delay_ptr; + __be32 __iomem *intr_coal_delay_ptr; int mtrr; int wake_queue; int stop_queue; @@ -273,6 +273,11 @@ MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)"); #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) +static inline void put_be32(__be32 val, __be32 __iomem *p) +{ + __raw_writel((__force __u32)val, (__force void __iomem *)p); +} + static int myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, struct myri10ge_cmd *data, int atomic) @@ -296,7 +301,7 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, buf->response_addr.low = htonl(dma_low); buf->response_addr.high = htonl(dma_high); - response->result = MYRI10GE_NO_RESPONSE_RESULT; + response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT); mb(); myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf)); @@ -311,14 +316,14 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, * (1ms will be enough for those commands) */ for (sleep_total = 0; sleep_total < 1000 - && response->result == MYRI10GE_NO_RESPONSE_RESULT; + && response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); sleep_total += 10) udelay(10); } else { /* use msleep for most command */ for (sleep_total = 0; sleep_total < 15 - && response->result == MYRI10GE_NO_RESPONSE_RESULT; + && response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); sleep_total++) msleep(1); } @@ -393,7 +398,7 @@ abort: static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) { char __iomem *submit; - u32 buf[16]; + __be32 buf[16]; u32 dma_low, dma_high; int i; @@ -410,7 +415,7 @@ static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) buf[0] = htonl(dma_high); /* confirm addr MSW */ buf[1] = htonl(dma_low); /* confirm addr LSW */ - buf[2] = htonl(MYRI10GE_NO_CONFIRM_DATA); /* confirm data */ + buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ buf[3] = htonl(dma_high); /* dummy addr MSW */ buf[4] = htonl(dma_low); /* dummy addr LSW */ buf[5] = htonl(enable); /* enable? */ @@ -479,7 +484,7 @@ static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size) } /* check id */ - hdr_offset = ntohl(*(u32 *) (fw->data + MCP_HEADER_PTR_OFFSET)); + hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET)); if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) { dev_err(dev, "Bad firmware file\n"); status = -EINVAL; @@ -550,7 +555,7 @@ static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp) static int myri10ge_load_firmware(struct myri10ge_priv *mgp) { char __iomem *submit; - u32 buf[16]; + __be32 buf[16]; u32 dma_low, dma_high, size; int status, i; @@ -600,7 +605,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) buf[0] = htonl(dma_high); /* confirm addr MSW */ buf[1] = htonl(dma_low); /* confirm addr LSW */ - buf[2] = htonl(MYRI10GE_NO_CONFIRM_DATA); /* confirm data */ + buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ /* FIX: All newest firmware should un-protect the bottom of * the sram before handoff. However, the very first interfaces @@ -705,21 +710,21 @@ static int myri10ge_reset(struct myri10ge_priv *mgp) status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); - mgp->irq_claim = (__iomem u32 *) (mgp->sram + cmd.data0); + mgp->irq_claim = (__iomem __be32 *) (mgp->sram + cmd.data0); if (!mgp->msi_enabled) { status |= myri10ge_send_cmd (mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd, 0); - mgp->irq_deassert = (__iomem u32 *) (mgp->sram + cmd.data0); + mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); } status |= myri10ge_send_cmd (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0); - mgp->intr_coal_delay_ptr = (__iomem u32 *) (mgp->sram + cmd.data0); + mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); if (status != 0) { dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); return status; } - __raw_writel(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); + put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); /* Run a small DMA test. * The magic multipliers to the length tell the firmware @@ -786,14 +791,16 @@ static inline void myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst, struct mcp_kreq_ether_recv *src) { - u32 low; + __be32 low; low = src->addr_low; - src->addr_low = DMA_32BIT_MASK; - myri10ge_pio_copy(dst, src, 8 * sizeof(*src)); + src->addr_low = htonl(DMA_32BIT_MASK); + myri10ge_pio_copy(dst, src, 4 * sizeof(*src)); + mb(); + myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src)); mb(); src->addr_low = low; - __raw_writel(low, &dst->addr_low); + put_be32(low, &dst->addr_low); mb(); } @@ -939,11 +946,11 @@ done: return retval; } -static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum) +static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum) { struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data); - if ((skb->protocol == ntohs(ETH_P_8021Q)) && + if ((skb->protocol == htons(ETH_P_8021Q)) && (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { skb->csum = hw_csum; @@ -953,7 +960,7 @@ static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum) static inline unsigned long myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, - int bytes, int len, int csum) + int bytes, int len, __wsum csum) { dma_addr_t bus; struct sk_buff *skb; @@ -986,12 +993,12 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, skb->protocol = eth_type_trans(skb, mgp->dev); if (mgp->csum_flag) { - if ((skb->protocol == ntohs(ETH_P_IP)) || - (skb->protocol == ntohs(ETH_P_IPV6))) { - skb->csum = ntohs((u16) csum); + if ((skb->protocol == htons(ETH_P_IP)) || + (skb->protocol == htons(ETH_P_IPV6))) { + skb->csum = csum; skb->ip_summed = CHECKSUM_COMPLETE; } else - myri10ge_vlan_ip_csum(skb, ntohs((u16) csum)); + myri10ge_vlan_ip_csum(skb, csum); } netif_receive_skb(skb); @@ -1060,12 +1067,12 @@ static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit) int idx = rx_done->idx; int cnt = rx_done->cnt; u16 length; - u16 checksum; + __wsum checksum; while (rx_done->entry[idx].length != 0 && *limit != 0) { length = ntohs(rx_done->entry[idx].length); rx_done->entry[idx].length = 0; - checksum = ntohs(rx_done->entry[idx].checksum); + checksum = csum_unfold(rx_done->entry[idx].checksum); if (length <= mgp->small_bytes) rx_ok = myri10ge_rx_done(mgp, &mgp->rx_small, mgp->small_bytes, @@ -1142,7 +1149,7 @@ static int myri10ge_poll(struct net_device *netdev, int *budget) if (rx_done->entry[rx_done->idx].length == 0 || !netif_running(netdev)) { netif_rx_complete(netdev); - __raw_writel(htonl(3), mgp->irq_claim); + put_be32(htonl(3), mgp->irq_claim); return 0; } return 1; @@ -1166,7 +1173,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg) netif_rx_schedule(mgp->dev); if (!mgp->msi_enabled) { - __raw_writel(0, mgp->irq_deassert); + put_be32(0, mgp->irq_deassert); if (!myri10ge_deassert_wait) stats->valid = 0; mb(); @@ -1195,7 +1202,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg) myri10ge_check_statblock(mgp); - __raw_writel(htonl(3), mgp->irq_claim + 1); + put_be32(htonl(3), mgp->irq_claim + 1); return (IRQ_HANDLED); } @@ -1233,7 +1240,7 @@ myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) struct myri10ge_priv *mgp = netdev_priv(netdev); mgp->intr_coal_delay = coal->rx_coalesce_usecs; - __raw_writel(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); + put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); return 0; } @@ -1748,7 +1755,7 @@ static int myri10ge_open(struct net_device *dev) goto abort_with_rings; } - mgp->link_state = -1; + mgp->link_state = htonl(~0U); mgp->rdma_tags_available = 15; netif_poll_enable(mgp->dev); /* must happen prior to any irq */ @@ -1876,7 +1883,7 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, /* re-write the last 32-bits with the valid flags */ src->flags = last_flags; - __raw_writel(*((u32 *) src + 3), (u32 __iomem *) dst + 3); + put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3); tx->req += cnt; mb(); } @@ -1919,7 +1926,8 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) struct myri10ge_tx_buf *tx = &mgp->tx; struct skb_frag_struct *frag; dma_addr_t bus; - u32 low, high_swapped; + u32 low; + __be32 high_swapped; unsigned int len; int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; u16 pseudo_hdr_offset, cksum_offset; @@ -1964,7 +1972,6 @@ again: cksum_offset = 0; pseudo_hdr_offset = 0; } else { - pseudo_hdr_offset = htons(pseudo_hdr_offset); odd_flag = MXGEFW_FLAGS_ALIGN_ODD; flags |= MXGEFW_FLAGS_CKSUM; } @@ -1986,7 +1993,7 @@ again: /* for TSO, pseudo_hdr_offset holds mss. * The firmware figures out where to put * the checksum by parsing the header. */ - pseudo_hdr_offset = htons(mss); + pseudo_hdr_offset = mss; } else #endif /*NETIF_F_TSO */ /* Mark small packets, and pad out tiny packets */ @@ -2086,7 +2093,7 @@ again: #endif /* NETIF_F_TSO */ req->addr_high = high_swapped; req->addr_low = htonl(low); - req->pseudo_hdr_offset = pseudo_hdr_offset; + req->pseudo_hdr_offset = htons(pseudo_hdr_offset); req->pad = 0; /* complete solid 16-byte block; does this matter? */ req->rdma_count = 1; req->length = htons(seglen); @@ -2199,6 +2206,7 @@ static void myri10ge_set_multicast_list(struct net_device *dev) struct myri10ge_cmd cmd; struct myri10ge_priv *mgp; struct dev_mc_list *mc_list; + __be32 data[2] = {0, 0}; int err; mgp = netdev_priv(dev); @@ -2237,10 +2245,9 @@ static void myri10ge_set_multicast_list(struct net_device *dev) /* Walk the multicast list, and add each address */ for (mc_list = dev->mc_list; mc_list != NULL; mc_list = mc_list->next) { - memcpy(&cmd.data0, &mc_list->dmi_addr, 4); - memcpy(&cmd.data1, ((char *)&mc_list->dmi_addr) + 4, 2); - cmd.data0 = htonl(cmd.data0); - cmd.data1 = htonl(cmd.data1); + memcpy(data, &mc_list->dmi_addr, 6); + cmd.data0 = ntohl(data[0]); + cmd.data1 = ntohl(data[1]); err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP, &cmd, 1); diff --git a/drivers/net/myri10ge/myri10ge_mcp.h b/drivers/net/myri10ge/myri10ge_mcp.h index 9519ae7cd5e..29463b301a8 100644 --- a/drivers/net/myri10ge/myri10ge_mcp.h +++ b/drivers/net/myri10ge/myri10ge_mcp.h @@ -6,23 +6,23 @@ /* 8 Bytes */ struct mcp_dma_addr { - u32 high; - u32 low; + __be32 high; + __be32 low; }; /* 4 Bytes */ struct mcp_slot { - u16 checksum; - u16 length; + __sum16 checksum; + __be16 length; }; /* 64 Bytes */ struct mcp_cmd { - u32 cmd; - u32 data0; /* will be low portion if data > 32 bits */ + __be32 cmd; + __be32 data0; /* will be low portion if data > 32 bits */ /* 8 */ - u32 data1; /* will be high portion if data > 32 bits */ - u32 data2; /* currently unused.. */ + __be32 data1; /* will be high portion if data > 32 bits */ + __be32 data2; /* currently unused.. */ /* 16 */ struct mcp_dma_addr response_addr; /* 24 */ @@ -31,8 +31,8 @@ struct mcp_cmd { /* 8 Bytes */ struct mcp_cmd_response { - u32 data; - u32 result; + __be32 data; + __be32 result; }; /* @@ -73,10 +73,10 @@ union mcp_pso_or_cumlen { /* 16 Bytes */ struct mcp_kreq_ether_send { - u32 addr_high; - u32 addr_low; - u16 pseudo_hdr_offset; - u16 length; + __be32 addr_high; + __be32 addr_low; + __be16 pseudo_hdr_offset; + __be16 length; u8 pad; u8 rdma_count; u8 cksum_offset; /* where to start computing cksum */ @@ -85,8 +85,8 @@ struct mcp_kreq_ether_send { /* 8 Bytes */ struct mcp_kreq_ether_recv { - u32 addr_high; - u32 addr_low; + __be32 addr_high; + __be32 addr_low; }; /* Commands */ @@ -219,19 +219,19 @@ enum myri10ge_mcp_cmd_status { struct mcp_irq_data { /* add new counters at the beginning */ - u32 future_use[5]; - u32 dropped_multicast_filtered; + __be32 future_use[5]; + __be32 dropped_multicast_filtered; /* 40 Bytes */ - u32 send_done_count; - - u32 link_up; - u32 dropped_link_overflow; - u32 dropped_link_error_or_filtered; - u32 dropped_runt; - u32 dropped_overrun; - u32 dropped_no_small_buffer; - u32 dropped_no_big_buffer; - u32 rdma_tags_available; + __be32 send_done_count; + + __be32 link_up; + __be32 dropped_link_overflow; + __be32 dropped_link_error_or_filtered; + __be32 dropped_runt; + __be32 dropped_overrun; + __be32 dropped_no_small_buffer; + __be32 dropped_no_big_buffer; + __be32 rdma_tags_available; u8 tx_stopped; u8 link_down; diff --git a/drivers/net/myri10ge/myri10ge_mcp_gen_header.h b/drivers/net/myri10ge/myri10ge_mcp_gen_header.h index 487f7792fd4..16a810dd6d5 100644 --- a/drivers/net/myri10ge/myri10ge_mcp_gen_header.h +++ b/drivers/net/myri10ge/myri10ge_mcp_gen_header.h @@ -36,7 +36,7 @@ struct mcp_gen_header { /* the first 4 fields are filled at compile time */ unsigned header_length; - unsigned mcp_type; + __be32 mcp_type; char version[128]; unsigned mcp_globals; /* pointer to mcp-type specific structure */ diff --git a/drivers/net/ne.c b/drivers/net/ne.c index 787aa422152..a5c4199e275 100644 --- a/drivers/net/ne.c +++ b/drivers/net/ne.c @@ -867,7 +867,7 @@ static void cleanup_card(struct net_device *dev) release_region(dev->base_addr, NE_IO_EXTENT); } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/ne2.c b/drivers/net/ne2.c index 5fccfea66d8..089b5bb702f 100644 --- a/drivers/net/ne2.c +++ b/drivers/net/ne2.c @@ -813,7 +813,7 @@ static void cleanup_card(struct net_device *dev) release_region(dev->base_addr, NE_IO_EXTENT); } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index 9c588af8ab7..b5410bee5f2 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -1,25 +1,25 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -63,40 +63,68 @@ #include "netxen_nic_hw.h" -#define NETXEN_NIC_BUILD_NO "5" -#define _NETXEN_NIC_LINUX_MAJOR 2 +#define NETXEN_NIC_BUILD_NO "1" +#define _NETXEN_NIC_LINUX_MAJOR 3 #define _NETXEN_NIC_LINUX_MINOR 3 -#define _NETXEN_NIC_LINUX_SUBVERSION 59 -#define NETXEN_NIC_LINUX_VERSIONID "2.3.59" "-" NETXEN_NIC_BUILD_NO -#define NETXEN_NIC_FW_VERSIONID "2.3.59" +#define _NETXEN_NIC_LINUX_SUBVERSION 2 +#define NETXEN_NIC_LINUX_VERSIONID "3.3.2" "-" NETXEN_NIC_BUILD_NO +#define NETXEN_NIC_FW_VERSIONID "3.3.2" #define RCV_DESC_RINGSIZE \ (sizeof(struct rcv_desc) * adapter->max_rx_desc_count) #define STATUS_DESC_RINGSIZE \ (sizeof(struct status_desc)* adapter->max_rx_desc_count) +#define LRO_DESC_RINGSIZE \ + (sizeof(rcvDesc_t) * adapter->max_lro_rx_desc_count) #define TX_RINGSIZE \ (sizeof(struct netxen_cmd_buffer) * adapter->max_tx_desc_count) #define RCV_BUFFSIZE \ (sizeof(struct netxen_rx_buffer) * rcv_desc->max_rx_desc_count) #define find_diff_among(a,b,range) ((a)<(b)?((b)-(a)):((b)+(range)-(a))) -#define NETXEN_NETDEV_STATUS 0x1 +#define NETXEN_NETDEV_STATUS 0x1 +#define NETXEN_RCV_PRODUCER_OFFSET 0 +#define NETXEN_RCV_PEG_DB_ID 2 +#define NETXEN_HOST_DUMMY_DMA_SIZE 1024 #define ADDR_IN_WINDOW1(off) \ ((off > NETXEN_CRB_PCIX_HOST2) && (off < NETXEN_CRB_MAX)) ? 1 : 0 +/* + * In netxen_nic_down(), we must wait for any pending callback requests into + * netxen_watchdog_task() to complete; eg otherwise the watchdog_timer could be + * reenabled right after it is deleted in netxen_nic_down(). FLUSH_SCHEDULED_WORK() + * does this synchronization. + * + * Normally, schedule_work()/flush_scheduled_work() could have worked, but + * netxen_nic_close() is invoked with kernel rtnl lock held. netif_carrier_off() + * call in netxen_nic_close() triggers a schedule_work(&linkwatch_work), and a + * subsequent call to flush_scheduled_work() in netxen_nic_down() would cause + * linkwatch_event() to be executed which also attempts to acquire the rtnl + * lock thus causing a deadlock. + */ + +#define SCHEDULE_WORK(tp) queue_work(netxen_workq, tp) +#define FLUSH_SCHEDULED_WORK() flush_workqueue(netxen_workq) +extern struct workqueue_struct *netxen_workq; /* * normalize a 64MB crb address to 32MB PCI window * To use NETXEN_CRB_NORMALIZE, window _must_ be set to 1 */ -#define NETXEN_CRB_NORMAL(reg) \ - (reg) - NETXEN_CRB_PCIX_HOST2 + NETXEN_CRB_PCIX_HOST +#define NETXEN_CRB_NORMAL(reg) \ + ((reg) - NETXEN_CRB_PCIX_HOST2 + NETXEN_CRB_PCIX_HOST) #define NETXEN_CRB_NORMALIZE(adapter, reg) \ pci_base_offset(adapter, NETXEN_CRB_NORMAL(reg)) +#define DB_NORMALIZE(adapter, off) \ + (adapter->ahw.db_base + (off)) + +#define NX_P2_C0 0x24 +#define NX_P2_C1 0x25 + #define FIRST_PAGE_GROUP_START 0 -#define FIRST_PAGE_GROUP_END 0x400000 +#define FIRST_PAGE_GROUP_END 0x100000 #define SECOND_PAGE_GROUP_START 0x4000000 #define SECOND_PAGE_GROUP_END 0x66BC000 @@ -108,11 +136,13 @@ #define SECOND_PAGE_GROUP_SIZE SECOND_PAGE_GROUP_END - SECOND_PAGE_GROUP_START #define THIRD_PAGE_GROUP_SIZE THIRD_PAGE_GROUP_END - THIRD_PAGE_GROUP_START -#define MAX_RX_BUFFER_LENGTH 2000 +#define MAX_RX_BUFFER_LENGTH 1760 #define MAX_RX_JUMBO_BUFFER_LENGTH 9046 -#define RX_DMA_MAP_LEN (MAX_RX_BUFFER_LENGTH - NET_IP_ALIGN) +#define MAX_RX_LRO_BUFFER_LENGTH ((48*1024)-512) +#define RX_DMA_MAP_LEN (MAX_RX_BUFFER_LENGTH - 2) #define RX_JUMBO_DMA_MAP_LEN \ - (MAX_RX_JUMBO_BUFFER_LENGTH - NET_IP_ALIGN) + (MAX_RX_JUMBO_BUFFER_LENGTH - 2) +#define RX_LRO_DMA_MAP_LEN (MAX_RX_LRO_BUFFER_LENGTH - 2) #define NETXEN_ROM_ROUNDUP 0x80000000ULL /* @@ -151,30 +181,38 @@ enum { /* Host writes the following to notify that it has done the init-handshake */ #define PHAN_INITIALIZE_ACK 0xf00f -#define NUM_RCV_DESC_RINGS 2 /* No of Rcv Descriptor contexts */ +#define NUM_RCV_DESC_RINGS 3 /* No of Rcv Descriptor contexts */ /* descriptor types */ #define RCV_DESC_NORMAL 0x01 #define RCV_DESC_JUMBO 0x02 +#define RCV_DESC_LRO 0x04 #define RCV_DESC_NORMAL_CTXID 0 #define RCV_DESC_JUMBO_CTXID 1 +#define RCV_DESC_LRO_CTXID 2 #define RCV_DESC_TYPE(ID) \ - ((ID == RCV_DESC_JUMBO_CTXID) ? RCV_DESC_JUMBO : RCV_DESC_NORMAL) + ((ID == RCV_DESC_JUMBO_CTXID) \ + ? RCV_DESC_JUMBO \ + : ((ID == RCV_DESC_LRO_CTXID) \ + ? RCV_DESC_LRO : \ + (RCV_DESC_NORMAL))) #define MAX_CMD_DESCRIPTORS 1024 #define MAX_RCV_DESCRIPTORS 32768 -#define MAX_JUMBO_RCV_DESCRIPTORS 1024 +#define MAX_JUMBO_RCV_DESCRIPTORS 4096 +#define MAX_LRO_RCV_DESCRIPTORS 2048 #define MAX_RCVSTATUS_DESCRIPTORS MAX_RCV_DESCRIPTORS #define MAX_JUMBO_RCV_DESC MAX_JUMBO_RCV_DESCRIPTORS #define MAX_RCV_DESC MAX_RCV_DESCRIPTORS #define MAX_RCVSTATUS_DESC MAX_RCV_DESCRIPTORS -#define NUM_RCV_DESC (MAX_RCV_DESC + MAX_JUMBO_RCV_DESCRIPTORS) #define MAX_EPG_DESCRIPTORS (MAX_CMD_DESCRIPTORS * 8) - +#define NUM_RCV_DESC (MAX_RCV_DESC + MAX_JUMBO_RCV_DESCRIPTORS + \ + MAX_LRO_RCV_DESCRIPTORS) #define MIN_TX_COUNT 4096 #define MIN_RX_COUNT 4096 - +#define NETXEN_CTX_SIGNATURE 0xdee0 +#define NETXEN_RCV_PRODUCER(ringid) (ringid) #define MAX_FRAME_SIZE 0x10000 /* 64K MAX size for LSO */ #define PHAN_PEG_RCV_INITIALIZED 0xff01 @@ -186,6 +224,67 @@ enum { #define get_index_range(index,length,count) \ (((index) + (count)) & ((length) - 1)) +#define MPORT_SINGLE_FUNCTION_MODE 0x1111 + +extern unsigned long long netxen_dma_mask; + +/* + * NetXen host-peg signal message structure + * + * Bit 0-1 : peg_id => 0x2 for tx and 01 for rx + * Bit 2 : priv_id => must be 1 + * Bit 3-17 : count => for doorbell + * Bit 18-27 : ctx_id => Context id + * Bit 28-31 : opcode + */ + +typedef u32 netxen_ctx_msg; + +#define _netxen_set_bits(config_word, start, bits, val) {\ + unsigned long long mask = (((1ULL << (bits)) - 1) << (start)); \ + unsigned long long value = (val); \ + (config_word) &= ~mask; \ + (config_word) |= (((value) << (start)) & mask); \ +} + +#define netxen_set_msg_peg_id(config_word, val) \ + _netxen_set_bits(config_word, 0, 2, val) +#define netxen_set_msg_privid(config_word) \ + set_bit(2, (unsigned long*)&config_word) +#define netxen_set_msg_count(config_word, val) \ + _netxen_set_bits(config_word, 3, 15, val) +#define netxen_set_msg_ctxid(config_word, val) \ + _netxen_set_bits(config_word, 18, 10, val) +#define netxen_set_msg_opcode(config_word, val) \ + _netxen_set_bits(config_word, 28, 4, val) + +struct netxen_rcv_context { + u32 rcv_ring_addr_lo; + u32 rcv_ring_addr_hi; + u32 rcv_ring_size; + u32 rsrvd; +}; + +struct netxen_ring_ctx { + + /* one command ring */ + u64 cmd_consumer_offset; + u32 cmd_ring_addr_lo; + u32 cmd_ring_addr_hi; + u32 cmd_ring_size; + u32 rsrvd; + + /* three receive rings */ + struct netxen_rcv_context rcv_ctx[3]; + + /* one status ring */ + u32 sts_ring_addr_lo; + u32 sts_ring_addr_hi; + u32 sts_ring_size; + + u32 ctx_id; +} __attribute__ ((aligned(64))); + /* * Following data structures describe the descriptors that will be used. * Added fileds of tcpHdrSize and ipHdrSize, The driver needs to do it only when @@ -203,22 +302,32 @@ enum { #define FLAGS_IPSEC_SA_DELETE 0x08 #define FLAGS_VLAN_TAGGED 0x10 -#define CMD_DESC_TOTAL_LENGTH(cmd_desc) \ - ((cmd_desc)->length_tcp_hdr & 0x00FFFFFF) -#define CMD_DESC_TCP_HDR_OFFSET(cmd_desc) \ - (((cmd_desc)->length_tcp_hdr >> 24) & 0x0FF) -#define CMD_DESC_PORT(cmd_desc) ((cmd_desc)->port_ctxid & 0x0F) -#define CMD_DESC_CTX_ID(cmd_desc) (((cmd_desc)->port_ctxid >> 4) & 0x0F) +#define netxen_set_cmd_desc_port(cmd_desc, var) \ + ((cmd_desc)->port_ctxid |= ((var) & 0x0F)) -#define CMD_DESC_TOTAL_LENGTH_WRT(cmd_desc, var) \ - ((cmd_desc)->length_tcp_hdr |= ((var) & 0x00FFFFFF)) -#define CMD_DESC_TCP_HDR_OFFSET_WRT(cmd_desc, var) \ - ((cmd_desc)->length_tcp_hdr |= (((var) << 24) & 0xFF000000)) -#define CMD_DESC_PORT_WRT(cmd_desc, var) \ - ((cmd_desc)->port_ctxid |= ((var) & 0x0F)) +#define netxen_set_cmd_desc_flags(cmd_desc, val) \ + _netxen_set_bits((cmd_desc)->flags_opcode, 0, 7, val) +#define netxen_set_cmd_desc_opcode(cmd_desc, val) \ + _netxen_set_bits((cmd_desc)->flags_opcode, 7, 6, val) + +#define netxen_set_cmd_desc_num_of_buff(cmd_desc, val) \ + _netxen_set_bits((cmd_desc)->num_of_buffers_total_length, 0, 8, val); +#define netxen_set_cmd_desc_totallength(cmd_desc, val) \ + _netxen_set_bits((cmd_desc)->num_of_buffers_total_length, 8, 24, val); + +#define netxen_get_cmd_desc_opcode(cmd_desc) \ + (((cmd_desc)->flags_opcode >> 7) & 0x003F) +#define netxen_get_cmd_desc_totallength(cmd_desc) \ + (((cmd_desc)->num_of_buffers_total_length >> 8) & 0x0FFFFFF) struct cmd_desc_type0 { - u64 netxen_next; /* for fragments handled by Phantom */ + u8 tcp_hdr_offset; /* For LSO only */ + u8 ip_hdr_offset; /* For LSO only */ + /* Bit pattern: 0-6 flags, 7-12 opcode, 13-15 unused */ + u16 flags_opcode; + /* Bit pattern: 0-7 total number of segments, + 8-31 Total size of the packet */ + u32 num_of_buffers_total_length; union { struct { u32 addr_low_part2; @@ -227,13 +336,6 @@ struct cmd_desc_type0 { u64 addr_buffer2; }; - /* Bit pattern: 0-23 total length, 24-32 tcp header offset */ - u32 length_tcp_hdr; - u8 ip_hdr_offset; /* For LSO only */ - u8 num_of_buffers; /* total number of segments */ - u8 flags; /* as defined above */ - u8 opcode; - u16 reference_handle; /* changed to u16 to add mss */ u16 mss; /* passed by NDIS_PACKET for LSO */ /* Bit pattern 0-3 port, 0-3 ctx id */ @@ -248,7 +350,6 @@ struct cmd_desc_type0 { }; u64 addr_buffer3; }; - union { struct { u32 addr_low_part1; @@ -270,6 +371,8 @@ struct cmd_desc_type0 { u64 addr_buffer4; }; + u64 unused; + } __attribute__ ((aligned(64))); /* Note: sizeof(rcv_desc) should always be a mutliple of 2 */ @@ -296,22 +399,49 @@ struct rcv_desc { #define NETXEN_PROT_UNKNOWN (0) /* Note: sizeof(status_desc) should always be a mutliple of 2 */ -#define STATUS_DESC_PORT(status_desc) \ - ((status_desc)->port_status_type_op & 0x0F) -#define STATUS_DESC_STATUS(status_desc) \ - (((status_desc)->port_status_type_op >> 4) & 0x0F) -#define STATUS_DESC_TYPE(status_desc) \ - (((status_desc)->port_status_type_op >> 8) & 0x0F) -#define STATUS_DESC_OPCODE(status_desc) \ - (((status_desc)->port_status_type_op >> 12) & 0x0F) + +#define netxen_get_sts_desc_lro_cnt(status_desc) \ + ((status_desc)->lro & 0x7F) +#define netxen_get_sts_desc_lro_last_frag(status_desc) \ + (((status_desc)->lro & 0x80) >> 7) + +#define netxen_get_sts_port(status_desc) \ + ((status_desc)->status_desc_data & 0x0F) +#define netxen_get_sts_status(status_desc) \ + (((status_desc)->status_desc_data >> 4) & 0x0F) +#define netxen_get_sts_type(status_desc) \ + (((status_desc)->status_desc_data >> 8) & 0x0F) +#define netxen_get_sts_totallength(status_desc) \ + (((status_desc)->status_desc_data >> 12) & 0xFFFF) +#define netxen_get_sts_refhandle(status_desc) \ + (((status_desc)->status_desc_data >> 28) & 0xFFFF) +#define netxen_get_sts_prot(status_desc) \ + (((status_desc)->status_desc_data >> 44) & 0x0F) +#define netxen_get_sts_owner(status_desc) \ + (((status_desc)->status_desc_data >> 56) & 0x03) +#define netxen_get_sts_opcode(status_desc) \ + (((status_desc)->status_desc_data >> 58) & 0x03F) + +#define netxen_clear_sts_owner(status_desc) \ + ((status_desc)->status_desc_data &= \ + ~(((unsigned long long)3) << 56 )) +#define netxen_set_sts_owner(status_desc, val) \ + ((status_desc)->status_desc_data |= \ + (((unsigned long long)((val) & 0x3)) << 56 )) struct status_desc { - /* Bit pattern: 0-3 port, 4-7 status, 8-11 type, 12-15 opcode */ - u16 port_status_type_op; - u16 total_length; /* NIC mode */ - u16 reference_handle; /* handle for the associated packet */ - /* Bit pattern: 0-1 owner, 2-5 protocol */ - u16 owner; /* Owner of the descriptor */ + /* Bit pattern: 0-3 port, 4-7 status, 8-11 type, 12-27 total_length + 28-43 reference_handle, 44-47 protocol, 48-52 unused + 53-55 desc_cnt, 56-57 owner, 58-63 opcode + */ + u64 status_desc_data; + u32 hash_value; + u8 hash_type; + u8 msg_type; + u8 unused; + /* Bit pattern: 0-6 lro_count indicates frag sequence, + 7 last_frag indicates last frag */ + u8 lro; } __attribute__ ((aligned(8))); enum { @@ -559,11 +689,12 @@ typedef enum { #define PRIMARY_START (BOOTLD_START) #define FLASH_CRBINIT_SIZE (0x4000) #define FLASH_BRDCFG_SIZE (sizeof(struct netxen_board_info)) -#define FLASH_USER_SIZE (sizeof(netxen_user_info)/sizeof(u32)) +#define FLASH_USER_SIZE (sizeof(struct netxen_user_info)/sizeof(u32)) #define FLASH_SECONDARY_SIZE (USER_START-SECONDARY_START) #define NUM_PRIMARY_SECTORS (0x20) #define NUM_CONFIG_SECTORS (1) -#define PFX "netxen: " +#define PFX "NetXen: " +extern char netxen_nic_driver_name[]; /* Note: Make sure to not call this before adapter->port is valid */ #if !defined(NETXEN_DEBUG) @@ -572,7 +703,7 @@ typedef enum { #else #define DPRINTK(klevel, fmt, args...) do { \ printk(KERN_##klevel PFX "%s: %s: " fmt, __FUNCTION__,\ - (adapter != NULL && adapter->port != NULL && \ + (adapter != NULL && \ adapter->port[0] != NULL && \ adapter->port[0]->netdev != NULL) ? \ adapter->port[0]->netdev->name : NULL, \ @@ -609,7 +740,6 @@ struct netxen_cmd_buffer { u8 frag_count; unsigned long time_stamp; u32 state; - u32 no_of_descriptors; }; /* In rx_buffer, we do not need multiple fragments as is a single buffer */ @@ -618,6 +748,9 @@ struct netxen_rx_buffer { u64 dma; u16 ref_handle; u16 state; + u32 lro_expected_frags; + u32 lro_current_frags; + u32 lro_length; }; /* Board types */ @@ -633,6 +766,8 @@ struct netxen_hardware_context { void __iomem *pci_base0; void __iomem *pci_base1; void __iomem *pci_base2; + void __iomem *db_base; + unsigned long db_len; u8 revision_id; u16 board_type; @@ -642,14 +777,13 @@ struct netxen_hardware_context { u32 qg_linksup; /* Address of cmd ring in Phantom */ struct cmd_desc_type0 *cmd_desc_head; - char *pauseaddr; struct pci_dev *cmd_desc_pdev; dma_addr_t cmd_desc_phys_addr; - dma_addr_t pause_physaddr; - struct pci_dev *pause_pdev; struct netxen_adapter *adapter; }; +#define RCV_RING_LRO RCV_DESC_LRO + #define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */ #define ETHERNET_FCS_SIZE 4 @@ -702,8 +836,13 @@ struct netxen_recv_context { }; #define NETXEN_NIC_MSI_ENABLED 0x02 +#define NETXEN_DMA_MASK 0xfffffffe +#define NETXEN_DB_MAPSIZE_BYTES 0x1000 -struct netxen_drvops; +struct netxen_dummy_dma { + void *addr; + dma_addr_t phys_addr; +}; struct netxen_adapter { struct netxen_hardware_context ahw; @@ -720,12 +859,13 @@ struct netxen_adapter { u32 curr_window; u32 cmd_producer; - u32 cmd_consumer; + u32 *cmd_consumer; u32 last_cmd_consumer; u32 max_tx_desc_count; u32 max_rx_desc_count; u32 max_jumbo_rx_desc_count; + u32 max_lro_rx_desc_count; /* Num of instances active on cmd buffer ring */ u32 proc_cmd_buf_counter; @@ -747,8 +887,27 @@ struct netxen_adapter { struct netxen_recv_context recv_ctx[MAX_RCV_CTX]; int is_up; - int work_done; - struct netxen_drvops *ops; + int number; + struct netxen_dummy_dma dummy_dma; + + /* Context interface shared between card and host */ + struct netxen_ring_ctx *ctx_desc; + struct pci_dev *ctx_desc_pdev; + dma_addr_t ctx_desc_phys_addr; + int (*enable_phy_interrupts) (struct netxen_adapter *, int); + int (*disable_phy_interrupts) (struct netxen_adapter *, int); + void (*handle_phy_intr) (struct netxen_adapter *); + int (*macaddr_set) (struct netxen_port *, netxen_ethernet_macaddr_t); + int (*set_mtu) (struct netxen_port *, int); + int (*set_promisc) (struct netxen_adapter *, int, + netxen_niu_prom_mode_t); + int (*unset_promisc) (struct netxen_adapter *, int, + netxen_niu_prom_mode_t); + int (*phy_read) (struct netxen_adapter *, long phy, long reg, u32 *); + int (*phy_write) (struct netxen_adapter *, long phy, long reg, u32 val); + int (*init_port) (struct netxen_adapter *, int); + void (*init_niu) (struct netxen_adapter *); + int (*stop_port) (struct netxen_adapter *, int); }; /* netxen_adapter structure */ /* Max number of xmit producer threads that can run simultaneously */ @@ -830,25 +989,6 @@ static inline void __iomem *pci_base(struct netxen_adapter *adapter, return NULL; } -struct netxen_drvops { - int (*enable_phy_interrupts) (struct netxen_adapter *, int); - int (*disable_phy_interrupts) (struct netxen_adapter *, int); - void (*handle_phy_intr) (struct netxen_adapter *); - int (*macaddr_set) (struct netxen_port *, netxen_ethernet_macaddr_t); - int (*set_mtu) (struct netxen_port *, int); - int (*set_promisc) (struct netxen_adapter *, int, - netxen_niu_prom_mode_t); - int (*unset_promisc) (struct netxen_adapter *, int, - netxen_niu_prom_mode_t); - int (*phy_read) (struct netxen_adapter *, long phy, long reg, u32 *); - int (*phy_write) (struct netxen_adapter *, long phy, long reg, u32 val); - int (*init_port) (struct netxen_adapter *, int); - void (*init_niu) (struct netxen_adapter *); - int (*stop_port) (struct netxen_adapter *, int); -}; - -extern char netxen_nic_driver_name[]; - int netxen_niu_xgbe_enable_phy_interrupts(struct netxen_adapter *adapter, int port); int netxen_niu_gbe_enable_phy_interrupts(struct netxen_adapter *adapter, @@ -887,10 +1027,20 @@ int netxen_nic_hw_read_wx(struct netxen_adapter *adapter, u64 off, void *data, int len); int netxen_nic_hw_write_wx(struct netxen_adapter *adapter, u64 off, void *data, int len); +int netxen_nic_hw_read_ioctl(struct netxen_adapter *adapter, u64 off, + void *data, int len); +int netxen_nic_hw_write_ioctl(struct netxen_adapter *adapter, u64 off, + void *data, int len); +int netxen_nic_pci_mem_write_ioctl(struct netxen_adapter *adapter, + u64 off, void *data, int size); +int netxen_nic_pci_mem_read_ioctl(struct netxen_adapter *adapter, + u64 off, void *data, int size); void netxen_crb_writelit_adapter(struct netxen_adapter *adapter, unsigned long off, int data); /* Functions from netxen_nic_init.c */ +void netxen_free_adapter_offload(struct netxen_adapter *adapter); +int netxen_initialize_adapter_offload(struct netxen_adapter *adapter); void netxen_phantom_init(struct netxen_adapter *adapter, int pegtune_val); void netxen_load_firmware(struct netxen_adapter *adapter); int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose); @@ -925,7 +1075,9 @@ int netxen_nic_tx_has_work(struct netxen_adapter *adapter); void netxen_watchdog_task(struct work_struct *work); void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid); -void netxen_process_cmd_ring(unsigned long data); +void netxen_post_rx_buffers_nodb(struct netxen_adapter *adapter, u32 ctx, + u32 ringid); +int netxen_process_cmd_ring(unsigned long data); u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctx, int max); void netxen_nic_set_multi(struct net_device *netdev); int netxen_nic_change_mtu(struct net_device *netdev, int new_mtu); @@ -1019,7 +1171,6 @@ static inline void get_brd_name_by_type(u32 type, char *name) int netxen_is_flash_supported(struct netxen_adapter *adapter); int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 mac[]); - extern void netxen_change_ringparam(struct netxen_adapter *adapter); extern int netxen_rom_fast_read(struct netxen_adapter *adapter, int addr, int *valp); diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c index 9a914aeba5b..2ab4885cc95 100644 --- a/drivers/net/netxen/netxen_nic_ethtool.c +++ b/drivers/net/netxen/netxen_nic_ethtool.c @@ -1,25 +1,25 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -118,7 +118,7 @@ netxen_nic_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) u32 fw_minor = 0; u32 fw_build = 0; - strncpy(drvinfo->driver, "netxen_nic", 32); + strncpy(drvinfo->driver, netxen_nic_driver_name, 32); strncpy(drvinfo->version, NETXEN_NIC_LINUX_VERSIONID, 32); fw_major = readl(NETXEN_CRB_NORMALIZE(adapter, NETXEN_FW_VERSION_MAJOR)); @@ -210,7 +210,6 @@ netxen_nic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) printk(KERN_ERR "netxen-nic: Unsupported board model %d\n", (netxen_brdtype_t) boardinfo->board_type); return -EIO; - } return 0; @@ -226,18 +225,18 @@ netxen_nic_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) /* read which mode */ if (adapter->ahw.board_type == NETXEN_NIC_GBE) { /* autonegotiation */ - if (adapter->ops->phy_write - && adapter->ops->phy_write(adapter, port->portnum, - NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - (__le32) ecmd->autoneg) != 0) + if (adapter->phy_write + && adapter->phy_write(adapter, port->portnum, + NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, + (__le32) ecmd->autoneg) != 0) return -EIO; else port->link_autoneg = ecmd->autoneg; - if (adapter->ops->phy_read - && adapter->ops->phy_read(adapter, port->portnum, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - &status) != 0) + if (adapter->phy_read + && adapter->phy_read(adapter, port->portnum, + NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, + &status) != 0) return -EIO; /* speed */ @@ -257,10 +256,10 @@ netxen_nic_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) netxen_clear_phy_duplex(status); if (ecmd->duplex == DUPLEX_FULL) netxen_set_phy_duplex(status); - if (adapter->ops->phy_write - && adapter->ops->phy_write(adapter, port->portnum, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - *((int *)&status)) != 0) + if (adapter->phy_write + && adapter->phy_write(adapter, port->portnum, + NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, + *((int *)&status)) != 0) return -EIO; else { port->link_speed = ecmd->speed; @@ -422,10 +421,10 @@ static u32 netxen_nic_get_link(struct net_device *dev) /* read which mode */ if (adapter->ahw.board_type == NETXEN_NIC_GBE) { - if (adapter->ops->phy_read - && adapter->ops->phy_read(adapter, port->portnum, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - &status) != 0) + if (adapter->phy_read + && adapter->phy_read(adapter, port->portnum, + NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, + &status) != 0) return -EIO; else return (netxen_get_phy_link(status)); @@ -460,20 +459,22 @@ netxen_nic_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct netxen_port *port = netdev_priv(dev); struct netxen_adapter *adapter = port->adapter; - int i, j; + int i; ring->rx_pending = 0; + ring->rx_jumbo_pending = 0; for (i = 0; i < MAX_RCV_CTX; ++i) { - for (j = 0; j < NUM_RCV_DESC_RINGS; j++) - ring->rx_pending += - adapter->recv_ctx[i].rcv_desc[j].rcv_pending; + ring->rx_pending += adapter->recv_ctx[i]. + rcv_desc[RCV_DESC_NORMAL_CTXID].rcv_pending; + ring->rx_jumbo_pending += adapter->recv_ctx[i]. + rcv_desc[RCV_DESC_JUMBO_CTXID].rcv_pending; } ring->rx_max_pending = adapter->max_rx_desc_count; ring->tx_max_pending = adapter->max_tx_desc_count; + ring->rx_jumbo_max_pending = adapter->max_jumbo_rx_desc_count; ring->rx_mini_max_pending = 0; ring->rx_mini_pending = 0; - ring->rx_jumbo_max_pending = 0; ring->rx_jumbo_pending = 0; } @@ -526,10 +527,10 @@ netxen_nic_set_pauseparam(struct net_device *dev, *(u32 *) (&val)); /* set autoneg */ autoneg = pause->autoneg; - if (adapter->ops->phy_write - && adapter->ops->phy_write(adapter, port->portnum, - NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - (__le32) autoneg) != 0) + if (adapter->phy_write + && adapter->phy_write(adapter, port->portnum, + NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, + (__le32) autoneg) != 0) return -EIO; else { port->link_autoneg = pause->autoneg; diff --git a/drivers/net/netxen/netxen_nic_hdr.h b/drivers/net/netxen/netxen_nic_hdr.h index 72c6ec4ee2a..fe8b675f9e7 100644 --- a/drivers/net/netxen/netxen_nic_hdr.h +++ b/drivers/net/netxen/netxen_nic_hdr.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index 105c24f0ad4..9147b6048df 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -42,7 +42,7 @@ #define NETXEN_FLASH_BASE (BOOTLD_START) #define NETXEN_PHANTOM_MEM_BASE (NETXEN_FLASH_BASE) -#define NETXEN_MAX_MTU 8000 +#define NETXEN_MAX_MTU 8000 + NETXEN_ENET_HEADER_SIZE + NETXEN_ETH_FCS_SIZE #define NETXEN_MIN_MTU 64 #define NETXEN_ETH_FCS_SIZE 4 #define NETXEN_ENET_HEADER_SIZE 14 @@ -81,8 +81,8 @@ int netxen_nic_set_mac(struct net_device *netdev, void *p) DPRINTK(INFO, "valid ether addr\n"); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - if (adapter->ops->macaddr_set) - adapter->ops->macaddr_set(port, addr->sa_data); + if (adapter->macaddr_set) + adapter->macaddr_set(port, addr->sa_data); return 0; } @@ -99,17 +99,17 @@ void netxen_nic_set_multi(struct net_device *netdev) mc_ptr = netdev->mc_list; if (netdev->flags & IFF_PROMISC) { - if (adapter->ops->set_promisc) - adapter->ops->set_promisc(adapter, - port->portnum, - NETXEN_NIU_PROMISC_MODE); + if (adapter->set_promisc) + adapter->set_promisc(adapter, + port->portnum, + NETXEN_NIU_PROMISC_MODE); } else { - if (adapter->ops->unset_promisc && + if (adapter->unset_promisc && adapter->ahw.boardcfg.board_type != NETXEN_BRDTYPE_P2_SB31_10G_IMEZ) - adapter->ops->unset_promisc(adapter, - port->portnum, - NETXEN_NIU_NON_PROMISC_MODE); + adapter->unset_promisc(adapter, + port->portnum, + NETXEN_NIU_NON_PROMISC_MODE); } if (adapter->ahw.board_type == NETXEN_NIC_XGBE) { netxen_nic_mcr_set_mode_select(netxen_mac_addr_cntl_data, 0x03); @@ -160,8 +160,8 @@ int netxen_nic_change_mtu(struct net_device *netdev, int mtu) return -EINVAL; } - if (adapter->ops->set_mtu) - adapter->ops->set_mtu(port, mtu); + if (adapter->set_mtu) + adapter->set_mtu(port, mtu); netdev->mtu = mtu; return 0; @@ -176,22 +176,18 @@ int netxen_nic_hw_resources(struct netxen_adapter *adapter) struct netxen_hardware_context *hw = &adapter->ahw; u32 state = 0; void *addr; - void *pause_addr; int loops = 0, err = 0; int ctx, ring; u32 card_cmdring = 0; - struct netxen_rcv_desc_crb *rcv_desc_crb = NULL; struct netxen_recv_context *recv_ctx; struct netxen_rcv_desc_ctx *rcv_desc; - DPRINTK(INFO, "crb_base: %lx %lx", NETXEN_PCI_CRBSPACE, + DPRINTK(INFO, "crb_base: %lx %x", NETXEN_PCI_CRBSPACE, PCI_OFFSET_SECOND_RANGE(adapter, NETXEN_PCI_CRBSPACE)); - DPRINTK(INFO, "cam base: %lx %lx", NETXEN_CRB_CAM, + DPRINTK(INFO, "cam base: %lx %x", NETXEN_CRB_CAM, pci_base_offset(adapter, NETXEN_CRB_CAM)); - DPRINTK(INFO, "cam RAM: %lx %lx", NETXEN_CAM_RAM_BASE, + DPRINTK(INFO, "cam RAM: %lx %x", NETXEN_CAM_RAM_BASE, pci_base_offset(adapter, NETXEN_CAM_RAM_BASE)); - DPRINTK(INFO, "NIC base:%lx %lx\n", NIC_CRB_BASE_PORT1, - pci_base_offset(adapter, NIC_CRB_BASE_PORT1)); /* Window 1 call */ card_cmdring = readl(NETXEN_CRB_NORMALIZE(adapter, CRB_CMDPEG_CMDRING)); @@ -226,33 +222,42 @@ int netxen_nic_hw_resources(struct netxen_adapter *adapter) DPRINTK(INFO, "Recieve Peg ready too. starting stuff\n"); addr = netxen_alloc(adapter->ahw.pdev, - sizeof(struct cmd_desc_type0) * - adapter->max_tx_desc_count, - &hw->cmd_desc_phys_addr, &hw->cmd_desc_pdev); + sizeof(struct netxen_ring_ctx) + + sizeof(uint32_t), + (dma_addr_t *) & adapter->ctx_desc_phys_addr, + &adapter->ctx_desc_pdev); + printk("ctx_desc_phys_addr: 0x%llx\n", + (u64) adapter->ctx_desc_phys_addr); if (addr == NULL) { DPRINTK(ERR, "bad return from pci_alloc_consistent\n"); - return -ENOMEM; + err = -ENOMEM; + return err; } + memset(addr, 0, sizeof(struct netxen_ring_ctx)); + adapter->ctx_desc = (struct netxen_ring_ctx *)addr; + adapter->ctx_desc->cmd_consumer_offset = adapter->ctx_desc_phys_addr + + sizeof(struct netxen_ring_ctx); + adapter->cmd_consumer = (uint32_t *) (((char *)addr) + + sizeof(struct netxen_ring_ctx)); + + addr = pci_alloc_consistent(adapter->ahw.pdev, + sizeof(struct cmd_desc_type0) * + adapter->max_tx_desc_count, + (dma_addr_t *) & hw->cmd_desc_phys_addr); + printk("cmd_desc_phys_addr: 0x%llx\n", (u64) hw->cmd_desc_phys_addr); - pause_addr = netxen_alloc(adapter->ahw.pdev, 512, - (dma_addr_t *) & hw->pause_physaddr, - &hw->pause_pdev); - if (pause_addr == NULL) { - DPRINTK(1, ERR, "bad return from pci_alloc_consistent\n"); + if (addr == NULL) { + DPRINTK(ERR, "bad return from pci_alloc_consistent\n"); + netxen_free_hw_resources(adapter); return -ENOMEM; } - hw->pauseaddr = (char *)pause_addr; - { - u64 *ptr = (u64 *) pause_addr; - *ptr++ = NETXEN_NIC_ZERO_PAUSE_ADDR; - *ptr++ = NETXEN_NIC_ZERO_PAUSE_ADDR; - *ptr++ = NETXEN_NIC_UNIT_PAUSE_ADDR; - *ptr++ = NETXEN_NIC_ZERO_PAUSE_ADDR; - *ptr++ = NETXEN_NIC_EPG_PAUSE_ADDR1; - *ptr++ = NETXEN_NIC_EPG_PAUSE_ADDR2; - } + adapter->ctx_desc->cmd_ring_addr_lo = + hw->cmd_desc_phys_addr & 0xffffffffUL; + adapter->ctx_desc->cmd_ring_addr_hi = + ((u64) hw->cmd_desc_phys_addr >> 32); + adapter->ctx_desc->cmd_ring_size = adapter->max_tx_desc_count; hw->cmd_desc_head = (struct cmd_desc_type0 *)addr; @@ -273,6 +278,12 @@ int netxen_nic_hw_resources(struct netxen_adapter *adapter) return err; } rcv_desc->desc_head = (struct rcv_desc *)addr; + adapter->ctx_desc->rcv_ctx[ring].rcv_ring_addr_lo = + rcv_desc->phys_addr & 0xffffffffUL; + adapter->ctx_desc->rcv_ctx[ring].rcv_ring_addr_hi = + ((u64) rcv_desc->phys_addr >> 32); + adapter->ctx_desc->rcv_ctx[ring].rcv_ring_size = + rcv_desc->max_rx_desc_count; } addr = netxen_alloc(adapter->ahw.pdev, STATUS_DESC_RINGSIZE, @@ -286,47 +297,21 @@ int netxen_nic_hw_resources(struct netxen_adapter *adapter) return err; } recv_ctx->rcv_status_desc_head = (struct status_desc *)addr; - for (ring = 0; ring < NUM_RCV_DESC_RINGS; ring++) { - rcv_desc = &recv_ctx->rcv_desc[ring]; - rcv_desc_crb = - &recv_crb_registers[ctx].rcv_desc_crb[ring]; - DPRINTK(INFO, "ring #%d crb global ring reg 0x%x\n", - ring, rcv_desc_crb->crb_globalrcv_ring); - /* Window = 1 */ - writel(lower32(rcv_desc->phys_addr), - NETXEN_CRB_NORMALIZE(adapter, - rcv_desc_crb-> - crb_globalrcv_ring)); - DPRINTK(INFO, "GLOBAL_RCV_RING ctx %d, addr 0x%x" - " val 0x%llx," - " virt %p\n", ctx, - rcv_desc_crb->crb_globalrcv_ring, - (unsigned long long)rcv_desc->phys_addr, - +rcv_desc->desc_head); - } + adapter->ctx_desc->sts_ring_addr_lo = + recv_ctx->rcv_status_desc_phys_addr & 0xffffffffUL; + adapter->ctx_desc->sts_ring_addr_hi = + ((u64) recv_ctx->rcv_status_desc_phys_addr >> 32); + adapter->ctx_desc->sts_ring_size = adapter->max_rx_desc_count; - /* Window = 1 */ - writel(lower32(recv_ctx->rcv_status_desc_phys_addr), - NETXEN_CRB_NORMALIZE(adapter, - recv_crb_registers[ctx]. - crb_rcvstatus_ring)); - DPRINTK(INFO, "RCVSTATUS_RING, ctx %d, addr 0x%x," - " val 0x%x,virt%p\n", - ctx, - recv_crb_registers[ctx].crb_rcvstatus_ring, - (unsigned long long)recv_ctx->rcv_status_desc_phys_addr, - recv_ctx->rcv_status_desc_head); } /* Window = 1 */ - writel(lower32(hw->pause_physaddr), - NETXEN_CRB_NORMALIZE(adapter, CRB_PAUSE_ADDR_LO)); - writel(upper32(hw->pause_physaddr), - NETXEN_CRB_NORMALIZE(adapter, CRB_PAUSE_ADDR_HI)); - - writel(lower32(hw->cmd_desc_phys_addr), - NETXEN_CRB_NORMALIZE(adapter, CRB_HOST_CMD_ADDR_LO)); - writel(upper32(hw->cmd_desc_phys_addr), - NETXEN_CRB_NORMALIZE(adapter, CRB_HOST_CMD_ADDR_HI)); + + writel(lower32(adapter->ctx_desc_phys_addr), + NETXEN_CRB_NORMALIZE(adapter, CRB_CTX_ADDR_REG_LO)); + writel(upper32(adapter->ctx_desc_phys_addr), + NETXEN_CRB_NORMALIZE(adapter, CRB_CTX_ADDR_REG_HI)); + writel(NETXEN_CTX_SIGNATURE, + NETXEN_CRB_NORMALIZE(adapter, CRB_CTX_SIGNATURE_REG)); return err; } @@ -336,6 +321,15 @@ void netxen_free_hw_resources(struct netxen_adapter *adapter) struct netxen_rcv_desc_ctx *rcv_desc; int ctx, ring; + if (adapter->ctx_desc != NULL) { + pci_free_consistent(adapter->ctx_desc_pdev, + sizeof(struct netxen_ring_ctx) + + sizeof(uint32_t), + adapter->ctx_desc, + adapter->ctx_desc_phys_addr); + adapter->ctx_desc = NULL; + } + if (adapter->ahw.cmd_desc_head != NULL) { pci_free_consistent(adapter->ahw.cmd_desc_pdev, sizeof(struct cmd_desc_type0) * @@ -344,11 +338,9 @@ void netxen_free_hw_resources(struct netxen_adapter *adapter) adapter->ahw.cmd_desc_phys_addr); adapter->ahw.cmd_desc_head = NULL; } - if (adapter->ahw.pauseaddr != NULL) { - pci_free_consistent(adapter->ahw.pause_pdev, 512, - adapter->ahw.pauseaddr, - adapter->ahw.pause_physaddr); - adapter->ahw.pauseaddr = NULL; + /* Special handling: there are 2 ports on this board */ + if (adapter->ahw.boardcfg.board_type == NETXEN_BRDTYPE_P2_SB31_10G_IMEZ) { + adapter->ahw.max_ports = 2; } for (ctx = 0; ctx < MAX_RCV_CTX; ++ctx) { @@ -383,19 +375,22 @@ void netxen_tso_check(struct netxen_adapter *adapter, desc->total_hdr_length = sizeof(struct ethhdr) + ((skb->nh.iph)->ihl * sizeof(u32)) + ((skb->h.th)->doff * sizeof(u32)); - desc->opcode = TX_TCP_LSO; + netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO); } else if (skb->ip_summed == CHECKSUM_COMPLETE) { if (skb->nh.iph->protocol == IPPROTO_TCP) { - desc->opcode = TX_TCP_PKT; + netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT); } else if (skb->nh.iph->protocol == IPPROTO_UDP) { - desc->opcode = TX_UDP_PKT; + netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT); } else { return; } } adapter->stats.xmitcsummed++; - CMD_DESC_TCP_HDR_OFFSET_WRT(desc, skb->h.raw - skb->data); - desc->length_tcp_hdr = cpu_to_le32(desc->length_tcp_hdr); + desc->tcp_hdr_offset = skb->h.raw - skb->data; + netxen_set_cmd_desc_totallength(desc, + cpu_to_le32 + (netxen_get_cmd_desc_totallength + (desc))); desc->ip_hdr_offset = skb->nh.raw - skb->data; } @@ -648,7 +643,7 @@ void netxen_nic_reg_write(struct netxen_adapter *adapter, u64 off, u32 val) addr = NETXEN_CRB_NORMALIZE(adapter, off); DPRINTK(INFO, "writing to base %lx offset %llx addr %p data %x\n", - pci_base(adapter, off), off, addr); + pci_base(adapter, off), off, addr, val); writel(val, addr); } @@ -660,7 +655,7 @@ int netxen_nic_reg_read(struct netxen_adapter *adapter, u64 off) addr = NETXEN_CRB_NORMALIZE(adapter, off); DPRINTK(INFO, "reading from base %lx offset %llx addr %p\n", - adapter->ahw.pci_base, off, addr); + pci_base(adapter, off), off, addr); val = readl(addr); writel(val, addr); @@ -848,8 +843,8 @@ void netxen_nic_stop_all_ports(struct netxen_adapter *adapter) for (port_nr = 0; port_nr < adapter->ahw.max_ports; port_nr++) { port = adapter->port[port_nr]; - if (adapter->ops->stop_port) - adapter->ops->stop_port(adapter, port->portnum); + if (adapter->stop_port) + adapter->stop_port(adapter, port->portnum); } } @@ -873,13 +868,13 @@ void netxen_nic_set_link_parameters(struct netxen_port *port) { struct netxen_adapter *adapter = port->adapter; __le32 status; - u16 autoneg; + __le32 autoneg; __le32 mode; netxen_nic_read_w0(adapter, NETXEN_NIU_MODE, &mode); if (netxen_get_niu_enable_ge(mode)) { /* Gb 10/100/1000 Mbps mode */ - if (adapter->ops->phy_read - && adapter->ops-> + if (adapter->phy_read + && adapter-> phy_read(adapter, port->portnum, NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, &status) == 0) { @@ -909,11 +904,11 @@ void netxen_nic_set_link_parameters(struct netxen_port *port) port->link_duplex = -1; break; } - if (adapter->ops->phy_read - && adapter->ops-> + if (adapter->phy_read + && adapter-> phy_read(adapter, port->portnum, NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - (__le32 *) & autoneg) != 0) + &autoneg) != 0) port->link_autoneg = autoneg; } else goto link_down; @@ -1008,3 +1003,291 @@ int netxen_crb_read_val(struct netxen_adapter *adapter, unsigned long off) netxen_nic_hw_read_wx(adapter, off, &data, 4); return data; } + +int netxen_nic_hw_write_ioctl(struct netxen_adapter *adapter, u64 off, + void *data, int len) +{ + void *addr; + u64 offset = off; + u8 *mem_ptr = NULL; + unsigned long mem_base; + unsigned long mem_page; + + if (ADDR_IN_WINDOW1(off)) { + addr = NETXEN_CRB_NORMALIZE(adapter, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + offset = NETXEN_CRB_NORMAL(off); + mem_page = offset & PAGE_MASK; + if (mem_page != ((offset + len - 1) & PAGE_MASK)) + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) { + return 1; + } + addr = mem_ptr; + addr += offset & (PAGE_SIZE - 1); + } + } else { + addr = pci_base_offset(adapter, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + mem_page = off & PAGE_MASK; + if (mem_page != ((off + len - 1) & PAGE_MASK)) + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) { + return 1; + } + addr = mem_ptr; + addr += off & (PAGE_SIZE - 1); + } + netxen_nic_pci_change_crbwindow(adapter, 0); + } + switch (len) { + case 1: + writeb(*(u8 *) data, addr); + break; + case 2: + writew(*(u16 *) data, addr); + break; + case 4: + writel(*(u32 *) data, addr); + break; + case 8: + writeq(*(u64 *) data, addr); + break; + default: + DPRINTK(INFO, + "writing data %lx to offset %llx, num words=%d\n", + *(unsigned long *)data, off, (len >> 3)); + + netxen_nic_hw_block_write64((u64 __iomem *) data, addr, + (len >> 3)); + break; + } + + if (!ADDR_IN_WINDOW1(off)) + netxen_nic_pci_change_crbwindow(adapter, 1); + if (mem_ptr) + iounmap(mem_ptr); + return 0; +} + +int netxen_nic_hw_read_ioctl(struct netxen_adapter *adapter, u64 off, + void *data, int len) +{ + void *addr; + u64 offset; + u8 *mem_ptr = NULL; + unsigned long mem_base; + unsigned long mem_page; + + if (ADDR_IN_WINDOW1(off)) { + addr = NETXEN_CRB_NORMALIZE(adapter, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + offset = NETXEN_CRB_NORMAL(off); + mem_page = offset & PAGE_MASK; + if (mem_page != ((offset + len - 1) & PAGE_MASK)) + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) { + *(u8 *) data = 0; + return 1; + } + addr = mem_ptr; + addr += offset & (PAGE_SIZE - 1); + } + } else { + addr = pci_base_offset(adapter, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + mem_page = off & PAGE_MASK; + if (mem_page != ((off + len - 1) & PAGE_MASK)) + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = + ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) + return 1; + addr = mem_ptr; + addr += off & (PAGE_SIZE - 1); + } + netxen_nic_pci_change_crbwindow(adapter, 0); + } + switch (len) { + case 1: + *(u8 *) data = readb(addr); + break; + case 2: + *(u16 *) data = readw(addr); + break; + case 4: + *(u32 *) data = readl(addr); + break; + case 8: + *(u64 *) data = readq(addr); + break; + default: + netxen_nic_hw_block_read64((u64 __iomem *) data, addr, + (len >> 3)); + break; + } + if (!ADDR_IN_WINDOW1(off)) + netxen_nic_pci_change_crbwindow(adapter, 1); + if (mem_ptr) + iounmap(mem_ptr); + return 0; +} + +int netxen_nic_pci_mem_write_ioctl(struct netxen_adapter *adapter, u64 off, + void *data, int size) +{ + void *addr; + int ret = 0; + u8 *mem_ptr = NULL; + unsigned long mem_base; + unsigned long mem_page; + + if (data == NULL || off > (128 * 1024 * 1024)) { + printk(KERN_ERR "%s: data: %p off:%llx\n", + netxen_nic_driver_name, data, off); + return 1; + } + off = netxen_nic_pci_set_window(adapter, off); + /* Corner case : Malicious user tried to break the driver by reading + last few bytes in ranges and tries to read further addresses. + */ + if (!pci_base(adapter, off + size - 1) && pci_base(adapter, off)) { + printk(KERN_ERR "%s: Invalid access to memory address range" + " 0x%llx - 0x%llx\n", netxen_nic_driver_name, off, + off + size); + return 1; + } + addr = pci_base_offset(adapter, off); + DPRINTK(INFO, "writing data %llx to offset %llx\n", + *(unsigned long long *)data, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + mem_page = off & PAGE_MASK; + /* Map two pages whenever user tries to access addresses in two + consecutive pages. + */ + if (mem_page != ((off + size - 1) & PAGE_MASK)) + mem_ptr = ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) { + return 1; + } + addr = mem_ptr; + addr += off & (PAGE_SIZE - 1); + } + switch (size) { + case 1: + writeb(*(u8 *) data, addr); + break; + case 2: + writew(*(u16 *) data, addr); + break; + case 4: + writel(*(u32 *) data, addr); + break; + case 8: + writeq(*(u64 *) data, addr); + break; + default: + DPRINTK(INFO, + "writing data %lx to offset %llx, num words=%d\n", + *(unsigned long *)data, off, (size >> 3)); + + netxen_nic_hw_block_write64((u64 __iomem *) data, addr, + (size >> 3)); + break; + } + + if (mem_ptr) + iounmap(mem_ptr); + DPRINTK(INFO, "wrote %llx\n", *(unsigned long long *)data); + + return ret; +} + +int netxen_nic_pci_mem_read_ioctl(struct netxen_adapter *adapter, + u64 off, void *data, int size) +{ + void *addr; + int ret = 0; + u8 *mem_ptr = NULL; + unsigned long mem_base; + unsigned long mem_page; + + if (data == NULL || off > (128 * 1024 * 1024)) { + printk(KERN_ERR "%s: data: %p off:%llx\n", + netxen_nic_driver_name, data, off); + return 1; + } + off = netxen_nic_pci_set_window(adapter, off); + /* Corner case : Malicious user tried to break the driver by reading + last few bytes in ranges and tries to read further addresses. + */ + if (!pci_base(adapter, off + size - 1) && pci_base(adapter, off)) { + printk(KERN_ERR "%s: Invalid access to memory address range" + " 0x%llx - 0x%llx\n", netxen_nic_driver_name, off, + off + size); + return 1; + } + addr = pci_base_offset(adapter, off); + if (!addr) { + mem_base = pci_resource_start(adapter->ahw.pdev, 0); + mem_page = off & PAGE_MASK; + /* Map two pages whenever user tries to access addresses in two + consecutive pages. + */ + if (mem_page != ((off + size - 1) & PAGE_MASK)) + mem_ptr = ioremap(mem_base + mem_page, PAGE_SIZE * 2); + else + mem_ptr = ioremap(mem_base + mem_page, PAGE_SIZE); + if (mem_ptr == 0UL) { + *(u8 *) data = 0; + return 1; + } + addr = mem_ptr; + addr += off & (PAGE_SIZE - 1); + } + switch (size) { + case 1: + *(u8 *) data = readb(addr); + break; + case 2: + *(u16 *) data = readw(addr); + break; + case 4: + *(u32 *) data = readl(addr); + break; + case 8: + *(u64 *) data = readq(addr); + break; + default: + netxen_nic_hw_block_read64((u64 __iomem *) data, addr, + (size >> 3)); + break; + } + + if (mem_ptr) + iounmap(mem_ptr); + DPRINTK(INFO, "read %llx\n", *(unsigned long long *)data); + + return ret; +} diff --git a/drivers/net/netxen/netxen_nic_hw.h b/drivers/net/netxen/netxen_nic_hw.h index 201a636b7ab..0685633a9c1 100644 --- a/drivers/net/netxen/netxen_nic_hw.h +++ b/drivers/net/netxen/netxen_nic_hw.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -83,8 +83,8 @@ struct netxen_adapter; #define NETXEN_PCI_MAPSIZE_BYTES (NETXEN_PCI_MAPSIZE << 20) #define NETXEN_NIC_LOCKED_READ_REG(X, Y) \ - addr = pci_base_offset(adapter, (X)); \ - *(u32 *)Y = readl(addr); + addr = pci_base_offset(adapter, X); \ + *(u32 *)Y = readl((void __iomem*) addr); struct netxen_port; void netxen_nic_set_link_parameters(struct netxen_port *port); diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index eae18236aef..869725f0bb1 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1,25 +1,25 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -137,6 +137,8 @@ int netxen_init_firmware(struct netxen_adapter *adapter) return err; } /* Window 1 call */ + writel(MPORT_SINGLE_FUNCTION_MODE, + NETXEN_CRB_NORMALIZE(adapter, CRB_MPORT_MODE)); writel(PHAN_INITIALIZE_ACK, NETXEN_CRB_NORMALIZE(adapter, CRB_CMDPEG_STATE)); @@ -184,15 +186,12 @@ void netxen_initialize_adapter_sw(struct netxen_adapter *adapter) for (i = 0; i < num_rx_bufs; i++) { rx_buf->ref_handle = i; rx_buf->state = NETXEN_BUFFER_FREE; - DPRINTK(INFO, "Rx buf:ctx%d i(%d) rx_buf:" "%p\n", ctxid, i, rx_buf); rx_buf++; } } } - DPRINTK(INFO, "initialized buffers for %s and %s\n", - "adapter->free_cmd_buf_list", "adapter->free_rxbuf"); } void netxen_initialize_adapter_hw(struct netxen_adapter *adapter) @@ -212,37 +211,36 @@ void netxen_initialize_adapter_hw(struct netxen_adapter *adapter) void netxen_initialize_adapter_ops(struct netxen_adapter *adapter) { - struct netxen_drvops *ops = adapter->ops; switch (adapter->ahw.board_type) { case NETXEN_NIC_GBE: - ops->enable_phy_interrupts = + adapter->enable_phy_interrupts = netxen_niu_gbe_enable_phy_interrupts; - ops->disable_phy_interrupts = + adapter->disable_phy_interrupts = netxen_niu_gbe_disable_phy_interrupts; - ops->handle_phy_intr = netxen_nic_gbe_handle_phy_intr; - ops->macaddr_set = netxen_niu_macaddr_set; - ops->set_mtu = netxen_nic_set_mtu_gb; - ops->set_promisc = netxen_niu_set_promiscuous_mode; - ops->unset_promisc = netxen_niu_set_promiscuous_mode; - ops->phy_read = netxen_niu_gbe_phy_read; - ops->phy_write = netxen_niu_gbe_phy_write; - ops->init_port = netxen_niu_gbe_init_port; - ops->init_niu = netxen_nic_init_niu_gb; - ops->stop_port = netxen_niu_disable_gbe_port; + adapter->handle_phy_intr = netxen_nic_gbe_handle_phy_intr; + adapter->macaddr_set = netxen_niu_macaddr_set; + adapter->set_mtu = netxen_nic_set_mtu_gb; + adapter->set_promisc = netxen_niu_set_promiscuous_mode; + adapter->unset_promisc = netxen_niu_set_promiscuous_mode; + adapter->phy_read = netxen_niu_gbe_phy_read; + adapter->phy_write = netxen_niu_gbe_phy_write; + adapter->init_port = netxen_niu_gbe_init_port; + adapter->init_niu = netxen_nic_init_niu_gb; + adapter->stop_port = netxen_niu_disable_gbe_port; break; case NETXEN_NIC_XGBE: - ops->enable_phy_interrupts = + adapter->enable_phy_interrupts = netxen_niu_xgbe_enable_phy_interrupts; - ops->disable_phy_interrupts = + adapter->disable_phy_interrupts = netxen_niu_xgbe_disable_phy_interrupts; - ops->handle_phy_intr = netxen_nic_xgbe_handle_phy_intr; - ops->macaddr_set = netxen_niu_xg_macaddr_set; - ops->set_mtu = netxen_nic_set_mtu_xgb; - ops->init_port = netxen_niu_xg_init_port; - ops->set_promisc = netxen_niu_xg_set_promiscuous_mode; - ops->unset_promisc = netxen_niu_xg_set_promiscuous_mode; - ops->stop_port = netxen_niu_disable_xg_port; + adapter->handle_phy_intr = netxen_nic_xgbe_handle_phy_intr; + adapter->macaddr_set = netxen_niu_xg_macaddr_set; + adapter->set_mtu = netxen_nic_set_mtu_xgb; + adapter->init_port = netxen_niu_xg_init_port; + adapter->set_promisc = netxen_niu_xg_set_promiscuous_mode; + adapter->unset_promisc = netxen_niu_xg_set_promiscuous_mode; + adapter->stop_port = netxen_niu_disable_xg_port; break; default: @@ -383,8 +381,8 @@ int netxen_rom_wip_poll(struct netxen_adapter *adapter) return 0; } -static inline int do_rom_fast_write(struct netxen_adapter *adapter, - int addr, int data) +static inline int do_rom_fast_write(struct netxen_adapter *adapter, int addr, + int data) { if (netxen_rom_wren(adapter)) { return -1; @@ -622,6 +620,43 @@ int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) return 0; } +int netxen_initialize_adapter_offload(struct netxen_adapter *adapter) +{ + uint64_t addr; + uint32_t hi; + uint32_t lo; + + adapter->dummy_dma.addr = + pci_alloc_consistent(adapter->ahw.pdev, + NETXEN_HOST_DUMMY_DMA_SIZE, + &adapter->dummy_dma.phys_addr); + if (adapter->dummy_dma.addr == NULL) { + printk("%s: ERROR: Could not allocate dummy DMA memory\n", + __FUNCTION__); + return -ENOMEM; + } + + addr = (uint64_t) adapter->dummy_dma.phys_addr; + hi = (addr >> 32) & 0xffffffff; + lo = addr & 0xffffffff; + + writel(hi, NETXEN_CRB_NORMALIZE(adapter, CRB_HOST_DUMMY_BUF_ADDR_HI)); + writel(lo, NETXEN_CRB_NORMALIZE(adapter, CRB_HOST_DUMMY_BUF_ADDR_LO)); + + return 0; +} + +void netxen_free_adapter_offload(struct netxen_adapter *adapter) +{ + if (adapter->dummy_dma.addr) { + pci_free_consistent(adapter->ahw.pdev, + NETXEN_HOST_DUMMY_DMA_SIZE, + adapter->dummy_dma.addr, + adapter->dummy_dma.phys_addr); + adapter->dummy_dma.addr = NULL; + } +} + void netxen_phantom_init(struct netxen_adapter *adapter, int pegtune_val) { u32 val = 0; @@ -656,7 +691,8 @@ int netxen_nic_rx_has_work(struct netxen_adapter *adapter) desc_head = recv_ctx->rcv_status_desc_head; desc = &desc_head[consumer]; - if (((le16_to_cpu(desc->owner)) & STATUS_OWNER_HOST)) + if (((le16_to_cpu(netxen_get_sts_owner(desc))) + & STATUS_OWNER_HOST)) return 1; } @@ -735,8 +771,8 @@ void netxen_watchdog_task(struct work_struct *work) netif_wake_queue(netdev); } - if (adapter->ops->handle_phy_intr) - adapter->ops->handle_phy_intr(adapter); + if (adapter->handle_phy_intr) + adapter->handle_phy_intr(adapter); mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ); } @@ -749,19 +785,19 @@ void netxen_process_rcv(struct netxen_adapter *adapter, int ctxid, struct status_desc *desc) { - struct netxen_port *port = adapter->port[STATUS_DESC_PORT(desc)]; + struct netxen_port *port = adapter->port[netxen_get_sts_port(desc)]; struct pci_dev *pdev = port->pdev; struct net_device *netdev = port->netdev; - int index = le16_to_cpu(desc->reference_handle); + int index = le16_to_cpu(netxen_get_sts_refhandle(desc)); struct netxen_recv_context *recv_ctx = &(adapter->recv_ctx[ctxid]); struct netxen_rx_buffer *buffer; struct sk_buff *skb; - u32 length = le16_to_cpu(desc->total_length); + u32 length = le16_to_cpu(netxen_get_sts_totallength(desc)); u32 desc_ctx; struct netxen_rcv_desc_ctx *rcv_desc; int ret; - desc_ctx = STATUS_DESC_TYPE(desc); + desc_ctx = netxen_get_sts_type(desc); if (unlikely(desc_ctx >= NUM_RCV_DESC_RINGS)) { printk("%s: %s Bad Rcv descriptor ring\n", netxen_nic_driver_name, netdev->name); @@ -769,20 +805,49 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid, } rcv_desc = &recv_ctx->rcv_desc[desc_ctx]; + if (unlikely(index > rcv_desc->max_rx_desc_count)) { + DPRINTK(ERR, "Got a buffer index:%x Max is %x\n", + index, rcv_desc->max_rx_desc_count); + return; + } buffer = &rcv_desc->rx_buf_arr[index]; + if (desc_ctx == RCV_DESC_LRO_CTXID) { + buffer->lro_current_frags++; + if (netxen_get_sts_desc_lro_last_frag(desc)) { + buffer->lro_expected_frags = + netxen_get_sts_desc_lro_cnt(desc); + buffer->lro_length = length; + } + if (buffer->lro_current_frags != buffer->lro_expected_frags) { + if (buffer->lro_expected_frags != 0) { + printk("LRO: (refhandle:%x) recv frag." + "wait for last. flags: %x expected:%d" + "have:%d\n", index, + netxen_get_sts_desc_lro_last_frag(desc), + buffer->lro_expected_frags, + buffer->lro_current_frags); + } + return; + } + } pci_unmap_single(pdev, buffer->dma, rcv_desc->dma_size, PCI_DMA_FROMDEVICE); skb = (struct sk_buff *)buffer->skb; - if (likely(STATUS_DESC_STATUS(desc) == STATUS_CKSUM_OK)) { + if (likely(netxen_get_sts_status(desc) == STATUS_CKSUM_OK)) { port->stats.csummed++; skb->ip_summed = CHECKSUM_UNNECESSARY; - } else - skb->ip_summed = CHECKSUM_NONE; + } skb->dev = netdev; - skb_put(skb, length); + if (desc_ctx == RCV_DESC_LRO_CTXID) { + /* True length was only available on the last pkt */ + skb_put(skb, buffer->lro_length); + } else { + skb_put(skb, length); + } + skb->protocol = eth_type_trans(skb, netdev); ret = netif_receive_skb(skb); @@ -828,6 +893,8 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid, adapter->stats.post_called++; buffer->skb = NULL; buffer->state = NETXEN_BUFFER_FREE; + buffer->lro_current_frags = 0; + buffer->lro_expected_frags = 0; port->stats.no_rcv++; port->stats.rxbytes += length; @@ -840,6 +907,7 @@ u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctxid, int max) struct status_desc *desc_head = recv_ctx->rcv_status_desc_head; struct status_desc *desc; /* used to read status desc here */ u32 consumer = recv_ctx->status_rx_consumer; + u32 producer = 0; int count = 0, ring; DPRINTK(INFO, "procesing receive\n"); @@ -851,18 +919,22 @@ u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctxid, int max) */ while (count < max) { desc = &desc_head[consumer]; - if (!((le16_to_cpu(desc->owner)) & STATUS_OWNER_HOST)) { - DPRINTK(ERR, "desc %p ownedby %x\n", desc, desc->owner); + if (! + (le16_to_cpu(netxen_get_sts_owner(desc)) & + STATUS_OWNER_HOST)) { + DPRINTK(ERR, "desc %p ownedby %x\n", desc, + netxen_get_sts_owner(desc)); break; } netxen_process_rcv(adapter, ctxid, desc); - desc->owner = STATUS_OWNER_PHANTOM; + netxen_clear_sts_owner(desc); + netxen_set_sts_owner(desc, STATUS_OWNER_PHANTOM); consumer = (consumer + 1) & (adapter->max_rx_desc_count - 1); count++; } if (count) { for (ring = 0; ring < NUM_RCV_DESC_RINGS; ring++) { - netxen_post_rx_buffers(adapter, ctxid, ring); + netxen_post_rx_buffers_nodb(adapter, ctxid, ring); } } @@ -870,6 +942,7 @@ u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctxid, int max) if (count) { adapter->stats.process_rcv++; recv_ctx->status_rx_consumer = consumer; + recv_ctx->status_rx_producer = producer; /* Window = 1 */ writel(consumer, @@ -882,12 +955,13 @@ u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctxid, int max) } /* Process Command status ring */ -void netxen_process_cmd_ring(unsigned long data) +int netxen_process_cmd_ring(unsigned long data) { u32 last_consumer; u32 consumer; struct netxen_adapter *adapter = (struct netxen_adapter *)data; - int count = 0; + int count1 = 0; + int count2 = 0; struct netxen_cmd_buffer *buffer; struct netxen_port *port; /* port #1 */ struct netxen_port *nport; @@ -896,6 +970,7 @@ void netxen_process_cmd_ring(unsigned long data) u32 i; struct sk_buff *skb = NULL; int p; + int done; spin_lock(&adapter->tx_lock); last_consumer = adapter->last_cmd_consumer; @@ -905,14 +980,13 @@ void netxen_process_cmd_ring(unsigned long data) * number as part of the descriptor. This way we will be able to get * the netdev which is associated with that device. */ - consumer = - readl(NETXEN_CRB_NORMALIZE(adapter, CRB_CMD_CONSUMER_OFFSET)); + consumer = *(adapter->cmd_consumer); if (last_consumer == consumer) { /* Ring is empty */ DPRINTK(INFO, "last_consumer %d == consumer %d\n", last_consumer, consumer); spin_unlock(&adapter->tx_lock); - return; + return 1; } adapter->proc_cmd_buf_counter++; @@ -923,7 +997,7 @@ void netxen_process_cmd_ring(unsigned long data) */ spin_unlock(&adapter->tx_lock); - while ((last_consumer != consumer) && (count < MAX_STATUS_HANDLE)) { + while ((last_consumer != consumer) && (count1 < MAX_STATUS_HANDLE)) { buffer = &adapter->cmd_buf_arr[last_consumer]; port = adapter->port[buffer->port]; pdev = port->pdev; @@ -949,24 +1023,24 @@ void netxen_process_cmd_ring(unsigned long data) && netif_carrier_ok(port->netdev)) && ((jiffies - port->netdev->trans_start) > port->netdev->watchdog_timeo)) { - schedule_work(&port->adapter->tx_timeout_task); + SCHEDULE_WORK(&port->adapter->tx_timeout_task); } last_consumer = get_next_index(last_consumer, adapter->max_tx_desc_count); - count++; + count1++; } - adapter->stats.noxmitdone += count; + adapter->stats.noxmitdone += count1; - count = 0; + count2 = 0; spin_lock(&adapter->tx_lock); if ((--adapter->proc_cmd_buf_counter) == 0) { adapter->last_cmd_consumer = last_consumer; while ((adapter->last_cmd_consumer != consumer) - && (count < MAX_STATUS_HANDLE)) { + && (count2 < MAX_STATUS_HANDLE)) { buffer = &adapter->cmd_buf_arr[adapter->last_cmd_consumer]; - count++; + count2++; if (buffer->skb) break; else @@ -975,7 +1049,7 @@ void netxen_process_cmd_ring(unsigned long data) adapter->max_tx_desc_count); } } - if (count) { + if (count1 || count2) { for (p = 0; p < adapter->ahw.max_ports; p++) { nport = adapter->port[p]; if (netif_queue_stopped(nport->netdev) @@ -985,10 +1059,30 @@ void netxen_process_cmd_ring(unsigned long data) } } } + /* + * If everything is freed up to consumer then check if the ring is full + * If the ring is full then check if more needs to be freed and + * schedule the call back again. + * + * This happens when there are 2 CPUs. One could be freeing and the + * other filling it. If the ring is full when we get out of here and + * the card has already interrupted the host then the host can miss the + * interrupt. + * + * There is still a possible race condition and the host could miss an + * interrupt. The card has to take care of this. + */ + if (adapter->last_cmd_consumer == consumer && + (((adapter->cmd_producer + 1) % + adapter->max_tx_desc_count) == adapter->last_cmd_consumer)) { + consumer = *(adapter->cmd_consumer); + } + done = (adapter->last_cmd_consumer == consumer); spin_unlock(&adapter->tx_lock); DPRINTK(INFO, "last consumer is %d in %s\n", last_consumer, __FUNCTION__); + return (done); } /* @@ -1000,17 +1094,16 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) struct sk_buff *skb; struct netxen_recv_context *recv_ctx = &(adapter->recv_ctx[ctx]); struct netxen_rcv_desc_ctx *rcv_desc = NULL; - struct netxen_recv_crb *crbarea = &recv_crb_registers[ctx]; - struct netxen_rcv_desc_crb *rcv_desc_crb = NULL; - u32 producer; + uint producer; struct rcv_desc *pdesc; struct netxen_rx_buffer *buffer; int count = 0; int index = 0; + netxen_ctx_msg msg = 0; + dma_addr_t dma; adapter->stats.post_called++; rcv_desc = &recv_ctx->rcv_desc[ringid]; - rcv_desc_crb = &crbarea->rcv_desc_crb[ringid]; producer = rcv_desc->producer; index = rcv_desc->begin_alloc; @@ -1020,6 +1113,7 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) skb = dev_alloc_skb(rcv_desc->skb_size); if (unlikely(!skb)) { /* + * TODO * We need to schedule the posting of buffers to the pegs. */ rcv_desc->begin_alloc = index; @@ -1027,9 +1121,105 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) " allocated only %d buffers\n", count); break; } + count++; /* now there should be no failure */ pdesc = &rcv_desc->desc_head[producer]; - skb_reserve(skb, NET_IP_ALIGN); + +#if defined(XGB_DEBUG) + *(unsigned long *)(skb->head) = 0xc0debabe; + if (skb_is_nonlinear(skb)) { + printk("Allocated SKB @%p is nonlinear\n"); + } +#endif + skb_reserve(skb, 2); + /* This will be setup when we receive the + * buffer after it has been filled FSL TBD TBD + * skb->dev = netdev; + */ + dma = pci_map_single(pdev, skb->data, rcv_desc->dma_size, + PCI_DMA_FROMDEVICE); + pdesc->addr_buffer = dma; + buffer->skb = skb; + buffer->state = NETXEN_BUFFER_BUSY; + buffer->dma = dma; + /* make a rcv descriptor */ + pdesc->reference_handle = buffer->ref_handle; + pdesc->buffer_length = rcv_desc->dma_size; + DPRINTK(INFO, "done writing descripter\n"); + producer = + get_next_index(producer, rcv_desc->max_rx_desc_count); + index = get_next_index(index, rcv_desc->max_rx_desc_count); + buffer = &rcv_desc->rx_buf_arr[index]; + } + /* if we did allocate buffers, then write the count to Phantom */ + if (count) { + rcv_desc->begin_alloc = index; + rcv_desc->rcv_pending += count; + adapter->stats.lastposted = count; + adapter->stats.posted += count; + rcv_desc->producer = producer; + if (rcv_desc->rcv_free >= 32) { + rcv_desc->rcv_free = 0; + /* Window = 1 */ + writel((producer - 1) & + (rcv_desc->max_rx_desc_count - 1), + NETXEN_CRB_NORMALIZE(adapter, + recv_crb_registers[0]. + rcv_desc_crb[ringid]. + crb_rcv_producer_offset)); + /* + * Write a doorbell msg to tell phanmon of change in + * receive ring producer + */ + netxen_set_msg_peg_id(msg, NETXEN_RCV_PEG_DB_ID); + netxen_set_msg_privid(msg); + netxen_set_msg_count(msg, + ((producer - + 1) & (rcv_desc-> + max_rx_desc_count - 1))); + netxen_set_msg_ctxid(msg, 0); + netxen_set_msg_opcode(msg, NETXEN_RCV_PRODUCER(ringid)); + writel(msg, + DB_NORMALIZE(adapter, + NETXEN_RCV_PRODUCER_OFFSET)); + } + } +} + +void netxen_post_rx_buffers_nodb(struct netxen_adapter *adapter, uint32_t ctx, + uint32_t ringid) +{ + struct pci_dev *pdev = adapter->ahw.pdev; + struct sk_buff *skb; + struct netxen_recv_context *recv_ctx = &(adapter->recv_ctx[ctx]); + struct netxen_rcv_desc_ctx *rcv_desc = NULL; + u32 producer; + struct rcv_desc *pdesc; + struct netxen_rx_buffer *buffer; + int count = 0; + int index = 0; + + adapter->stats.post_called++; + rcv_desc = &recv_ctx->rcv_desc[ringid]; + + producer = rcv_desc->producer; + index = rcv_desc->begin_alloc; + buffer = &rcv_desc->rx_buf_arr[index]; + /* We can start writing rx descriptors into the phantom memory. */ + while (buffer->state == NETXEN_BUFFER_FREE) { + skb = dev_alloc_skb(rcv_desc->skb_size); + if (unlikely(!skb)) { + /* + * We need to schedule the posting of buffers to the pegs. + */ + rcv_desc->begin_alloc = index; + DPRINTK(ERR, "netxen_post_rx_buffers_nodb: " + " allocated only %d buffers\n", count); + break; + } + count++; /* now there should be no failure */ + pdesc = &rcv_desc->desc_head[producer]; + skb_reserve(skb, 2); /* * This will be setup when we receive the * buffer after it has been filled @@ -1040,6 +1230,7 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) buffer->dma = pci_map_single(pdev, skb->data, rcv_desc->dma_size, PCI_DMA_FROMDEVICE); + /* make a rcv descriptor */ pdesc->reference_handle = le16_to_cpu(buffer->ref_handle); pdesc->buffer_length = le16_to_cpu(rcv_desc->dma_size); @@ -1064,7 +1255,8 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) writel((producer - 1) & (rcv_desc->max_rx_desc_count - 1), NETXEN_CRB_NORMALIZE(adapter, - rcv_desc_crb-> + recv_crb_registers[0]. + rcv_desc_crb[ringid]. crb_rcv_producer_offset)); wmb(); } @@ -1197,8 +1389,8 @@ netxen_nic_do_ioctl(struct netxen_adapter *adapter, void *u_data, switch (data.cmd) { case netxen_nic_cmd_pci_read: - if ((retval = netxen_nic_hw_read_wx(adapter, data.off, - &(data.u), data.size))) + if ((retval = netxen_nic_hw_read_ioctl(adapter, data.off, + &(data.u), data.size))) goto error_out; if (copy_to_user ((void __user *)&(up_data->u), &(data.u), data.size)) { @@ -1211,8 +1403,35 @@ netxen_nic_do_ioctl(struct netxen_adapter *adapter, void *u_data, break; case netxen_nic_cmd_pci_write: - data.rv = netxen_nic_hw_write_wx(adapter, data.off, &(data.u), - data.size); + if ((retval = netxen_nic_hw_write_ioctl(adapter, data.off, + &(data.u), data.size))) + goto error_out; + data.rv = 0; + break; + + case netxen_nic_cmd_pci_mem_read: + if (netxen_nic_pci_mem_read_ioctl(adapter, data.off, &(data.u), + data.size)) { + DPRINTK(ERR, "Failed to read the data.\n"); + retval = -EFAULT; + goto error_out; + } + if (copy_to_user + ((void __user *)&(up_data->u), &(data.u), data.size)) { + DPRINTK(ERR, "bad copy to userland: %d\n", + (int)sizeof(data)); + retval = -EFAULT; + goto error_out; + } + data.rv = 0; + break; + + case netxen_nic_cmd_pci_mem_write: + if ((retval = netxen_nic_pci_mem_write_ioctl(adapter, data.off, + &(data.u), + data.size))) + goto error_out; + data.rv = 0; break; case netxen_nic_cmd_pci_config_read: @@ -1297,7 +1516,7 @@ netxen_nic_do_ioctl(struct netxen_adapter *adapter, void *u_data, retval = -EOPNOTSUPP; goto error_out; } - put_user(data.rv, (u16 __user *) (&(up_data->rv))); + put_user(data.rv, (&(up_data->rv))); DPRINTK(INFO, "done ioctl for %p well.\n", adapter); error_out: diff --git a/drivers/net/netxen/netxen_nic_ioctl.h b/drivers/net/netxen/netxen_nic_ioctl.h index 23e53adbf12..1221fa52755 100644 --- a/drivers/net/netxen/netxen_nic_ioctl.h +++ b/drivers/net/netxen/netxen_nic_ioctl.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -36,7 +36,7 @@ #define NETXEN_NIC_CMD (NETXEN_CMD_START + 1) #define NETXEN_NIC_NAME (NETXEN_CMD_START + 2) #define NETXEN_NIC_NAME_LEN 16 -#define NETXEN_NIC_NAME_RSP "NETXEN" +#define NETXEN_NIC_NAME_RSP "NETXEN-UNM" typedef enum { netxen_nic_cmd_none = 0, diff --git a/drivers/net/netxen/netxen_nic_isr.c b/drivers/net/netxen/netxen_nic_isr.c index ae180fee800..1b45f50fa6a 100644 --- a/drivers/net/netxen/netxen_nic_isr.c +++ b/drivers/net/netxen/netxen_nic_isr.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -68,8 +68,7 @@ struct net_device_stats *netxen_nic_get_stats(struct net_device *netdev) void netxen_indicate_link_status(struct netxen_adapter *adapter, u32 portno, u32 link) { - struct netxen_port *pport = adapter->port[portno]; - struct net_device *netdev = pport->netdev; + struct net_device *netdev = (adapter->port[portno])->netdev; if (link) netif_carrier_on(netdev); @@ -84,46 +83,41 @@ void netxen_handle_port_int(struct netxen_adapter *adapter, u32 portno, struct netxen_port *port; /* This should clear the interrupt source */ - if (adapter->ops->phy_read) - adapter->ops->phy_read(adapter, portno, - NETXEN_NIU_GB_MII_MGMT_ADDR_INT_STATUS, - &int_src); + if (adapter->phy_read) + adapter->phy_read(adapter, portno, + NETXEN_NIU_GB_MII_MGMT_ADDR_INT_STATUS, + &int_src); if (int_src == 0) { DPRINTK(INFO, "No phy interrupts for port #%d\n", portno); return; } - if (adapter->ops->disable_phy_interrupts) - adapter->ops->disable_phy_interrupts(adapter, portno); + if (adapter->disable_phy_interrupts) + adapter->disable_phy_interrupts(adapter, portno); port = adapter->port[portno]; if (netxen_get_phy_int_jabber(int_src)) - DPRINTK(INFO, "NetXen: %s Jabber interrupt \n", - port->netdev->name); + DPRINTK(INFO, "Jabber interrupt \n"); if (netxen_get_phy_int_polarity_changed(int_src)) - DPRINTK(INFO, "NetXen: %s POLARITY CHANGED int \n", - port->netdev->name); + DPRINTK(INFO, "POLARITY CHANGED int \n"); if (netxen_get_phy_int_energy_detect(int_src)) - DPRINTK(INFO, "NetXen: %s ENERGY DETECT INT \n", - port->netdev->name); + DPRINTK(INFO, "ENERGY DETECT INT \n"); if (netxen_get_phy_int_downshift(int_src)) - DPRINTK(INFO, "NetXen: %s DOWNSHIFT INT \n", - port->netdev->name); + DPRINTK(INFO, "DOWNSHIFT INT \n"); /* write it down later.. */ if ((netxen_get_phy_int_speed_changed(int_src)) || (netxen_get_phy_int_link_status_changed(int_src))) { __le32 status; - DPRINTK(INFO, "NetXen: %s SPEED CHANGED OR" - " LINK STATUS CHANGED \n", port->netdev->name); + DPRINTK(INFO, "SPEED CHANGED OR LINK STATUS CHANGED \n"); - if (adapter->ops->phy_read - && adapter->ops->phy_read(adapter, portno, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - &status) == 0) { + if (adapter->phy_read + && adapter->phy_read(adapter, portno, + NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, + &status) == 0) { if (netxen_get_phy_int_link_status_changed(int_src)) { if (netxen_get_phy_link(status)) { netxen_niu_gbe_init_port(adapter, @@ -143,8 +137,8 @@ void netxen_handle_port_int(struct netxen_adapter *adapter, u32 portno, } } } - if (adapter->ops->enable_phy_interrupts) - adapter->ops->enable_phy_interrupts(adapter, portno); + if (adapter->enable_phy_interrupts) + adapter->enable_phy_interrupts(adapter, portno); } void netxen_nic_isr_other(struct netxen_adapter *adapter) @@ -159,8 +153,7 @@ void netxen_nic_isr_other(struct netxen_adapter *adapter) qg_linksup = adapter->ahw.qg_linksup; adapter->ahw.qg_linksup = val; - DPRINTK(1, INFO, "%s: link update 0x%08x\n", netxen_nic_driver_name, - val); + DPRINTK(INFO, "link update 0x%08x\n", val); for (portno = 0; portno < NETXEN_NIU_MAX_GBE_PORTS; portno++) { linkup = val & 1; if (linkup != (qg_linksup & 1)) { diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index df0bb36a1cf..575b71b6720 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1,25 +1,25 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -32,6 +32,7 @@ */ #include <linux/vmalloc.h> +#include <linux/highmem.h> #include "netxen_nic_hw.h" #include "netxen_nic.h" @@ -48,14 +49,21 @@ MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(NETXEN_NIC_LINUX_VERSIONID); -char netxen_nic_driver_name[] = "netxen"; +char netxen_nic_driver_name[] = "netxen-nic"; static char netxen_nic_driver_string[] = "NetXen Network Driver version " NETXEN_NIC_LINUX_VERSIONID; +struct netxen_adapter *g_adapter = NULL; + #define NETXEN_NETDEV_WEIGHT 120 #define NETXEN_ADAPTER_UP_MAGIC 777 #define NETXEN_NIC_PEG_TUNE 0 +u8 nx_p2_id = NX_P2_C0; + +#define DMA_32BIT_MASK 0x00000000ffffffffULL +#define DMA_35BIT_MASK 0x00000007ffffffffULL + /* Local functions to NetXen NIC driver */ static int __devinit netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -87,6 +95,9 @@ static struct pci_device_id netxen_pci_tbl[] __devinitdata = { MODULE_DEVICE_TABLE(pci, netxen_pci_tbl); +struct workqueue_struct *netxen_workq; +static void netxen_watchdog(unsigned long); + /* * netxen_nic_probe() * @@ -105,20 +116,28 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev = NULL; struct netxen_adapter *adapter = NULL; struct netxen_port *port = NULL; - u8 *mem_ptr0 = NULL; - u8 *mem_ptr1 = NULL; - u8 *mem_ptr2 = NULL; + void __iomem *mem_ptr0 = NULL; + void __iomem *mem_ptr1 = NULL; + void __iomem *mem_ptr2 = NULL; - unsigned long mem_base, mem_len; + u8 *db_ptr = NULL; + unsigned long mem_base, mem_len, db_base, db_len; int pci_using_dac, i, err; int ring; struct netxen_recv_context *recv_ctx = NULL; struct netxen_rcv_desc_ctx *rcv_desc = NULL; struct netxen_cmd_buffer *cmd_buf_arr = NULL; u64 mac_addr[FLASH_NUM_PORTS + 1]; - int valid_mac; + int valid_mac = 0; + static int netxen_cards_found = 0; printk(KERN_INFO "%s \n", netxen_nic_driver_string); + /* In current scheme, we use only PCI function 0 */ + if (PCI_FUNC(pdev->devfn) != 0) { + DPRINTK(ERR, "NetXen function %d will not be enabled.\n", + PCI_FUNC(pdev->devfn)); + return -ENODEV; + } if ((err = pci_enable_device(pdev))) return err; if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { @@ -130,10 +149,12 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_disable_pdev; pci_set_master(pdev); - if ((pci_set_dma_mask(pdev, DMA_64BIT_MASK) == 0) && - (pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK) == 0)) + pci_read_config_byte(pdev, PCI_REVISION_ID, &nx_p2_id); + if (nx_p2_id == NX_P2_C1 && + (pci_set_dma_mask(pdev, DMA_35BIT_MASK) == 0) && + (pci_set_consistent_dma_mask(pdev, DMA_35BIT_MASK) == 0)) { pci_using_dac = 1; - else { + } else { if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) || (err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK))) goto err_out_free_res; @@ -153,21 +174,34 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ioremap(mem_base + THIRD_PAGE_GROUP_START, THIRD_PAGE_GROUP_SIZE); if ((mem_ptr0 == 0UL) || (mem_ptr1 == 0UL) || (mem_ptr2 == 0UL)) { - DPRINTK(1, ERR, + DPRINTK(ERR, "Cannot remap adapter memory aborting.:" "0 -> %p, 1 -> %p, 2 -> %p\n", mem_ptr0, mem_ptr1, mem_ptr2); err = -EIO; - if (mem_ptr0) - iounmap(mem_ptr0); - if (mem_ptr1) - iounmap(mem_ptr1); - if (mem_ptr2) - iounmap(mem_ptr2); - - goto err_out_free_res; + goto err_out_iounmap; + } + db_base = pci_resource_start(pdev, 4); /* doorbell is on bar 4 */ + db_len = pci_resource_len(pdev, 4); + + if (db_len == 0) { + printk(KERN_ERR "%s: doorbell is disabled\n", + netxen_nic_driver_name); + err = -EIO; + goto err_out_iounmap; + } + DPRINTK(INFO, "doorbell ioremap from %lx a size of %lx\n", db_base, + db_len); + + db_ptr = ioremap(db_base, NETXEN_DB_MAPSIZE_BYTES); + if (db_ptr == 0UL) { + printk(KERN_ERR "%s: Failed to allocate doorbell map.", + netxen_nic_driver_name); + err = -EIO; + goto err_out_iounmap; } + DPRINTK(INFO, "doorbell ioremaped at %p\n", db_ptr); /* * Allocate a adapter structure which will manage all the initialization @@ -183,17 +217,24 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netxen_nic_driver_name, (int)sizeof(struct netxen_adapter)); err = -ENOMEM; - goto err_out_iounmap; + goto err_out_dbunmap; } + if (netxen_cards_found == 0) { + g_adapter = adapter; + } adapter->max_tx_desc_count = MAX_CMD_DESCRIPTORS; adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS; adapter->max_jumbo_rx_desc_count = MAX_JUMBO_RCV_DESCRIPTORS; + adapter->max_lro_rx_desc_count = MAX_LRO_RCV_DESCRIPTORS; pci_set_drvdata(pdev, adapter); cmd_buf_arr = (struct netxen_cmd_buffer *)vmalloc(TX_RINGSIZE); if (cmd_buf_arr == NULL) { + printk(KERN_ERR + "%s: Could not allocate cmd_buf_arr memory:%d\n", + netxen_nic_driver_name, (int)TX_RINGSIZE); err = -ENOMEM; goto err_out_free_adapter; } @@ -220,11 +261,23 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rcv_desc->skb_size = MAX_RX_JUMBO_BUFFER_LENGTH; break; + case RCV_RING_LRO: + rcv_desc->max_rx_desc_count = + adapter->max_lro_rx_desc_count; + rcv_desc->flags = RCV_DESC_LRO; + rcv_desc->dma_size = RX_LRO_DMA_MAP_LEN; + rcv_desc->skb_size = MAX_RX_LRO_BUFFER_LENGTH; + break; + } rcv_desc->rx_buf_arr = (struct netxen_rx_buffer *) vmalloc(RCV_BUFFSIZE); if (rcv_desc->rx_buf_arr == NULL) { + printk(KERN_ERR "%s: Could not allocate" + "rcv_desc->rx_buf_arr memory:%d\n", + netxen_nic_driver_name, + (int)RCV_BUFFSIZE); err = -ENOMEM; goto err_out_free_rx_buffer; } @@ -233,30 +286,21 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } - adapter->ops = kzalloc(sizeof(struct netxen_drvops), GFP_KERNEL); - if (adapter->ops == NULL) { - printk(KERN_ERR - "%s: Could not allocate memory for adapter->ops:%d\n", - netxen_nic_driver_name, - (int)sizeof(struct netxen_adapter)); - err = -ENOMEM; - goto err_out_free_rx_buffer; - } - adapter->cmd_buf_arr = cmd_buf_arr; adapter->ahw.pci_base0 = mem_ptr0; adapter->ahw.pci_base1 = mem_ptr1; adapter->ahw.pci_base2 = mem_ptr2; + adapter->ahw.db_base = db_ptr; + adapter->ahw.db_len = db_len; spin_lock_init(&adapter->tx_lock); spin_lock_init(&adapter->lock); + netxen_initialize_adapter_sw(adapter); /* initialize the buffers in adapter */ #ifdef CONFIG_IA64 netxen_pinit_from_rom(adapter, 0); udelay(500); netxen_load_firmware(adapter); #endif - /* initialize the buffers in adapter */ - netxen_initialize_adapter_sw(adapter); /* * Set the CRB window to invalid. If any register in window 0 is * accessed it should set the window to 0 and then reset it to 1. @@ -277,7 +321,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->watchdog_task, netxen_watchdog_task); adapter->ahw.pdev = pdev; adapter->proc_cmd_buf_counter = 0; - pci_read_config_byte(pdev, PCI_REVISION_ID, &adapter->ahw.revision_id); + adapter->ahw.revision_id = nx_p2_id; if (pci_enable_msi(pdev)) { adapter->flags &= ~NETXEN_NIC_MSI_ENABLED; @@ -299,6 +343,12 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) writel(0, NETXEN_CRB_NORMALIZE(adapter, CRB_CMD_CONSUMER_OFFSET)); writel(0, NETXEN_CRB_NORMALIZE(adapter, CRB_HOST_CMD_ADDR_LO)); + /* do this before waking up pegs so that we have valid dummy dma addr */ + err = netxen_initialize_adapter_offload(adapter); + if (err) { + goto err_out_free_dev; + } + /* Unlock the HW, prompting the boot sequence */ writel(1, NETXEN_CRB_NORMALIZE(adapter, NETXEN_ROMUSB_GLB_PEGTUNE_DONE)); @@ -307,6 +357,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); /* initialize the all the ports */ + adapter->active_ports = 0; for (i = 0; i < adapter->ahw.max_ports; i++) { netdev = alloc_etherdev(sizeof(struct netxen_port)); @@ -372,10 +423,9 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->dev_addr[4], netdev->dev_addr[5]); } else { - if (adapter->ops->macaddr_set) - adapter->ops->macaddr_set(port, - netdev-> - dev_addr); + if (adapter->macaddr_set) + adapter->macaddr_set(port, + netdev->dev_addr); } } adapter->netdev = netdev; @@ -391,7 +441,6 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_dev; } adapter->port_count++; - adapter->active_ports = 0; adapter->port[i] = port; } @@ -412,6 +461,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) break; } + adapter->number = netxen_cards_found; adapter->driver_mismatch = 0; return 0; @@ -426,7 +476,8 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) free_netdev(port->netdev); } } - kfree(adapter->ops); + + netxen_free_adapter_offload(adapter); err_out_free_rx_buffer: for (i = 0; i < MAX_RCV_CTX; ++i) { @@ -439,19 +490,23 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } } - vfree(cmd_buf_arr); - kfree(adapter->port); - err_out_free_adapter: pci_set_drvdata(pdev, NULL); kfree(adapter); + err_out_dbunmap: + if (db_ptr) + iounmap(db_ptr); + err_out_iounmap: - iounmap(mem_ptr0); - iounmap(mem_ptr1); - iounmap(mem_ptr2); + if (mem_ptr0) + iounmap(mem_ptr0); + if (mem_ptr1) + iounmap(mem_ptr1); + if (mem_ptr2) + iounmap(mem_ptr2); err_out_free_res: pci_release_regions(pdev); @@ -476,12 +531,8 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev) netxen_nic_stop_all_ports(adapter); /* leave the hw in the same state as reboot */ - netxen_pinit_from_rom(adapter, 0); - udelay(500); netxen_load_firmware(adapter); - - if ((adapter->flags & NETXEN_NIC_MSI_ENABLED)) - netxen_nic_disable_int(adapter); + netxen_free_adapter_offload(adapter); udelay(500); /* Delay for a while to drain the DMA engines */ for (i = 0; i < adapter->port_count; i++) { @@ -498,6 +549,7 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev) if (adapter->is_up == NETXEN_ADAPTER_UP_MAGIC) netxen_free_hw_resources(adapter); + iounmap(adapter->ahw.db_base); iounmap(adapter->ahw.pci_base0); iounmap(adapter->ahw.pci_base1); iounmap(adapter->ahw.pci_base2); @@ -524,7 +576,6 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev) } vfree(adapter->cmd_buf_arr); - kfree(adapter->ops); kfree(adapter); } @@ -546,6 +597,8 @@ static int netxen_nic_open(struct net_device *netdev) return -EIO; } netxen_nic_flash_print(adapter); + if (adapter->init_niu) + adapter->init_niu(adapter); /* setup all the resources for the Phantom... */ /* this include the descriptors for rcv, tx, and status */ @@ -556,32 +609,31 @@ static int netxen_nic_open(struct net_device *netdev) err); return err; } - if (adapter->ops->init_port - && adapter->ops->init_port(adapter, port->portnum) != 0) { + if (adapter->init_port + && adapter->init_port(adapter, port->portnum) != 0) { printk(KERN_ERR "%s: Failed to initialize port %d\n", netxen_nic_driver_name, port->portnum); netxen_free_hw_resources(adapter); return -EIO; } - if (adapter->ops->init_niu) - adapter->ops->init_niu(adapter); for (ctx = 0; ctx < MAX_RCV_CTX; ++ctx) { for (ring = 0; ring < NUM_RCV_DESC_RINGS; ring++) netxen_post_rx_buffers(adapter, ctx, ring); } - adapter->is_up = NETXEN_ADAPTER_UP_MAGIC; - } - adapter->active_ports++; - if (adapter->active_ports == 1) { + adapter->irq = adapter->ahw.pdev->irq; err = request_irq(adapter->ahw.pdev->irq, &netxen_intr, SA_SHIRQ | SA_SAMPLE_RANDOM, netdev->name, adapter); if (err) { printk(KERN_ERR "request_irq failed with: %d\n", err); - adapter->active_ports--; + netxen_free_hw_resources(adapter); return err; } - adapter->irq = adapter->ahw.pdev->irq; + + adapter->is_up = NETXEN_ADAPTER_UP_MAGIC; + } + adapter->active_ports++; + if (adapter->active_ports == 1) { if (!adapter->driver_mismatch) mod_timer(&adapter->watchdog_timer, jiffies); @@ -590,11 +642,14 @@ static int netxen_nic_open(struct net_device *netdev) /* Done here again so that even if phantom sw overwrote it, * we set it */ - if (adapter->ops->macaddr_set) - adapter->ops->macaddr_set(port, netdev->dev_addr); + if (adapter->macaddr_set) + adapter->macaddr_set(port, netdev->dev_addr); netxen_nic_set_link_parameters(port); netxen_nic_set_multi(netdev); + if (adapter->set_mtu) + adapter->set_mtu(port, netdev->mtu); + if (!adapter->driver_mismatch) netif_start_queue(netdev); @@ -647,6 +702,7 @@ static int netxen_nic_close(struct net_device *netdev) } cmd_buff++; } + FLUSH_SCHEDULED_WORK(); del_timer_sync(&adapter->watchdog_timer); } @@ -667,7 +723,6 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct cmd_desc_type0 *hwdesc; int k; struct netxen_cmd_buffer *pbuf = NULL; - unsigned int tries = 0; static int dropped_packet = 0; int frag_count; u32 local_producer = 0; @@ -729,7 +784,7 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (((skb->nh.iph)->ihl * sizeof(u32)) + ((skb->h.th)->doff * sizeof(u32)) + sizeof(struct ethhdr) > - (sizeof(struct cmd_desc_type0) - NET_IP_ALIGN)) { + (sizeof(struct cmd_desc_type0) - 2)) { no_of_desc++; } } @@ -740,27 +795,17 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if ((k + no_of_desc) >= ((last_cmd_consumer <= k) ? last_cmd_consumer + max_tx_desc_count : last_cmd_consumer)) { + port->stats.nocmddescriptor++; + DPRINTK(ERR, "No command descriptors available," + " producer = %d, consumer = %d count=%llu," + " dropping packet\n", producer, + adapter->last_cmd_consumer, + port->stats.nocmddescriptor); + + netif_stop_queue(netdev); + port->flags |= NETXEN_NETDEV_STATUS; spin_unlock_bh(&adapter->tx_lock); - if (tries == 0) { - local_bh_disable(); - netxen_process_cmd_ring((unsigned long)adapter); - local_bh_enable(); - ++tries; - goto retry_getting_window; - } else { - port->stats.nocmddescriptor++; - DPRINTK(ERR, "No command descriptors available," - " producer = %d, consumer = %d count=%llu," - " dropping packet\n", producer, - adapter->last_cmd_consumer, - port->stats.nocmddescriptor); - - spin_lock_bh(&adapter->tx_lock); - netif_stop_queue(netdev); - port->flags |= NETXEN_NETDEV_STATUS; - spin_unlock_bh(&adapter->tx_lock); - return NETDEV_TX_BUSY; - } + return NETDEV_TX_BUSY; } k = get_index_range(k, max_tx_desc_count, no_of_desc); adapter->cmd_producer = k; @@ -782,7 +827,6 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) pbuf->mss = 0; hwdesc->mss = 0; } - pbuf->no_of_descriptors = no_of_desc; pbuf->total_length = skb->len; pbuf->skb = skb; pbuf->cmd = TX_ETHER_PKT; @@ -792,11 +836,11 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) buffrag->dma = pci_map_single(port->pdev, skb->data, first_seg_len, PCI_DMA_TODEVICE); buffrag->length = first_seg_len; - CMD_DESC_TOTAL_LENGTH_WRT(hwdesc, skb->len); - hwdesc->num_of_buffers = frag_count; - hwdesc->opcode = TX_ETHER_PKT; + netxen_set_cmd_desc_totallength(hwdesc, skb->len); + netxen_set_cmd_desc_num_of_buff(hwdesc, frag_count); + netxen_set_cmd_desc_opcode(hwdesc, TX_ETHER_PKT); - CMD_DESC_PORT_WRT(hwdesc, port->portnum); + netxen_set_cmd_desc_port(hwdesc, port->portnum); hwdesc->buffer1_length = cpu_to_le16(first_seg_len); hwdesc->addr_buffer1 = cpu_to_le64(buffrag->dma); @@ -855,12 +899,12 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* For LSO, we need to copy the MAC/IP/TCP headers into * the descriptor ring */ - if (hw->cmd_desc_head[saved_producer].opcode == TX_TCP_LSO) { + if (netxen_get_cmd_desc_opcode(&hw->cmd_desc_head[saved_producer]) + == TX_TCP_LSO) { int hdr_len, first_hdr_len, more_hdr; hdr_len = hw->cmd_desc_head[saved_producer].total_hdr_length; - if (hdr_len > (sizeof(struct cmd_desc_type0) - NET_IP_ALIGN)) { - first_hdr_len = - sizeof(struct cmd_desc_type0) - NET_IP_ALIGN; + if (hdr_len > (sizeof(struct cmd_desc_type0) - 2)) { + first_hdr_len = sizeof(struct cmd_desc_type0) - 2; more_hdr = 1; } else { first_hdr_len = hdr_len; @@ -870,7 +914,7 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) hwdesc = &hw->cmd_desc_head[producer]; /* copy the first 64 bytes */ - memcpy(((void *)hwdesc) + NET_IP_ALIGN, + memcpy(((void *)hwdesc) + 2, (void *)(skb->data), first_hdr_len); producer = get_next_index(producer, max_tx_desc_count); @@ -886,7 +930,7 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } spin_lock_bh(&adapter->tx_lock); port->stats.txbytes += - CMD_DESC_TOTAL_LENGTH(&hw->cmd_desc_head[saved_producer]); + netxen_get_cmd_desc_totallength(&hw->cmd_desc_head[saved_producer]); /* Code to update the adapter considering how many producer threads are currently working */ if ((--adapter->num_threads) == 0) { @@ -896,20 +940,6 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) NETXEN_CRB_NORMALIZE(adapter, CRB_CMD_PRODUCER_OFFSET)); wmb(); adapter->total_threads = 0; - } else { - u32 crb_producer = 0; - crb_producer = - readl(NETXEN_CRB_NORMALIZE - (adapter, CRB_CMD_PRODUCER_OFFSET)); - if (crb_producer == local_producer) { - crb_producer = get_index_range(crb_producer, - max_tx_desc_count, - no_of_desc); - writel(crb_producer, - NETXEN_CRB_NORMALIZE(adapter, - CRB_CMD_PRODUCER_OFFSET)); - wmb(); - } } port->stats.xmitfinished++; @@ -926,15 +956,20 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) static void netxen_watchdog(unsigned long v) { struct netxen_adapter *adapter = (struct netxen_adapter *)v; - schedule_work(&adapter->watchdog_task); + if (adapter != g_adapter) { + printk("%s: ***BUG*** adapter[%p] != g_adapter[%p]\n", + __FUNCTION__, adapter, g_adapter); + return; + } + + SCHEDULE_WORK(&adapter->watchdog_task); } static void netxen_tx_timeout(struct net_device *netdev) { struct netxen_port *port = (struct netxen_port *)netdev_priv(netdev); - struct netxen_adapter *adapter = port->adapter; - schedule_work(&adapter->tx_timeout_task); + SCHEDULE_WORK(&port->adapter->tx_timeout_task); } static void netxen_tx_timeout_task(struct work_struct *work) @@ -967,6 +1002,11 @@ netxen_handle_int(struct netxen_adapter *adapter, struct net_device *netdev) if (!(adapter->flags & NETXEN_NIC_MSI_ENABLED)) { int count = 0; u32 mask; + mask = readl(pci_base_offset(adapter, ISR_INT_VECTOR)); + if ((mask & 0x80) == 0) { + /* not our interrupt */ + return ret; + } netxen_nic_disable_int(adapter); /* Window = 0 or 1 */ do { @@ -1026,7 +1066,10 @@ irqreturn_t netxen_intr(int irq, void *data) netdev = port->netdev; /* process our status queue (for all 4 ports) */ - netxen_handle_int(adapter, netdev); + if (netif_running(netdev)) { + netxen_handle_int(adapter, netdev); + break; + } } return IRQ_HANDLED; @@ -1040,11 +1083,12 @@ static int netxen_nic_poll(struct net_device *netdev, int *budget) int done = 1; int ctx; int this_work_done; + int work_done = 0; DPRINTK(INFO, "polling for %d descriptors\n", *budget); port->stats.polled++; - adapter->work_done = 0; + work_done = 0; for (ctx = 0; ctx < MAX_RCV_CTX; ++ctx) { /* * Fairness issue. This will give undue weight to the @@ -1061,20 +1105,20 @@ static int netxen_nic_poll(struct net_device *netdev, int *budget) this_work_done = netxen_process_rcv_ring(adapter, ctx, work_to_do / MAX_RCV_CTX); - adapter->work_done += this_work_done; + work_done += this_work_done; } - netdev->quota -= adapter->work_done; - *budget -= adapter->work_done; + netdev->quota -= work_done; + *budget -= work_done; - if (adapter->work_done >= work_to_do - && netxen_nic_rx_has_work(adapter) != 0) + if (work_done >= work_to_do && netxen_nic_rx_has_work(adapter) != 0) done = 0; - netxen_process_cmd_ring((unsigned long)adapter); + if (netxen_process_cmd_ring((unsigned long)adapter) == 0) + done = 0; DPRINTK(INFO, "new work_done: %d work_to_do: %d\n", - adapter->work_done, work_to_do); + work_done, work_to_do); if (done) { netif_rx_complete(netdev); netxen_nic_enable_int(adapter); @@ -1117,8 +1161,9 @@ netxen_nic_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) if (ifr->ifr_data) { sprintf(dev_name, "%s-%d", NETXEN_NIC_NAME_RSP, port->portnum); - nr_bytes = copy_to_user((char *)ifr->ifr_data, dev_name, - NETXEN_NIC_NAME_LEN); + nr_bytes = + copy_to_user((char __user *)ifr->ifr_data, dev_name, + NETXEN_NIC_NAME_LEN); if (nr_bytes) err = -EIO; @@ -1145,6 +1190,9 @@ static struct pci_driver netxen_driver = { static int __init netxen_init_module(void) { + if ((netxen_workq = create_singlethread_workqueue("netxen")) == 0) + return -ENOMEM; + return pci_module_init(&netxen_driver); } @@ -1155,7 +1203,7 @@ static void __exit netxen_exit_module(void) /* * Wait for some time to allow the dma to drain, if any. */ - mdelay(5); + destroy_workqueue(netxen_workq); pci_unregister_driver(&netxen_driver); } diff --git a/drivers/net/netxen/netxen_nic_niu.c b/drivers/net/netxen/netxen_nic_niu.c index 7950a04532e..4987dc765d9 100644 --- a/drivers/net/netxen/netxen_nic_niu.c +++ b/drivers/net/netxen/netxen_nic_niu.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 - 2006 NetXen, Inc. * All rights reserved. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -16,10 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. - * + * * The full GNU General Public License is included in this distribution * in the file called LICENSE. - * + * * Contact Information: * info@netxen.com * NetXen, @@ -40,13 +40,15 @@ static long phy_lock_timeout = 100000000; -static inline int phy_lock(void) +static inline int phy_lock(struct netxen_adapter *adapter) { int i; int done = 0, timeout = 0; while (!done) { - done = readl((void __iomem *)NETXEN_PCIE_REG(PCIE_SEM3_LOCK)); + done = + readl(pci_base_offset + (adapter, NETXEN_PCIE_REG(PCIE_SEM3_LOCK))); if (done == 1) break; if (timeout >= phy_lock_timeout) { @@ -61,13 +63,15 @@ static inline int phy_lock(void) } } - writel(NETXEN_PHY_LOCK_ID, (void __iomem *)PHY_LOCK_DRIVER); + writel(PHY_LOCK_DRIVER, + NETXEN_CRB_NORMALIZE(adapter, NETXEN_PHY_LOCK_ID)); return 0; } -static inline int phy_unlock(void) +static inline int phy_unlock(struct netxen_adapter *adapter) { - readl((void __iomem *)NETXEN_PCIE_REG(PCIE_SEM3_UNLOCK)); + readl(pci_base_offset(adapter, NETXEN_PCIE_REG(PCIE_SEM3_UNLOCK))); + return 0; } @@ -95,7 +99,7 @@ int netxen_niu_gbe_phy_read(struct netxen_adapter *adapter, long phy, __le32 status; __le32 mac_cfg0; - if (phy_lock() != 0) { + if (phy_lock(adapter) != 0) { return -1; } @@ -162,7 +166,7 @@ int netxen_niu_gbe_phy_read(struct netxen_adapter *adapter, long phy, NETXEN_NIU_GB_MAC_CONFIG_0(0), &mac_cfg0, 4)) return -EIO; - phy_unlock(); + phy_unlock(adapter); return result; } @@ -399,8 +403,8 @@ int netxen_niu_gbe_init_port(struct netxen_adapter *adapter, int port) { int result = 0; __le32 status; - if (adapter->ops->disable_phy_interrupts) - adapter->ops->disable_phy_interrupts(adapter, port); + if (adapter->disable_phy_interrupts) + adapter->disable_phy_interrupts(adapter, port); mdelay(2); if (0 == @@ -612,7 +616,7 @@ int netxen_niu_macaddr_set(struct netxen_port *port, __le32 temp = 0; struct netxen_adapter *adapter = port->adapter; int phy = port->portnum; - unsigned char mac_addr[MAX_ADDR_LEN]; + unsigned char mac_addr[6]; int i; for (i = 0; i < 10; i++) { @@ -631,7 +635,7 @@ int netxen_niu_macaddr_set(struct netxen_port *port, netxen_niu_macaddr_get(adapter, phy, (netxen_ethernet_macaddr_t *) mac_addr); - if (memcmp(mac_addr, addr, MAX_ADDR_LEN == 0)) + if (memcmp(mac_addr, addr, 6) == 0) break; } diff --git a/drivers/net/netxen/netxen_nic_phan_reg.h b/drivers/net/netxen/netxen_nic_phan_reg.h index 8181d436783..7879f855af0 100644 --- a/drivers/net/netxen/netxen_nic_phan_reg.h +++ b/drivers/net/netxen/netxen_nic_phan_reg.h @@ -33,15 +33,74 @@ /* * CRB Registers or queue message done only at initialization time. */ +#define NIC_CRB_BASE NETXEN_CAM_RAM(0x200) +#define NETXEN_NIC_REG(X) (NIC_CRB_BASE+(X)) -/* - * The following 2 are the base adresses for the CRB registers and their - * offsets will be added to get addresses for the index addresses. - */ -#define NIC_CRB_BASE_PORT1 NETXEN_CAM_RAM(0x200) -#define NIC_CRB_BASE_PORT2 NETXEN_CAM_RAM(0x250) +#define CRB_PHAN_CNTRL_LO_OFFSET NETXEN_NIC_REG(0x00) +#define CRB_PHAN_CNTRL_HI_OFFSET NETXEN_NIC_REG(0x04) +#define CRB_CMD_PRODUCER_OFFSET NETXEN_NIC_REG(0x08) +#define CRB_CMD_CONSUMER_OFFSET NETXEN_NIC_REG(0x0c) +#define CRB_PAUSE_ADDR_LO NETXEN_NIC_REG(0x10) /* C0 EPG BUG */ +#define CRB_PAUSE_ADDR_HI NETXEN_NIC_REG(0x14) +#define CRB_HOST_CMD_ADDR_HI NETXEN_NIC_REG(0x18) /* host add:cmd ring */ +#define CRB_HOST_CMD_ADDR_LO NETXEN_NIC_REG(0x1c) +#define CRB_CMD_INTR_LOOP NETXEN_NIC_REG(0x20) /* 4 regs for perf */ +#define CRB_CMD_DMA_LOOP NETXEN_NIC_REG(0x24) +#define CRB_RCV_INTR_LOOP NETXEN_NIC_REG(0x28) +#define CRB_RCV_DMA_LOOP NETXEN_NIC_REG(0x2c) +#define CRB_ENABLE_TX_INTR NETXEN_NIC_REG(0x30) /* phantom init status */ +#define CRB_MMAP_ADDR_3 NETXEN_NIC_REG(0x34) +#define CRB_CMDPEG_CMDRING NETXEN_NIC_REG(0x38) +#define CRB_HOST_DUMMY_BUF_ADDR_HI NETXEN_NIC_REG(0x3c) +#define CRB_HOST_DUMMY_BUF_ADDR_LO NETXEN_NIC_REG(0x40) +#define CRB_MMAP_ADDR_0 NETXEN_NIC_REG(0x44) +#define CRB_MMAP_ADDR_1 NETXEN_NIC_REG(0x48) +#define CRB_MMAP_ADDR_2 NETXEN_NIC_REG(0x4c) +#define CRB_CMDPEG_STATE NETXEN_NIC_REG(0x50) +#define CRB_MMAP_SIZE_0 NETXEN_NIC_REG(0x54) +#define CRB_MMAP_SIZE_1 NETXEN_NIC_REG(0x58) +#define CRB_MMAP_SIZE_2 NETXEN_NIC_REG(0x5c) +#define CRB_MMAP_SIZE_3 NETXEN_NIC_REG(0x60) +#define CRB_GLOBAL_INT_COAL NETXEN_NIC_REG(0x64) /* interrupt coalescing */ +#define CRB_INT_COAL_MODE NETXEN_NIC_REG(0x68) +#define CRB_MAX_RCV_BUFS NETXEN_NIC_REG(0x6c) +#define CRB_TX_INT_THRESHOLD NETXEN_NIC_REG(0x70) +#define CRB_RX_PKT_TIMER NETXEN_NIC_REG(0x74) +#define CRB_TX_PKT_TIMER NETXEN_NIC_REG(0x78) +#define CRB_RX_PKT_CNT NETXEN_NIC_REG(0x7c) +#define CRB_RX_TMR_CNT NETXEN_NIC_REG(0x80) +#define CRB_RX_LRO_TIMER NETXEN_NIC_REG(0x84) +#define CRB_RX_LRO_MID_TIMER NETXEN_NIC_REG(0x88) +#define CRB_DMA_MAX_RCV_BUFS NETXEN_NIC_REG(0x8c) +#define CRB_MAX_DMA_ENTRIES NETXEN_NIC_REG(0x90) +#define CRB_XG_STATE NETXEN_NIC_REG(0x94) /* XG Link status */ +#define CRB_AGENT_GO NETXEN_NIC_REG(0x98) /* NIC pkt gen agent */ +#define CRB_AGENT_TX_SIZE NETXEN_NIC_REG(0x9c) +#define CRB_AGENT_TX_TYPE NETXEN_NIC_REG(0xa0) +#define CRB_AGENT_TX_ADDR NETXEN_NIC_REG(0xa4) +#define CRB_AGENT_TX_MSS NETXEN_NIC_REG(0xa8) +#define CRB_TX_STATE NETXEN_NIC_REG(0xac) /* Debug -performance */ +#define CRB_TX_COUNT NETXEN_NIC_REG(0xb0) +#define CRB_RX_STATE NETXEN_NIC_REG(0xb4) +#define CRB_RX_PERF_DEBUG_1 NETXEN_NIC_REG(0xb8) +#define CRB_RX_LRO_CONTROL NETXEN_NIC_REG(0xbc) /* LRO On/OFF */ +#define CRB_RX_LRO_START_NUM NETXEN_NIC_REG(0xc0) +#define CRB_MPORT_MODE NETXEN_NIC_REG(0xc4) /* Multiport Mode */ +#define CRB_CMD_RING_SIZE NETXEN_NIC_REG(0xc8) +#define CRB_INT_VECTOR NETXEN_NIC_REG(0xd4) +#define CRB_CTX_RESET NETXEN_NIC_REG(0xd8) +#define CRB_HOST_STS_PROD NETXEN_NIC_REG(0xdc) +#define CRB_HOST_STS_CONS NETXEN_NIC_REG(0xe0) +#define CRB_PEG_CMD_PROD NETXEN_NIC_REG(0xe4) +#define CRB_PEG_CMD_CONS NETXEN_NIC_REG(0xe8) +#define CRB_HOST_BUFFER_PROD NETXEN_NIC_REG(0xec) +#define CRB_HOST_BUFFER_CONS NETXEN_NIC_REG(0xf0) +#define CRB_JUMBO_BUFFER_PROD NETXEN_NIC_REG(0xf4) +#define CRB_JUMBO_BUFFER_CONS NETXEN_NIC_REG(0xf8) -#define NETXEN_NIC_REG(X) (NIC_CRB_BASE_PORT1+(X)) +#define CRB_CMD_PRODUCER_OFFSET_1 NETXEN_NIC_REG(0x1ac) +#define CRB_CMD_CONSUMER_OFFSET_1 NETXEN_NIC_REG(0x1b0) +#define CRB_TEMP_STATE NETXEN_NIC_REG(0x1b4) /* * CrbPortPhanCntrHi/Lo is used to pass the address of HostPhantomIndex address @@ -51,74 +110,20 @@ * on the Phantom. */ -#define CRB_PHAN_CNTRL_LO_OFFSET NETXEN_NIC_REG(0x00) -#define CRB_PHAN_CNTRL_HI_OFFSET NETXEN_NIC_REG(0x04) - -/* point to the indexes */ -#define CRB_CMD_PRODUCER_OFFSET NETXEN_NIC_REG(0x08) -#define CRB_CMD_CONSUMER_OFFSET NETXEN_NIC_REG(0x0c) - -#define CRB_PAUSE_ADDR_LO NETXEN_NIC_REG(0x10) -#define CRB_PAUSE_ADDR_HI NETXEN_NIC_REG(0x14) - -/* address of command descriptors in the host memory */ -#define CRB_HOST_CMD_ADDR_HI NETXEN_NIC_REG(0x30) -#define CRB_HOST_CMD_ADDR_LO NETXEN_NIC_REG(0x34) - -/* The following 4 CRB registers are for doing performance coal */ -#define CRB_CMD_INTR_LOOP NETXEN_NIC_REG(0x38) -#define CRB_CMD_DMA_LOOP NETXEN_NIC_REG(0x3c) -#define CRB_RCV_INTR_LOOP NETXEN_NIC_REG(0x40) -#define CRB_RCV_DMA_LOOP NETXEN_NIC_REG(0x44) - -/* Needed by the host to find out the state of Phantom's initialization */ -#define CRB_ENABLE_TX_INTR NETXEN_NIC_REG(0x4c) -#define CRB_CMDPEG_STATE NETXEN_NIC_REG(0x50) -#define CRB_CMDPEG_CMDRING NETXEN_NIC_REG(0x54) - -/* Interrupt coalescing parameters */ -#define CRB_GLOBAL_INT_COAL NETXEN_NIC_REG(0x80) -#define CRB_INT_COAL_MODE NETXEN_NIC_REG(0x84) -#define CRB_MAX_RCV_BUFS NETXEN_NIC_REG(0x88) -#define CRB_TX_INT_THRESHOLD NETXEN_NIC_REG(0x8c) -#define CRB_RX_PKT_TIMER NETXEN_NIC_REG(0x90) -#define CRB_TX_PKT_TIMER NETXEN_NIC_REG(0x94) -#define CRB_RX_PKT_CNT NETXEN_NIC_REG(0x98) -#define CRB_RX_TMR_CNT NETXEN_NIC_REG(0x9c) -#define CRB_INT_THRESH NETXEN_NIC_REG(0xa4) - -/* Register for communicating XG link status */ -#define CRB_XG_STATE NETXEN_NIC_REG(0xa0) - -/* Register for communicating card temperature */ -/* Upper 16 bits are temperature value. Lower 16 bits are the state */ -#define CRB_TEMP_STATE NETXEN_NIC_REG(0xa8) -#define nx_get_temp_val(x) ((x) >> 16) -#define nx_get_temp_state(x) ((x) & 0xffff) -#define nx_encode_temp(val, state) (((val) << 16) | (state)) - -/* Debug registers for controlling NIC pkt gen agent */ -#define CRB_AGENT_GO NETXEN_NIC_REG(0xb0) -#define CRB_AGENT_TX_SIZE NETXEN_NIC_REG(0xb4) -#define CRB_AGENT_TX_TYPE NETXEN_NIC_REG(0xb8) -#define CRB_AGENT_TX_ADDR NETXEN_NIC_REG(0xbc) -#define CRB_AGENT_TX_MSS NETXEN_NIC_REG(0xc0) - -/* Debug registers for observing NIC performance */ -#define CRB_TX_STATE NETXEN_NIC_REG(0xd0) -#define CRB_TX_COUNT NETXEN_NIC_REG(0xd4) -#define CRB_RX_STATE NETXEN_NIC_REG(0xd8) +#define nx_get_temp_val(x) ((x) >> 16) +#define nx_get_temp_state(x) ((x) & 0xffff) +#define nx_encode_temp(val, state) (((val) << 16) | (state)) /* CRB registers per Rcv Descriptor ring */ struct netxen_rcv_desc_crb { u32 crb_rcv_producer_offset __attribute__ ((aligned(512))); u32 crb_rcv_consumer_offset; u32 crb_globalrcv_ring; + u32 crb_rcv_ring_size; }; /* - * CRB registers used by the receive peg logic. One instance of these - * needs to be instantiated per instance of the receive peg. + * CRB registers used by the receive peg logic. */ struct netxen_recv_crb { @@ -127,6 +132,7 @@ struct netxen_recv_crb { u32 crb_rcv_status_producer; u32 crb_rcv_status_consumer; u32 crb_rcvpeg_state; + u32 crb_status_ring_size; }; #if defined(DEFINE_GLOBAL_RECV_CRB) @@ -139,30 +145,48 @@ struct netxen_recv_crb recv_crb_registers[] = { { { /* crb_rcv_producer_offset: */ - NETXEN_NIC_REG(0x18), + NETXEN_NIC_REG(0x100), /* crb_rcv_consumer_offset: */ - NETXEN_NIC_REG(0x1c), + NETXEN_NIC_REG(0x104), /* crb_gloablrcv_ring: */ - NETXEN_NIC_REG(0x20), + NETXEN_NIC_REG(0x108), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x10c), + }, /* Jumbo frames */ { /* crb_rcv_producer_offset: */ - NETXEN_NIC_REG(0x100), + NETXEN_NIC_REG(0x110), /* crb_rcv_consumer_offset: */ - NETXEN_NIC_REG(0x104), + NETXEN_NIC_REG(0x114), /* crb_gloablrcv_ring: */ - NETXEN_NIC_REG(0x108), + NETXEN_NIC_REG(0x118), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x11c), + }, + /* LRO */ + { + /* crb_rcv_producer_offset: */ + NETXEN_NIC_REG(0x120), + /* crb_rcv_consumer_offset: */ + NETXEN_NIC_REG(0x124), + /* crb_gloablrcv_ring: */ + NETXEN_NIC_REG(0x128), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x12c), } }, /* crb_rcvstatus_ring: */ - NETXEN_NIC_REG(0x24), + NETXEN_NIC_REG(0x130), /* crb_rcv_status_producer: */ - NETXEN_NIC_REG(0x28), + NETXEN_NIC_REG(0x134), /* crb_rcv_status_consumer: */ - NETXEN_NIC_REG(0x2c), + NETXEN_NIC_REG(0x138), /* crb_rcvpeg_state: */ - NETXEN_NIC_REG(0x48), + NETXEN_NIC_REG(0x13c), + /* crb_status_ring_size */ + NETXEN_NIC_REG(0x140), }, /* @@ -173,34 +197,66 @@ struct netxen_recv_crb recv_crb_registers[] = { { { /* crb_rcv_producer_offset: */ - NETXEN_NIC_REG(0x80), + NETXEN_NIC_REG(0x144), /* crb_rcv_consumer_offset: */ - NETXEN_NIC_REG(0x84), + NETXEN_NIC_REG(0x148), /* crb_globalrcv_ring: */ - NETXEN_NIC_REG(0x88), + NETXEN_NIC_REG(0x14c), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x150), + }, /* Jumbo frames */ { /* crb_rcv_producer_offset: */ - NETXEN_NIC_REG(0x10C), + NETXEN_NIC_REG(0x154), /* crb_rcv_consumer_offset: */ - NETXEN_NIC_REG(0x110), + NETXEN_NIC_REG(0x158), /* crb_globalrcv_ring: */ - NETXEN_NIC_REG(0x114), + NETXEN_NIC_REG(0x15c), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x160), + }, + /* LRO */ + { + /* crb_rcv_producer_offset: */ + NETXEN_NIC_REG(0x164), + /* crb_rcv_consumer_offset: */ + NETXEN_NIC_REG(0x168), + /* crb_globalrcv_ring: */ + NETXEN_NIC_REG(0x16c), + /* crb_rcv_ring_size */ + NETXEN_NIC_REG(0x170), } + }, /* crb_rcvstatus_ring: */ - NETXEN_NIC_REG(0x8c), + NETXEN_NIC_REG(0x174), /* crb_rcv_status_producer: */ - NETXEN_NIC_REG(0x90), + NETXEN_NIC_REG(0x178), /* crb_rcv_status_consumer: */ - NETXEN_NIC_REG(0x94), + NETXEN_NIC_REG(0x17c), /* crb_rcvpeg_state: */ - NETXEN_NIC_REG(0x98), + NETXEN_NIC_REG(0x180), + /* crb_status_ring_size */ + NETXEN_NIC_REG(0x184), + }, }; + +u64 ctx_addr_sig_regs[][3] = { + {NETXEN_NIC_REG(0x188), NETXEN_NIC_REG(0x18c), NETXEN_NIC_REG(0x1c0)}, + {NETXEN_NIC_REG(0x190), NETXEN_NIC_REG(0x194), NETXEN_NIC_REG(0x1c4)}, + {NETXEN_NIC_REG(0x198), NETXEN_NIC_REG(0x19c), NETXEN_NIC_REG(0x1c8)}, + {NETXEN_NIC_REG(0x1a0), NETXEN_NIC_REG(0x1a4), NETXEN_NIC_REG(0x1cc)} +}; + #else extern struct netxen_recv_crb recv_crb_registers[]; +extern u64 ctx_addr_sig_regs[][3]; +#define CRB_CTX_ADDR_REG_LO (ctx_addr_sig_regs[0][0]) +#define CRB_CTX_ADDR_REG_HI (ctx_addr_sig_regs[0][2]) +#define CRB_CTX_SIGNATURE_REG (ctx_addr_sig_regs[0][1]) #endif /* DEFINE_GLOBAL_RECEIVE_CRB */ /* diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c index 26e42f6e9fb..196993a29b0 100644 --- a/drivers/net/ni52.c +++ b/drivers/net/ni52.c @@ -1335,7 +1335,7 @@ int __init init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(dev_ni52); release_region(dev_ni52->base_addr, NI52_TOTAL_SIZE); diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c index 340ad0d5388..1578f4d9849 100644 --- a/drivers/net/ni65.c +++ b/drivers/net/ni65.c @@ -1259,7 +1259,7 @@ int __init init_module(void) return IS_ERR(dev_ni65) ? PTR_ERR(dev_ni65) : 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(dev_ni65); cleanup_card(dev_ni65); diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 312e0e33171..568daeb3e9d 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -414,10 +414,10 @@ struct rx_info { struct sk_buff *skbs[NR_RX_DESC]; - u32 *next_rx_desc; + __le32 *next_rx_desc; u16 next_rx, next_empty; - u32 *descs; + __le32 *descs; dma_addr_t phy_descs; }; @@ -460,7 +460,7 @@ struct ns83820 { struct sk_buff *tx_skbs[NR_TX_DESC]; char pad[16] __attribute__((aligned(16))); - u32 *tx_descs; + __le32 *tx_descs; dma_addr_t tx_phy_descs; struct timer_list tx_watchdog; @@ -534,7 +534,7 @@ static void ns83820_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid * conditions, still route realtime traffic with as low jitter as * possible. */ -static inline void build_rx_desc(struct ns83820 *dev, u32 *desc, dma_addr_t link, dma_addr_t buf, u32 cmdsts, u32 extsts) +static inline void build_rx_desc(struct ns83820 *dev, __le32 *desc, dma_addr_t link, dma_addr_t buf, u32 cmdsts, u32 extsts) { desc_addr_set(desc + DESC_LINK, link); desc_addr_set(desc + DESC_BUFPTR, buf); @@ -548,7 +548,7 @@ static inline int ns83820_add_rx_skb(struct ns83820 *dev, struct sk_buff *skb) { unsigned next_empty; u32 cmdsts; - u32 *sg; + __le32 *sg; dma_addr_t buf; next_empty = dev->rx_info.next_empty; @@ -875,7 +875,8 @@ static void fastcall rx_irq(struct net_device *ndev) struct rx_info *info = &dev->rx_info; unsigned next_rx; int rx_rc, len; - u32 cmdsts, *desc; + u32 cmdsts; + __le32 *desc; unsigned long flags; int nr = 0; @@ -1011,7 +1012,8 @@ static inline void kick_tx(struct ns83820 *dev) static void do_tx_done(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); - u32 cmdsts, tx_done_idx, *desc; + u32 cmdsts, tx_done_idx; + __le32 *desc; dprintk("do_tx_done(%p)\n", ndev); tx_done_idx = dev->tx_done_idx; @@ -1078,7 +1080,7 @@ static void ns83820_cleanup_tx(struct ns83820 *dev) struct sk_buff *skb = dev->tx_skbs[i]; dev->tx_skbs[i] = NULL; if (skb) { - u32 *desc = dev->tx_descs + (i * DESC_SIZE); + __le32 *desc = dev->tx_descs + (i * DESC_SIZE); pci_unmap_single(dev->pci_dev, desc_addr_get(desc + DESC_BUFPTR), le32_to_cpu(desc[DESC_CMDSTS]) & CMDSTS_LEN_MASK, @@ -1108,7 +1110,7 @@ static int ns83820_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_frag_t *frag; int stopped = 0; int do_intr = 0; - volatile u32 *first_desc; + volatile __le32 *first_desc; dprintk("ns83820_hard_start_xmit\n"); @@ -1181,7 +1183,7 @@ again: first_desc = dev->tx_descs + (free_idx * DESC_SIZE); for (;;) { - volatile u32 *desc = dev->tx_descs + (free_idx * DESC_SIZE); + volatile __le32 *desc = dev->tx_descs + (free_idx * DESC_SIZE); dprintk("frag[%3u]: %4u @ 0x%08Lx\n", free_idx, len, (unsigned long long)buf); @@ -1456,7 +1458,8 @@ static int ns83820_stop(struct net_device *ndev) static void ns83820_tx_timeout(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); - u32 tx_done_idx, *desc; + u32 tx_done_idx; + __le32 *desc; unsigned long flags; spin_lock_irqsave(&dev->tx_lock, flags); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4044bb1ada8..e175f3910b1 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -587,8 +587,7 @@ int phy_stop_interrupts(struct phy_device *phydev) * Finish any pending work; we might have been scheduled * to be called from keventd ourselves, though. */ - if (!current_is_keventd()) - flush_scheduled_work(); + run_scheduled_work(&phydev->phy_queue); free_irq(phydev->irq, phydev); diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 85a392fab5c..f83b41d4cb0 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -225,6 +225,7 @@ MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); static int rx_copybreak = 200; static int use_dac; +static int ignore_parity_err; static struct { u32 msg_enable; } debug = { -1 }; @@ -470,6 +471,8 @@ module_param(use_dac, int, 0); MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot."); module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)"); +module_param_named(ignore_parity_err, ignore_parity_err, bool, 0); +MODULE_PARM_DESC(ignore_parity_err, "Ignore PCI parity error as target. Default: false"); MODULE_LICENSE("GPL"); MODULE_VERSION(RTL8169_VERSION); @@ -1284,11 +1287,6 @@ static void rtl8169_hw_phy_config(struct net_device *dev) /* Shazam ! */ if (tp->mac_version == RTL_GIGA_MAC_VER_04) { - mdio_write(ioaddr, 31, 0x0001); - mdio_write(ioaddr, 9, 0x273a); - mdio_write(ioaddr, 14, 0x7bfb); - mdio_write(ioaddr, 27, 0x841e); - mdio_write(ioaddr, 31, 0x0002); mdio_write(ioaddr, 1, 0x90d0); mdio_write(ioaddr, 31, 0x0000); @@ -1817,12 +1815,25 @@ static void rtl8169_hw_reset(void __iomem *ioaddr) RTL_R8(ChipCmd); } -static void -rtl8169_hw_start(struct net_device *dev) +static void rtl8169_set_rx_tx_config_registers(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + u32 cfg = rtl8169_rx_config; + + cfg |= (RTL_R32(RxConfig) & rtl_chip_info[tp->chipset].RxConfigMask); + RTL_W32(RxConfig, cfg); + + /* Set DMA burst size and Interframe Gap Time */ + RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) | + (InterFrameGap << TxInterFrameGapShift)); +} + +static void rtl8169_hw_start(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; + u16 cmd; u32 i; /* Soft reset the chip. */ @@ -1835,6 +1846,11 @@ rtl8169_hw_start(struct net_device *dev) msleep_interruptible(1); } + if (tp->mac_version == RTL_GIGA_MAC_VER_05) { + RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW); + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08); + } + if (tp->mac_version == RTL_GIGA_MAC_VER_13) { pci_write_config_word(pdev, 0x68, 0x00); pci_write_config_word(pdev, 0x69, 0x08); @@ -1842,8 +1858,6 @@ rtl8169_hw_start(struct net_device *dev) /* Undocumented stuff. */ if (tp->mac_version == RTL_GIGA_MAC_VER_05) { - u16 cmd; - /* Realtek's r1000_n.c driver uses '&& 0x01' here. Well... */ if ((RTL_R8(Config2) & 0x07) & 0x01) RTL_W32(0x7c, 0x0007ffff); @@ -1855,23 +1869,29 @@ rtl8169_hw_start(struct net_device *dev) pci_write_config_word(pdev, PCI_COMMAND, cmd); } - RTL_W8(Cfg9346, Cfg9346_Unlock); + if ((tp->mac_version == RTL_GIGA_MAC_VER_01) || + (tp->mac_version == RTL_GIGA_MAC_VER_02) || + (tp->mac_version == RTL_GIGA_MAC_VER_03) || + (tp->mac_version == RTL_GIGA_MAC_VER_04)) + RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); + RTL_W8(EarlyTxThres, EarlyTxThld); /* Low hurts. Let's disable the filtering. */ RTL_W16(RxMaxSize, 16383); - /* Set Rx Config register */ - i = rtl8169_rx_config | - (RTL_R32(RxConfig) & rtl_chip_info[tp->chipset].RxConfigMask); - RTL_W32(RxConfig, i); + if ((tp->mac_version == RTL_GIGA_MAC_VER_01) || + (tp->mac_version == RTL_GIGA_MAC_VER_02) || + (tp->mac_version == RTL_GIGA_MAC_VER_03) || + (tp->mac_version == RTL_GIGA_MAC_VER_04)) + RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); + rtl8169_set_rx_tx_config_registers(tp); - /* Set DMA burst size and Interframe Gap Time */ - RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) | - (InterFrameGap << TxInterFrameGapShift)); + cmd = RTL_R16(CPlusCmd); + RTL_W16(CPlusCmd, cmd); - tp->cp_cmd |= RTL_R16(CPlusCmd) | PCIMulRW; + tp->cp_cmd |= cmd | PCIMulRW; if ((tp->mac_version == RTL_GIGA_MAC_VER_02) || (tp->mac_version == RTL_GIGA_MAC_VER_03)) { @@ -1897,7 +1917,15 @@ rtl8169_hw_start(struct net_device *dev) RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr & DMA_32BIT_MASK)); RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr >> 32)); RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr & DMA_32BIT_MASK)); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); + + if ((tp->mac_version != RTL_GIGA_MAC_VER_01) && + (tp->mac_version != RTL_GIGA_MAC_VER_02) && + (tp->mac_version != RTL_GIGA_MAC_VER_03) && + (tp->mac_version != RTL_GIGA_MAC_VER_04)) { + RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); + rtl8169_set_rx_tx_config_registers(tp); + } + RTL_W8(Cfg9346, Cfg9346_Lock); /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ @@ -1992,7 +2020,7 @@ static int rtl8169_alloc_rx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff, if (!skb) goto err_out; - skb_reserve(skb, align); + skb_reserve(skb, (align - 1) & (u32)skb->data); *sk_buff = skb; mapping = pci_map_single(pdev, skb->data, rx_buf_sz, @@ -2355,12 +2383,17 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) /* * The recovery sequence below admits a very elaborated explanation: * - it seems to work; - * - I did not see what else could be done. + * - I did not see what else could be done; + * - it makes iop3xx happy. * * Feel free to adjust to your needs. */ - pci_write_config_word(pdev, PCI_COMMAND, - pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); + if (ignore_parity_err) + pci_cmd &= ~PCI_COMMAND_PARITY; + else + pci_cmd |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY; + + pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); pci_write_config_word(pdev, PCI_STATUS, pci_status & (PCI_STATUS_DETECTED_PARITY | @@ -2374,10 +2407,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) tp->cp_cmd &= ~PCIDAC; RTL_W16(CPlusCmd, tp->cp_cmd); dev->features &= ~NETIF_F_HIGHDMA; - rtl8169_schedule_work(dev, rtl8169_reinit_task); } rtl8169_hw_reset(ioaddr); + + rtl8169_schedule_work(dev, rtl8169_reinit_task); } static void @@ -2457,7 +2491,7 @@ static inline int rtl8169_try_rx_copy(struct sk_buff **sk_buff, int pkt_size, skb = dev_alloc_skb(pkt_size + align); if (skb) { - skb_reserve(skb, align); + skb_reserve(skb, (align - 1) & (u32)skb->data); eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); *sk_buff = skb; rtl8169_mark_to_asic(desc, rx_buf_sz); diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index d9d0a3a3c55..0d6c95c7aed 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -750,7 +750,7 @@ int __init init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(dev_seeq); release_region(dev_seeq->base_addr, SEEQ8005_IO_EXTENT); diff --git a/drivers/net/sk98lin/skgesirq.c b/drivers/net/sk98lin/skgesirq.c index ab66d80a445..3e7aa49afd0 100644 --- a/drivers/net/sk98lin/skgesirq.c +++ b/drivers/net/sk98lin/skgesirq.c @@ -1319,7 +1319,7 @@ SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &Isrc); #ifdef xDEBUG - if ((Isrc & ~(PHY_B_IS_HCT | PHY_B_IS_LCT) == + if ((Isrc & ~(PHY_B_IS_HCT | PHY_B_IS_LCT)) == (PHY_B_IS_SCR_S_ER | PHY_B_IS_RRS_CHANGE | PHY_B_IS_LRS_CHANGE)) { SK_U32 Stat1, Stat2, Stat3; diff --git a/drivers/net/skge.h b/drivers/net/skge.h index 23e5275d920..f6223c533c0 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h @@ -389,10 +389,10 @@ enum { /* Packet Arbiter Registers */ /* B3_PA_CTRL 16 bit Packet Arbiter Ctrl Register */ enum { - PA_CLR_TO_TX2 = 1<<13, /* Clear IRQ Packet Timeout TX2 */ - PA_CLR_TO_TX1 = 1<<12, /* Clear IRQ Packet Timeout TX1 */ - PA_CLR_TO_RX2 = 1<<11, /* Clear IRQ Packet Timeout RX2 */ - PA_CLR_TO_RX1 = 1<<10, /* Clear IRQ Packet Timeout RX1 */ + PA_CLR_TO_TX2 = 1<<13,/* Clear IRQ Packet Timeout TX2 */ + PA_CLR_TO_TX1 = 1<<12,/* Clear IRQ Packet Timeout TX1 */ + PA_CLR_TO_RX2 = 1<<11,/* Clear IRQ Packet Timeout RX2 */ + PA_CLR_TO_RX1 = 1<<10,/* Clear IRQ Packet Timeout RX1 */ PA_ENA_TO_TX2 = 1<<9, /* Enable Timeout Timer TX2 */ PA_DIS_TO_TX2 = 1<<8, /* Disable Timeout Timer TX2 */ PA_ENA_TO_TX1 = 1<<7, /* Enable Timeout Timer TX1 */ @@ -481,14 +481,14 @@ enum { /* RAM Buffer Register Offsets */ enum { - RB_START = 0x00,/* 32 bit RAM Buffer Start Address */ + RB_START= 0x00,/* 32 bit RAM Buffer Start Address */ RB_END = 0x04,/* 32 bit RAM Buffer End Address */ RB_WP = 0x08,/* 32 bit RAM Buffer Write Pointer */ RB_RP = 0x0c,/* 32 bit RAM Buffer Read Pointer */ - RB_RX_UTPP = 0x10,/* 32 bit Rx Upper Threshold, Pause Packet */ - RB_RX_LTPP = 0x14,/* 32 bit Rx Lower Threshold, Pause Packet */ - RB_RX_UTHP = 0x18,/* 32 bit Rx Upper Threshold, High Prio */ - RB_RX_LTHP = 0x1c,/* 32 bit Rx Lower Threshold, High Prio */ + RB_RX_UTPP= 0x10,/* 32 bit Rx Upper Threshold, Pause Packet */ + RB_RX_LTPP= 0x14,/* 32 bit Rx Lower Threshold, Pause Packet */ + RB_RX_UTHP= 0x18,/* 32 bit Rx Upper Threshold, High Prio */ + RB_RX_LTHP= 0x1c,/* 32 bit Rx Lower Threshold, High Prio */ /* 0x10 - 0x1f: reserved at Tx RAM Buffer Registers */ RB_PC = 0x20,/* 32 bit RAM Buffer Packet Counter */ RB_LEV = 0x24,/* 32 bit RAM Buffer Level Register */ @@ -532,7 +532,7 @@ enum { PHY_ADDR_MARV = 0, }; -#define RB_ADDR(offs, queue) (B16_RAM_REGS + (queue) + (offs)) +#define RB_ADDR(offs, queue) ((u16)B16_RAM_REGS + (u16)(queue) + (offs)) /* Receive MAC FIFO, Receive LED, and Link_Sync regs (GENESIS only) */ enum { @@ -578,15 +578,15 @@ enum { MFF_DIS_TIST = 1<<2, /* Disable Time Stamp Gener */ MFF_CLR_INTIST = 1<<1, /* Clear IRQ No Time Stamp */ MFF_CLR_INSTAT = 1<<0, /* Clear IRQ No Status */ -#define MFF_RX_CTRL_DEF MFF_ENA_TIM_PAT + MFF_RX_CTRL_DEF = MFF_ENA_TIM_PAT, }; /* TX_MFF_CTRL1 16 bit Transmit MAC FIFO Control Reg 1 */ enum { - MFF_CLR_PERR = 1<<15, /* Clear Parity Error IRQ */ - /* Bit 14: reserved */ - MFF_ENA_PKT_REC = 1<<13, /* Enable Packet Recovery */ - MFF_DIS_PKT_REC = 1<<12, /* Disable Packet Recovery */ + MFF_CLR_PERR = 1<<15, /* Clear Parity Error IRQ */ + + MFF_ENA_PKT_REC = 1<<13, /* Enable Packet Recovery */ + MFF_DIS_PKT_REC = 1<<12, /* Disable Packet Recovery */ MFF_ENA_W4E = 1<<7, /* Enable Wait for Empty */ MFF_DIS_W4E = 1<<6, /* Disable Wait for Empty */ @@ -595,9 +595,10 @@ enum { MFF_DIS_LOOPB = 1<<2, /* Disable Loopback */ MFF_CLR_MAC_RST = 1<<1, /* Clear XMAC Reset */ MFF_SET_MAC_RST = 1<<0, /* Set XMAC Reset */ + + MFF_TX_CTRL_DEF = MFF_ENA_PKT_REC | (u16) MFF_ENA_TIM_PAT | MFF_ENA_FLUSH, }; -#define MFF_TX_CTRL_DEF (MFF_ENA_PKT_REC | MFF_ENA_TIM_PAT | MFF_ENA_FLUSH) /* RX_MFF_TST2 8 bit Receive MAC FIFO Test Register 2 */ /* TX_MFF_TST2 8 bit Transmit MAC FIFO Test Register 2 */ @@ -1304,8 +1305,8 @@ enum { /* special defines for FIBER (88E1011S only) */ enum { - PHY_M_AN_ASP_X = 1<<8, /* Asymmetric Pause */ - PHY_M_AN_PC_X = 1<<7, /* MAC Pause implemented */ + PHY_M_AN_ASP_X = 1<<8, /* Asymmetric Pause */ + PHY_M_AN_PC_X = 1<<7, /* MAC Pause implemented */ PHY_M_AN_1000X_AHD = 1<<6, /* Advertise 10000Base-X Half Duplex */ PHY_M_AN_1000X_AFD = 1<<5, /* Advertise 10000Base-X Full Duplex */ }; @@ -1320,7 +1321,7 @@ enum { /***** PHY_MARV_1000T_CTRL 16 bit r/w 1000Base-T Control Reg *****/ enum { - PHY_M_1000C_TEST = 7<<13,/* Bit 15..13: Test Modes */ + PHY_M_1000C_TEST= 7<<13,/* Bit 15..13: Test Modes */ PHY_M_1000C_MSE = 1<<12, /* Manual Master/Slave Enable */ PHY_M_1000C_MSC = 1<<11, /* M/S Configuration (1=Master) */ PHY_M_1000C_MPD = 1<<10, /* Multi-Port Device */ @@ -1349,7 +1350,7 @@ enum { PHY_M_PC_EN_DET_PLUS = 3<<8, /* Energy Detect Plus (Mode 2) */ }; -#define PHY_M_PC_MDI_XMODE(x) (((x)<<5) & PHY_M_PC_MDIX_MSK) +#define PHY_M_PC_MDI_XMODE(x) ((((u16)(x)<<5) & PHY_M_PC_MDIX_MSK) enum { PHY_M_PC_MAN_MDI = 0, /* 00 = Manual MDI configuration */ @@ -1432,24 +1433,24 @@ enum { PHY_M_EC_DIS_LINK_P = 1<<12, /* Disable Link Pulses (88E1111 only) */ PHY_M_EC_M_DSC_MSK = 3<<10, /* Bit 11..10: Master Downshift Counter */ /* (88E1011 only) */ - PHY_M_EC_S_DSC_MSK = 3<<8,/* Bit 9.. 8: Slave Downshift Counter */ + PHY_M_EC_S_DSC_MSK = 3<<8, /* Bit 9.. 8: Slave Downshift Counter */ /* (88E1011 only) */ - PHY_M_EC_M_DSC_MSK2 = 7<<9,/* Bit 11.. 9: Master Downshift Counter */ + PHY_M_EC_M_DSC_MSK2 = 7<<9, /* Bit 11.. 9: Master Downshift Counter */ /* (88E1111 only) */ - PHY_M_EC_DOWN_S_ENA = 1<<8, /* Downshift Enable (88E1111 only) */ + PHY_M_EC_DOWN_S_ENA = 1<<8, /* Downshift Enable (88E1111 only) */ /* !!! Errata in spec. (1 = disable) */ - PHY_M_EC_RX_TIM_CT = 1<<7, /* RGMII Rx Timing Control*/ - PHY_M_EC_MAC_S_MSK = 7<<4,/* Bit 6.. 4: Def. MAC interface speed */ - PHY_M_EC_FIB_AN_ENA = 1<<3, /* Fiber Auto-Neg. Enable (88E1011S only) */ - PHY_M_EC_DTE_D_ENA = 1<<2, /* DTE Detect Enable (88E1111 only) */ - PHY_M_EC_TX_TIM_CT = 1<<1, /* RGMII Tx Timing Control */ - PHY_M_EC_TRANS_DIS = 1<<0, /* Transmitter Disable (88E1111 only) */}; - -#define PHY_M_EC_M_DSC(x) ((x)<<10) /* 00=1x; 01=2x; 10=3x; 11=4x */ -#define PHY_M_EC_S_DSC(x) ((x)<<8) /* 00=dis; 01=1x; 10=2x; 11=3x */ -#define PHY_M_EC_MAC_S(x) ((x)<<4) /* 01X=0; 110=2.5; 111=25 (MHz) */ - -#define PHY_M_EC_M_DSC_2(x) ((x)<<9) /* 000=1x; 001=2x; 010=3x; 011=4x */ + PHY_M_EC_RX_TIM_CT = 1<<7, /* RGMII Rx Timing Control*/ + PHY_M_EC_MAC_S_MSK = 7<<4, /* Bit 6.. 4: Def. MAC interface speed */ + PHY_M_EC_FIB_AN_ENA = 1<<3, /* Fiber Auto-Neg. Enable (88E1011S only) */ + PHY_M_EC_DTE_D_ENA = 1<<2, /* DTE Detect Enable (88E1111 only) */ + PHY_M_EC_TX_TIM_CT = 1<<1, /* RGMII Tx Timing Control */ + PHY_M_EC_TRANS_DIS = 1<<0, /* Transmitter Disable (88E1111 only) */}; + +#define PHY_M_EC_M_DSC(x) ((u16)(x)<<10) /* 00=1x; 01=2x; 10=3x; 11=4x */ +#define PHY_M_EC_S_DSC(x) ((u16)(x)<<8) /* 00=dis; 01=1x; 10=2x; 11=3x */ +#define PHY_M_EC_MAC_S(x) ((u16)(x)<<4) /* 01X=0; 110=2.5; 111=25 (MHz) */ + +#define PHY_M_EC_M_DSC_2(x) ((u16)(x)<<9) /* 000=1x; 001=2x; 010=3x; 011=4x */ /* 100=5x; 101=6x; 110=7x; 111=8x */ enum { MAC_TX_CLK_0_MHZ = 2, @@ -1468,10 +1469,12 @@ enum { PHY_M_LEDC_LK_C_MSK = 7<<3,/* Bit 5.. 3: Link Control Mask */ /* (88E1111 only) */ }; +#define PHY_M_LED_PULS_DUR(x) (((u16)(x)<<12) & PHY_M_LEDC_PULS_MSK) +#define PHY_M_LED_BLINK_RT(x) (((u16)(x)<<8) & PHY_M_LEDC_BL_R_MSK) enum { - PHY_M_LEDC_LINK_MSK = 3<<3,/* Bit 4.. 3: Link Control Mask */ - /* (88E1011 only) */ + PHY_M_LEDC_LINK_MSK = 3<<3, /* Bit 4.. 3: Link Control Mask */ + /* (88E1011 only) */ PHY_M_LEDC_DP_CTRL = 1<<2, /* Duplex Control */ PHY_M_LEDC_DP_C_MSB = 1<<2, /* Duplex Control (MSB, 88E1111 only) */ PHY_M_LEDC_RX_CTRL = 1<<1, /* Rx Activity / Link */ @@ -1479,27 +1482,24 @@ enum { PHY_M_LEDC_TX_C_MSB = 1<<0, /* Tx Control (MSB, 88E1111 only) */ }; -#define PHY_M_LED_PULS_DUR(x) (((x)<<12) & PHY_M_LEDC_PULS_MSK) - enum { - PULS_NO_STR = 0,/* no pulse stretching */ - PULS_21MS = 1,/* 21 ms to 42 ms */ - PULS_42MS = 2,/* 42 ms to 84 ms */ - PULS_84MS = 3,/* 84 ms to 170 ms */ - PULS_170MS = 4,/* 170 ms to 340 ms */ - PULS_340MS = 5,/* 340 ms to 670 ms */ - PULS_670MS = 6,/* 670 ms to 1.3 s */ - PULS_1300MS = 7,/* 1.3 s to 2.7 s */ + PULS_NO_STR = 0, /* no pulse stretching */ + PULS_21MS = 1, /* 21 ms to 42 ms */ + PULS_42MS = 2, /* 42 ms to 84 ms */ + PULS_84MS = 3, /* 84 ms to 170 ms */ + PULS_170MS = 4, /* 170 ms to 340 ms */ + PULS_340MS = 5, /* 340 ms to 670 ms */ + PULS_670MS = 6, /* 670 ms to 1.3 s */ + PULS_1300MS = 7, /* 1.3 s to 2.7 s */ }; -#define PHY_M_LED_BLINK_RT(x) (((x)<<8) & PHY_M_LEDC_BL_R_MSK) enum { - BLINK_42MS = 0,/* 42 ms */ - BLINK_84MS = 1,/* 84 ms */ - BLINK_170MS = 2,/* 170 ms */ - BLINK_340MS = 3,/* 340 ms */ - BLINK_670MS = 4,/* 670 ms */ + BLINK_42MS = 0, /* 42 ms */ + BLINK_84MS = 1, /* 84 ms */ + BLINK_170MS = 2, /* 170 ms */ + BLINK_340MS = 3, /* 340 ms */ + BLINK_670MS = 4, /* 670 ms */ }; /***** PHY_MARV_LED_OVER 16 bit r/w Manual LED Override Reg *****/ @@ -1525,7 +1525,7 @@ enum { PHY_M_EC2_FO_IMPED = 1<<5, /* Fiber Output Impedance */ PHY_M_EC2_FO_M_CLK = 1<<4, /* Fiber Mode Clock Enable */ PHY_M_EC2_FO_BOOST = 1<<3, /* Fiber Output Boost */ - PHY_M_EC2_FO_AM_MSK = 7,/* Bit 2.. 0: Fiber Output Amplitude */ + PHY_M_EC2_FO_AM_MSK = 7, /* Bit 2.. 0: Fiber Output Amplitude */ }; /***** PHY_MARV_EXT_P_STAT 16 bit r/w Ext. PHY Specific Status *****/ @@ -1550,7 +1550,7 @@ enum { PHY_M_CABD_DIS_WAIT = 1<<15, /* Disable Waiting Period (Page 1) */ /* (88E1111 only) */ PHY_M_CABD_STAT_MSK = 3<<13, /* Bit 14..13: Status Mask */ - PHY_M_CABD_AMPL_MSK = 0x1f<<8,/* Bit 12.. 8: Amplitude Mask */ + PHY_M_CABD_AMPL_MSK = 0x1f<<8, /* Bit 12.. 8: Amplitude Mask */ /* (88E1111 only) */ PHY_M_CABD_DIST_MSK = 0xff, /* Bit 7.. 0: Distance Mask */ }; @@ -1605,9 +1605,9 @@ enum { /***** PHY_MARV_PHY_CTRL (page 3) 16 bit r/w LED Control Reg. *****/ enum { - PHY_M_LEDC_LOS_MSK = 0xf<<12,/* Bit 15..12: LOS LED Ctrl. Mask */ + PHY_M_LEDC_LOS_MSK = 0xf<<12, /* Bit 15..12: LOS LED Ctrl. Mask */ PHY_M_LEDC_INIT_MSK = 0xf<<8, /* Bit 11.. 8: INIT LED Ctrl. Mask */ - PHY_M_LEDC_STA1_MSK = 0xf<<4,/* Bit 7.. 4: STAT1 LED Ctrl. Mask */ + PHY_M_LEDC_STA1_MSK = 0xf<<4, /* Bit 7.. 4: STAT1 LED Ctrl. Mask */ PHY_M_LEDC_STA0_MSK = 0xf, /* Bit 3.. 0: STAT0 LED Ctrl. Mask */ }; @@ -1804,8 +1804,8 @@ enum { /* GM_SMI_CTRL 16 bit r/w SMI Control Register */ enum { - GM_SMI_CT_PHY_A_MSK = 0x1f<<11,/* Bit 15..11: PHY Device Address */ - GM_SMI_CT_REG_A_MSK = 0x1f<<6,/* Bit 10.. 6: PHY Register Address */ + GM_SMI_CT_PHY_A_MSK = 0x1f<<11, /* Bit 15..11: PHY Device Address */ + GM_SMI_CT_REG_A_MSK = 0x1f<<6, /* Bit 10.. 6: PHY Register Address */ GM_SMI_CT_OP_RD = 1<<5, /* Bit 5: OpCode Read (0=Write)*/ GM_SMI_CT_RD_VAL = 1<<4, /* Bit 4: Read Valid (Read completed) */ GM_SMI_CT_BUSY = 1<<3, /* Bit 3: Busy (Operation in progress) */ @@ -1875,9 +1875,9 @@ enum { /* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test */ enum { - GMF_WSP_TST_ON = 1<<18,/* Write Shadow Pointer Test On */ - GMF_WSP_TST_OFF = 1<<17,/* Write Shadow Pointer Test Off */ - GMF_WSP_STEP = 1<<16,/* Write Shadow Pointer Step/Increment */ + GMF_WSP_TST_ON = 1<<18, /* Write Shadow Pointer Test On */ + GMF_WSP_TST_OFF = 1<<17, /* Write Shadow Pointer Test Off */ + GMF_WSP_STEP = 1<<16, /* Write Shadow Pointer Step/Increment */ GMF_CLI_TX_FU = 1<<6, /* Clear IRQ Tx FIFO Underrun */ GMF_CLI_TX_FC = 1<<5, /* Clear IRQ Tx Frame Complete */ @@ -2111,18 +2111,18 @@ enum { /* XM_MMU_CMD 16 bit r/w MMU Command Register */ enum { - XM_MMU_PHY_RDY = 1<<12,/* Bit 12: PHY Read Ready */ - XM_MMU_PHY_BUSY = 1<<11,/* Bit 11: PHY Busy */ - XM_MMU_IGN_PF = 1<<10,/* Bit 10: Ignore Pause Frame */ - XM_MMU_MAC_LB = 1<<9, /* Bit 9: Enable MAC Loopback */ - XM_MMU_FRC_COL = 1<<7, /* Bit 7: Force Collision */ - XM_MMU_SIM_COL = 1<<6, /* Bit 6: Simulate Collision */ - XM_MMU_NO_PRE = 1<<5, /* Bit 5: No MDIO Preamble */ - XM_MMU_GMII_FD = 1<<4, /* Bit 4: GMII uses Full Duplex */ - XM_MMU_RAT_CTRL = 1<<3, /* Bit 3: Enable Rate Control */ - XM_MMU_GMII_LOOP= 1<<2, /* Bit 2: PHY is in Loopback Mode */ - XM_MMU_ENA_RX = 1<<1, /* Bit 1: Enable Receiver */ - XM_MMU_ENA_TX = 1<<0, /* Bit 0: Enable Transmitter */ + XM_MMU_PHY_RDY = 1<<12, /* Bit 12: PHY Read Ready */ + XM_MMU_PHY_BUSY = 1<<11, /* Bit 11: PHY Busy */ + XM_MMU_IGN_PF = 1<<10, /* Bit 10: Ignore Pause Frame */ + XM_MMU_MAC_LB = 1<<9, /* Bit 9: Enable MAC Loopback */ + XM_MMU_FRC_COL = 1<<7, /* Bit 7: Force Collision */ + XM_MMU_SIM_COL = 1<<6, /* Bit 6: Simulate Collision */ + XM_MMU_NO_PRE = 1<<5, /* Bit 5: No MDIO Preamble */ + XM_MMU_GMII_FD = 1<<4, /* Bit 4: GMII uses Full Duplex */ + XM_MMU_RAT_CTRL = 1<<3, /* Bit 3: Enable Rate Control */ + XM_MMU_GMII_LOOP= 1<<2, /* Bit 2: PHY is in Loopback Mode */ + XM_MMU_ENA_RX = 1<<1, /* Bit 1: Enable Receiver */ + XM_MMU_ENA_TX = 1<<0, /* Bit 0: Enable Transmitter */ }; @@ -2506,7 +2506,7 @@ static inline void skge_write8(const struct skge_hw *hw, int reg, u8 val) } /* MAC Related Registers inside the device. */ -#define SK_REG(port,reg) (((port)<<7)+(reg)) +#define SK_REG(port,reg) (((port)<<7)+(u16)(reg)) #define SK_XMAC_REG(port, reg) \ ((BASE_XMAC_1 + (port) * (BASE_XMAC_2 - BASE_XMAC_1)) | (reg) << 1) diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 842abd9396c..fb1d2c30c1b 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -100,33 +100,32 @@ module_param(idle_timeout, int, 0); MODULE_PARM_DESC(idle_timeout, "Watchdog timer for lost interrupts (ms)"); static const struct pci_device_id sky2_id_table[] = { - { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, - { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, + { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, /* SK-9Sxx */ + { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx */ { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4b00) }, /* DGE-560T */ { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4001) }, /* DGE-550SX */ { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4B02) }, /* DGE-560SX */ - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4340) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4341) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4342) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4343) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4344) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4345) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4346) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4347) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4350) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4351) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4352) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4353) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4360) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4361) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4362) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4363) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4364) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4365) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4366) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4367) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4368) }, - { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4369) }, + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4340) }, /* 88E8021 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4341) }, /* 88E8022 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4342) }, /* 88E8061 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4343) }, /* 88E8062 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4344) }, /* 88E8021 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4345) }, /* 88E8022 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4346) }, /* 88E8061 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4347) }, /* 88E8062 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4350) }, /* 88E8035 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4351) }, /* 88E8036 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4352) }, /* 88E8038 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4353) }, /* 88E8039 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4356) }, /* 88EC033 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4360) }, /* 88E8052 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4361) }, /* 88E8050 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4362) }, /* 88E8053 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4363) }, /* 88E8055 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4364) }, /* 88E8056 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4366) }, /* 88EC036 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4367) }, /* 88EC032 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4368) }, /* 88EC034 */ { 0 } }; @@ -522,7 +521,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) /* set Tx LED (LED_TX) to blink mode on Rx OR Tx activity */ ledctrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) | PHY_M_LEDC_TX_CTRL; /* turn off the Rx LED (LED_RX) */ - ledover |= PHY_M_LED_MO_RX(MO_LED_OFF); + ledover &= ~PHY_M_LED_MO_RX; } if (hw->chip_id == CHIP_ID_YUKON_EC_U && hw->chip_rev == CHIP_REV_YU_EC_A1) { @@ -545,7 +544,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) if (sky2->autoneg == AUTONEG_DISABLE || sky2->speed == SPEED_100) { /* turn on 100 Mbps LED (LED_LINK100) */ - ledover |= PHY_M_LED_MO_100(MO_LED_ON); + ledover |= PHY_M_LED_MO_100; } if (ledover) @@ -697,10 +696,15 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) } -/* Assign Ram Buffer allocation in units of 64bit (8 bytes) */ -static void sky2_ramset(struct sky2_hw *hw, u16 q, u32 start, u32 end) +/* Assign Ram Buffer allocation to queue */ +static void sky2_ramset(struct sky2_hw *hw, u16 q, u32 start, u32 space) { - pr_debug(PFX "q %d %#x %#x\n", q, start, end); + u32 end; + + /* convert from K bytes to qwords used for hw register */ + start *= 1024/8; + space *= 1024/8; + end = start + space - 1; sky2_write8(hw, RB_ADDR(q, RB_CTRL), RB_RST_CLR); sky2_write32(hw, RB_ADDR(q, RB_START), start); @@ -709,7 +713,6 @@ static void sky2_ramset(struct sky2_hw *hw, u16 q, u32 start, u32 end) sky2_write32(hw, RB_ADDR(q, RB_RP), start); if (q == Q_R1 || q == Q_R2) { - u32 space = end - start + 1; u32 tp = space - space/4; /* On receive queue's set the thresholds @@ -1059,11 +1062,16 @@ static int sky2_rx_start(struct sky2_port *sky2) sky2->rx_put = sky2->rx_next = 0; sky2_qset(hw, rxq); + /* On PCI express lowering the watermark gives better performance */ + if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) + sky2_write32(hw, Q_ADDR(rxq, Q_WM), BMU_WM_PEX); + + /* These chips have no ram buffer? + * MAC Rx RAM Read is controlled by hardware */ if (hw->chip_id == CHIP_ID_YUKON_EC_U && - (hw->chip_rev == CHIP_REV_YU_EC_U_A1 || hw->chip_rev == CHIP_REV_YU_EC_U_B0)) { - /* MAC Rx RAM Read is controlled by hardware */ + (hw->chip_rev == CHIP_REV_YU_EC_U_A1 + || hw->chip_rev == CHIP_REV_YU_EC_U_B0)) sky2_write32(hw, Q_ADDR(rxq, Q_F), F_M_RX_RAM_DIS); - } sky2_prefetch_init(hw, rxq, sky2->rx_le_map, RX_LE_SIZE - 1); @@ -1139,7 +1147,7 @@ static int sky2_up(struct net_device *dev) struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; unsigned port = sky2->port; - u32 ramsize, rxspace, imask; + u32 ramsize, imask; int cap, err = -ENOMEM; struct net_device *otherdev = hw->dev[sky2->port^1]; @@ -1192,20 +1200,25 @@ static int sky2_up(struct net_device *dev) sky2_mac_init(hw, port); - /* Determine available ram buffer space in qwords. */ - ramsize = sky2_read8(hw, B2_E_0) * 4096/8; + /* Register is number of 4K blocks on internal RAM buffer. */ + ramsize = sky2_read8(hw, B2_E_0) * 4; + printk(KERN_INFO PFX "%s: ram buffer %dK\n", dev->name, ramsize); - if (ramsize > 6*1024/8) - rxspace = ramsize - (ramsize + 2) / 3; - else - rxspace = ramsize / 2; + if (ramsize > 0) { + u32 rxspace; - sky2_ramset(hw, rxqaddr[port], 0, rxspace-1); - sky2_ramset(hw, txqaddr[port], rxspace, ramsize-1); + if (ramsize < 16) + rxspace = ramsize / 2; + else + rxspace = 8 + (2*(ramsize - 16))/3; + + sky2_ramset(hw, rxqaddr[port], 0, rxspace); + sky2_ramset(hw, txqaddr[port], rxspace, ramsize - rxspace); - /* Make sure SyncQ is disabled */ - sky2_write8(hw, RB_ADDR(port == 0 ? Q_XS1 : Q_XS2, RB_CTRL), - RB_RST_SET); + /* Make sure SyncQ is disabled */ + sky2_write8(hw, RB_ADDR(port == 0 ? Q_XS1 : Q_XS2, RB_CTRL), + RB_RST_SET); + } sky2_qset(hw, txqaddr[port]); @@ -2917,18 +2930,8 @@ static void sky2_led(struct sky2_hw *hw, unsigned port, int on) default: gm_phy_write(hw, port, PHY_MARV_LED_CTRL, 0); - gm_phy_write(hw, port, PHY_MARV_LED_OVER, - on ? PHY_M_LED_MO_DUP(MO_LED_ON) | - PHY_M_LED_MO_10(MO_LED_ON) | - PHY_M_LED_MO_100(MO_LED_ON) | - PHY_M_LED_MO_1000(MO_LED_ON) | - PHY_M_LED_MO_RX(MO_LED_ON) - : PHY_M_LED_MO_DUP(MO_LED_OFF) | - PHY_M_LED_MO_10(MO_LED_OFF) | - PHY_M_LED_MO_100(MO_LED_OFF) | - PHY_M_LED_MO_1000(MO_LED_OFF) | - PHY_M_LED_MO_RX(MO_LED_OFF)); - + gm_phy_write(hw, port, PHY_MARV_LED_OVER, + on ? PHY_M_LED_ALL : 0); } } diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index 7760545edbf..6ed1d47dbbd 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -608,7 +608,7 @@ enum { PHY_ADDR_MARV = 0, }; -#define RB_ADDR(offs, queue) (B16_RAM_REGS + (queue) + (offs)) +#define RB_ADDR(offs, queue) ((u16) B16_RAM_REGS + (queue) + (offs)) enum { @@ -680,6 +680,7 @@ enum { BMU_FIFO_ENA | BMU_OP_ON, BMU_WM_DEFAULT = 0x600, + BMU_WM_PEX = 0x80, }; /* Tx BMU Control / Status Registers (Yukon-2) */ @@ -1060,7 +1061,7 @@ enum { PHY_M_PC_EN_DET_PLUS = 3<<8, /* Energy Detect Plus (Mode 2) */ }; -#define PHY_M_PC_MDI_XMODE(x) (((x)<<5) & PHY_M_PC_MDIX_MSK) +#define PHY_M_PC_MDI_XMODE(x) (((u16)(x)<<5) & PHY_M_PC_MDIX_MSK) enum { PHY_M_PC_MAN_MDI = 0, /* 00 = Manual MDI configuration */ @@ -1156,13 +1157,13 @@ enum { PHY_M_EC_TX_TIM_CT = 1<<1, /* RGMII Tx Timing Control */ PHY_M_EC_TRANS_DIS = 1<<0, /* Transmitter Disable (88E1111 only) */}; -#define PHY_M_EC_M_DSC(x) ((x)<<10 & PHY_M_EC_M_DSC_MSK) +#define PHY_M_EC_M_DSC(x) ((u16)(x)<<10 & PHY_M_EC_M_DSC_MSK) /* 00=1x; 01=2x; 10=3x; 11=4x */ -#define PHY_M_EC_S_DSC(x) ((x)<<8 & PHY_M_EC_S_DSC_MSK) +#define PHY_M_EC_S_DSC(x) ((u16)(x)<<8 & PHY_M_EC_S_DSC_MSK) /* 00=dis; 01=1x; 10=2x; 11=3x */ -#define PHY_M_EC_DSC_2(x) ((x)<<9 & PHY_M_EC_M_DSC_MSK2) +#define PHY_M_EC_DSC_2(x) ((u16)(x)<<9 & PHY_M_EC_M_DSC_MSK2) /* 000=1x; 001=2x; 010=3x; 011=4x */ -#define PHY_M_EC_MAC_S(x) ((x)<<4 & PHY_M_EC_MAC_S_MSK) +#define PHY_M_EC_MAC_S(x) ((u16)(x)<<4 & PHY_M_EC_MAC_S_MSK) /* 01X=0; 110=2.5; 111=25 (MHz) */ /* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */ @@ -1173,7 +1174,7 @@ enum { }; /* !!! Errata in spec. (1 = disable) */ -#define PHY_M_PC_DSC(x) (((x)<<12) & PHY_M_PC_DSC_MSK) +#define PHY_M_PC_DSC(x) (((u16)(x)<<12) & PHY_M_PC_DSC_MSK) /* 100=5x; 101=6x; 110=7x; 111=8x */ enum { MAC_TX_CLK_0_MHZ = 2, @@ -1203,7 +1204,7 @@ enum { PHY_M_LEDC_TX_C_MSB = 1<<0, /* Tx Control (MSB, 88E1111 only) */ }; -#define PHY_M_LED_PULS_DUR(x) (((x)<<12) & PHY_M_LEDC_PULS_MSK) +#define PHY_M_LED_PULS_DUR(x) (((u16)(x)<<12) & PHY_M_LEDC_PULS_MSK) /***** PHY_MARV_PHY_STAT (page 3)16 bit r/w Polarity Control Reg. *****/ enum { @@ -1233,7 +1234,7 @@ enum { PULS_1300MS = 7,/* 1.3 s to 2.7 s */ }; -#define PHY_M_LED_BLINK_RT(x) (((x)<<8) & PHY_M_LEDC_BL_R_MSK) +#define PHY_M_LED_BLINK_RT(x) (((u16)(x)<<8) & PHY_M_LEDC_BL_R_MSK) enum { BLINK_42MS = 0,/* 42 ms */ @@ -1243,21 +1244,18 @@ enum { BLINK_670MS = 4,/* 670 ms */ }; -/***** PHY_MARV_LED_OVER 16 bit r/w Manual LED Override Reg *****/ -#define PHY_M_LED_MO_SGMII(x) ((x)<<14) /* Bit 15..14: SGMII AN Timer */ - /* Bit 13..12: reserved */ -#define PHY_M_LED_MO_DUP(x) ((x)<<10) /* Bit 11..10: Duplex */ -#define PHY_M_LED_MO_10(x) ((x)<<8) /* Bit 9.. 8: Link 10 */ -#define PHY_M_LED_MO_100(x) ((x)<<6) /* Bit 7.. 6: Link 100 */ -#define PHY_M_LED_MO_1000(x) ((x)<<4) /* Bit 5.. 4: Link 1000 */ -#define PHY_M_LED_MO_RX(x) ((x)<<2) /* Bit 3.. 2: Rx */ -#define PHY_M_LED_MO_TX(x) ((x)<<0) /* Bit 1.. 0: Tx */ - +/**** PHY_MARV_LED_OVER 16 bit r/w LED control */ enum { - MO_LED_NORM = 0, - MO_LED_BLINK = 1, - MO_LED_OFF = 2, - MO_LED_ON = 3, + PHY_M_LED_MO_DUP = 3<<10,/* Bit 11..10: Duplex */ + PHY_M_LED_MO_10 = 3<<8, /* Bit 9.. 8: Link 10 */ + PHY_M_LED_MO_100 = 3<<6, /* Bit 7.. 6: Link 100 */ + PHY_M_LED_MO_1000 = 3<<4, /* Bit 5.. 4: Link 1000 */ + PHY_M_LED_MO_RX = 3<<2, /* Bit 3.. 2: Rx */ + PHY_M_LED_MO_TX = 3<<0, /* Bit 1.. 0: Tx */ + + PHY_M_LED_ALL = PHY_M_LED_MO_DUP | PHY_M_LED_MO_10 + | PHY_M_LED_MO_100 | PHY_M_LED_MO_1000 + | PHY_M_LED_MO_RX, }; /***** PHY_MARV_EXT_CTRL_2 16 bit r/w Ext. PHY Specific Ctrl 2 *****/ @@ -1294,9 +1292,9 @@ enum { PHY_M_FELP_LED0_MSK = 0xf, /* Bit 3.. 0: LED0 Mask (SPEED) */ }; -#define PHY_M_FELP_LED2_CTRL(x) (((x)<<8) & PHY_M_FELP_LED2_MSK) -#define PHY_M_FELP_LED1_CTRL(x) (((x)<<4) & PHY_M_FELP_LED1_MSK) -#define PHY_M_FELP_LED0_CTRL(x) (((x)<<0) & PHY_M_FELP_LED0_MSK) +#define PHY_M_FELP_LED2_CTRL(x) (((u16)(x)<<8) & PHY_M_FELP_LED2_MSK) +#define PHY_M_FELP_LED1_CTRL(x) (((u16)(x)<<4) & PHY_M_FELP_LED1_MSK) +#define PHY_M_FELP_LED0_CTRL(x) (((u16)(x)<<0) & PHY_M_FELP_LED0_MSK) enum { LED_PAR_CTRL_COLX = 0x00, @@ -1552,8 +1550,8 @@ enum { GM_SMI_CT_BUSY = 1<<3, /* Bit 3: Busy (Operation in progress) */ }; -#define GM_SMI_CT_PHY_AD(x) (((x)<<11) & GM_SMI_CT_PHY_A_MSK) -#define GM_SMI_CT_REG_AD(x) (((x)<<6) & GM_SMI_CT_REG_A_MSK) +#define GM_SMI_CT_PHY_AD(x) (((u16)(x)<<11) & GM_SMI_CT_PHY_A_MSK) +#define GM_SMI_CT_REG_AD(x) (((u16)(x)<<6) & GM_SMI_CT_REG_A_MSK) /* GM_PHY_ADDR 16 bit r/w GPHY Address Register */ enum { diff --git a/drivers/net/smc-ultra.c b/drivers/net/smc-ultra.c index 889ef0d7c37..d70bc979534 100644 --- a/drivers/net/smc-ultra.c +++ b/drivers/net/smc-ultra.c @@ -593,7 +593,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/smc-ultra32.c b/drivers/net/smc-ultra32.c index e10755ec5de..2c5319c62fa 100644 --- a/drivers/net/smc-ultra32.c +++ b/drivers/net/smc-ultra32.c @@ -437,7 +437,7 @@ int __init init_module(void) return -ENXIO; } -void cleanup_module(void) +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c index c0d13d65091..bd6e84506c2 100644 --- a/drivers/net/smc9194.c +++ b/drivers/net/smc9194.c @@ -1616,7 +1616,7 @@ int __init init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(devSMC9194); free_irq(devSMC9194->irq, devSMC9194); diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index a8640169fc7..9367c574477 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h @@ -238,7 +238,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_CAN_USE_16BIT 1 #define SMC_CAN_USE_32BIT 0 -#define SMC_inb(a, r) inb((u32)a) + (r)) +#define SMC_inb(a, r) inb(((u32)a) + (r)) #define SMC_inw(a, r) inw(((u32)a) + (r)) #define SMC_outb(v, a, r) outb(v, ((u32)a) + (r)) #define SMC_outw(v, a, r) outw(v, ((u32)a) + (r)) @@ -434,6 +434,24 @@ static inline void LPD7_SMC_outsw (unsigned char* a, int r, #define SMC_IRQ_FLAGS (0) +#elif defined(CONFIG_ARCH_VERSATILE) + +#define SMC_CAN_USE_8BIT 1 +#define SMC_CAN_USE_16BIT 1 +#define SMC_CAN_USE_32BIT 1 +#define SMC_NOWAIT 1 + +#define SMC_inb(a, r) readb((a) + (r)) +#define SMC_inw(a, r) readw((a) + (r)) +#define SMC_inl(a, r) readl((a) + (r)) +#define SMC_outb(v, a, r) writeb(v, (a) + (r)) +#define SMC_outw(v, a, r) writew(v, (a) + (r)) +#define SMC_outl(v, a, r) writel(v, (a) + (r)) +#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) + +#define SMC_IRQ_FLAGS (0) + #else #define SMC_CAN_USE_8BIT 1 @@ -1216,7 +1234,7 @@ static const char * chip_ids[ 16 ] = { if (SMC_CAN_USE_32BIT) { \ void *__ptr = (p); \ int __len = (l); \ - void *__ioaddr = ioaddr; \ + void __iomem *__ioaddr = ioaddr; \ if (__len >= 2 && (unsigned long)__ptr & 2) { \ __len -= 2; \ SMC_outw(*(u16 *)__ptr, ioaddr, DATA_REG); \ @@ -1240,7 +1258,7 @@ static const char * chip_ids[ 16 ] = { if (SMC_CAN_USE_32BIT) { \ void *__ptr = (p); \ int __len = (l); \ - void *__ioaddr = ioaddr; \ + void __iomem *__ioaddr = ioaddr; \ if ((unsigned long)__ptr & 2) { \ /* \ * We want 32bit alignment here. \ diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c index 47a1c09d19a..c62e85d89f4 100644 --- a/drivers/net/sun3lance.c +++ b/drivers/net/sun3lance.c @@ -945,7 +945,7 @@ static void set_multicast_list( struct net_device *dev ) static struct net_device *sun3lance_dev; -int init_module(void) +int __init init_module(void) { sun3lance_dev = sun3lance_probe(-1); if (IS_ERR(sun3lance_dev)) @@ -953,7 +953,7 @@ int init_module(void) return 0; } -void cleanup_module(void) +void __exit cleanup_module(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index d9123c9adc1..571320ae87a 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -68,8 +68,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.69" -#define DRV_MODULE_RELDATE "November 15, 2006" +#define DRV_MODULE_VERSION "3.70" +#define DRV_MODULE_RELDATE "December 1, 2006" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -192,6 +192,7 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787F)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)}, @@ -1061,7 +1062,7 @@ static void tg3_frob_aux_power(struct tg3 *tp) { struct tg3 *tp_peer = tp; - if ((tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) != 0) + if ((tp->tg3_flags2 & TG3_FLG2_IS_NIC) == 0) return; if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) || @@ -1212,8 +1213,8 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) power_control); udelay(100); /* Delay after power state change */ - /* Switch out of Vaux if it is not a LOM */ - if (!(tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT)) + /* Switch out of Vaux if it is a NIC */ + if (tp->tg3_flags2 & TG3_FLG2_IS_NIC) tw32_wait_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl, 100); return 0; @@ -1401,8 +1402,10 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) static void tg3_link_report(struct tg3 *tp) { if (!netif_carrier_ok(tp->dev)) { - printk(KERN_INFO PFX "%s: Link is down.\n", tp->dev->name); - } else { + if (netif_msg_link(tp)) + printk(KERN_INFO PFX "%s: Link is down.\n", + tp->dev->name); + } else if (netif_msg_link(tp)) { printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n", tp->dev->name, (tp->link_config.active_speed == SPEED_1000 ? @@ -1557,12 +1560,6 @@ static void tg3_phy_copper_begin(struct tg3 *tp) tg3_writephy(tp, MII_ADVERTISE, new_adv); } else if (tp->link_config.speed == SPEED_INVALID) { - tp->link_config.advertising = - (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | - ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full | - ADVERTISED_Autoneg | ADVERTISED_MII); - if (tp->tg3_flags & TG3_FLAG_10_100_ONLY) tp->link_config.advertising &= ~(ADVERTISED_1000baseT_Half | @@ -1706,25 +1703,36 @@ static int tg3_init_5401phy_dsp(struct tg3 *tp) return err; } -static int tg3_copper_is_advertising_all(struct tg3 *tp) +static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask) { - u32 adv_reg, all_mask; + u32 adv_reg, all_mask = 0; + + if (mask & ADVERTISED_10baseT_Half) + all_mask |= ADVERTISE_10HALF; + if (mask & ADVERTISED_10baseT_Full) + all_mask |= ADVERTISE_10FULL; + if (mask & ADVERTISED_100baseT_Half) + all_mask |= ADVERTISE_100HALF; + if (mask & ADVERTISED_100baseT_Full) + all_mask |= ADVERTISE_100FULL; if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg)) return 0; - all_mask = (ADVERTISE_10HALF | ADVERTISE_10FULL | - ADVERTISE_100HALF | ADVERTISE_100FULL); if ((adv_reg & all_mask) != all_mask) return 0; if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) { u32 tg3_ctrl; + all_mask = 0; + if (mask & ADVERTISED_1000baseT_Half) + all_mask |= ADVERTISE_1000HALF; + if (mask & ADVERTISED_1000baseT_Full) + all_mask |= ADVERTISE_1000FULL; + if (tg3_readphy(tp, MII_TG3_CTRL, &tg3_ctrl)) return 0; - all_mask = (MII_TG3_CTRL_ADV_1000_HALF | - MII_TG3_CTRL_ADV_1000_FULL); if ((tg3_ctrl & all_mask) != all_mask) return 0; } @@ -1884,7 +1892,8 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) /* Force autoneg restart if we are exiting * low power mode. */ - if (!tg3_copper_is_advertising_all(tp)) + if (!tg3_copper_is_advertising_all(tp, + tp->link_config.advertising)) current_link_up = 0; } else { current_link_up = 0; @@ -3703,8 +3712,9 @@ static void tg3_tx_timeout(struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - printk(KERN_ERR PFX "%s: transmit timed out, resetting\n", - dev->name); + if (netif_msg_tx_err(tp)) + printk(KERN_ERR PFX "%s: transmit timed out, resetting\n", + dev->name); schedule_work(&tp->reset_task); } @@ -6396,16 +6406,17 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) udelay(40); /* tp->grc_local_ctrl is partially set up during tg3_get_invariants(). - * If TG3_FLAG_EEPROM_WRITE_PROT is set, we should read the + * If TG3_FLG2_IS_NIC is zero, we should read the * register to preserve the GPIO settings for LOMs. The GPIOs, * whether used as inputs or outputs, are set by boot code after * reset. */ - if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) { + if (!(tp->tg3_flags2 & TG3_FLG2_IS_NIC)) { u32 gpio_mask; - gpio_mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE2 | - GRC_LCLCTRL_GPIO_OUTPUT0 | GRC_LCLCTRL_GPIO_OUTPUT2; + gpio_mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE1 | + GRC_LCLCTRL_GPIO_OE2 | GRC_LCLCTRL_GPIO_OUTPUT0 | + GRC_LCLCTRL_GPIO_OUTPUT1 | GRC_LCLCTRL_GPIO_OUTPUT2; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752) gpio_mask |= GRC_LCLCTRL_GPIO_OE3 | @@ -6417,8 +6428,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tp->grc_local_ctrl |= tr32(GRC_LOCAL_CTRL) & gpio_mask; /* GPIO1 must be driven high for eeprom write protect */ - tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 | - GRC_LCLCTRL_GPIO_OUTPUT1); + if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) + tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 | + GRC_LCLCTRL_GPIO_OUTPUT1); } tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl); udelay(100); @@ -8656,7 +8668,9 @@ static int tg3_test_registers(struct tg3 *tp) return 0; out: - printk(KERN_ERR PFX "Register test failed at offset %x\n", offset); + if (netif_msg_hw(tp)) + printk(KERN_ERR PFX "Register test failed at offset %x\n", + offset); tw32(offset, save_val); return -EIO; } @@ -8781,17 +8795,20 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode) tg3_writephy(tp, 0x10, phy & ~0x4000); tg3_writephy(tp, MII_TG3_EPHY_TEST, phytest); } - } - val = BMCR_LOOPBACK | BMCR_FULLDPLX; - if (tp->tg3_flags & TG3_FLAG_10_100_ONLY) - val |= BMCR_SPEED100; - else - val |= BMCR_SPEED1000; + val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100; + } else + val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED1000; tg3_writephy(tp, MII_BMCR, val); udelay(40); - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + + mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) | + MAC_MODE_LINK_POLARITY; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { tg3_writephy(tp, MII_TG3_EPHY_PTEST, 0x1800); + mac_mode |= MAC_MODE_PORT_MODE_MII; + } else + mac_mode |= MAC_MODE_PORT_MODE_GMII; /* reset to prevent losing 1st rx packet intermittently */ if (tp->tg3_flags2 & TG3_FLG2_MII_SERDES) { @@ -8799,12 +8816,6 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode) udelay(10); tw32_f(MAC_RX_MODE, tp->rx_mode); } - mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) | - MAC_MODE_LINK_POLARITY; - if (tp->tg3_flags & TG3_FLAG_10_100_ONLY) - mac_mode |= MAC_MODE_PORT_MODE_MII; - else - mac_mode |= MAC_MODE_PORT_MODE_GMII; if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) { mac_mode &= ~MAC_MODE_LINK_POLARITY; tg3_writephy(tp, MII_TG3_EXT_CTRL, @@ -9456,16 +9467,12 @@ static void __devinit tg3_get_5906_nvram_info(struct tg3 *tp) /* Chips other than 5700/5701 use the NVRAM for fetching info. */ static void __devinit tg3_nvram_init(struct tg3 *tp) { - int j; - tw32_f(GRC_EEPROM_ADDR, (EEPROM_ADDR_FSM_RESET | (EEPROM_DEFAULT_CLOCK_PERIOD << EEPROM_ADDR_CLKPERD_SHIFT))); - /* XXX schedule_timeout() ... */ - for (j = 0; j < 100; j++) - udelay(10); + msleep(1); /* Enable seeprom accesses. */ tw32_f(GRC_LOCAL_CTRL, @@ -9526,12 +9533,12 @@ static int tg3_nvram_read_using_eeprom(struct tg3 *tp, EEPROM_ADDR_ADDR_MASK) | EEPROM_ADDR_READ | EEPROM_ADDR_START); - for (i = 0; i < 10000; i++) { + for (i = 0; i < 1000; i++) { tmp = tr32(GRC_EEPROM_ADDR); if (tmp & EEPROM_ADDR_COMPLETE) break; - udelay(100); + msleep(1); } if (!(tmp & EEPROM_ADDR_COMPLETE)) return -EBUSY; @@ -9656,12 +9663,12 @@ static int tg3_nvram_write_block_using_eeprom(struct tg3 *tp, EEPROM_ADDR_START | EEPROM_ADDR_WRITE); - for (j = 0; j < 10000; j++) { + for (j = 0; j < 1000; j++) { val = tr32(GRC_EEPROM_ADDR); if (val & EEPROM_ADDR_COMPLETE) break; - udelay(100); + msleep(1); } if (!(val & EEPROM_ADDR_COMPLETE)) { rc = -EBUSY; @@ -9965,8 +9972,10 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { - if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM)) + if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM)) { tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + tp->tg3_flags2 |= TG3_FLG2_IS_NIC; + } return; } @@ -10066,10 +10075,17 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) tp->led_ctrl = LED_CTRL_MODE_PHY_2; - if (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP) + if (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP) { tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; - else + if ((tp->pdev->subsystem_vendor == + PCI_VENDOR_ID_ARIMA) && + (tp->pdev->subsystem_device == 0x205a || + tp->pdev->subsystem_device == 0x2063)) + tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + } else { tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + tp->tg3_flags2 |= TG3_FLG2_IS_NIC; + } if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) { tp->tg3_flags |= TG3_FLAG_ENABLE_ASF; @@ -10147,7 +10163,7 @@ static int __devinit tg3_phy_probe(struct tg3 *tp) if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) && !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) { - u32 bmsr, adv_reg, tg3_ctrl; + u32 bmsr, adv_reg, tg3_ctrl, mask; tg3_readphy(tp, MII_BMSR, &bmsr); if (!tg3_readphy(tp, MII_BMSR, &bmsr) && @@ -10171,7 +10187,10 @@ static int __devinit tg3_phy_probe(struct tg3 *tp) MII_TG3_CTRL_ENABLE_AS_MASTER); } - if (!tg3_copper_is_advertising_all(tp)) { + mask = (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full); + if (!tg3_copper_is_advertising_all(tp, mask)) { tg3_writephy(tp, MII_ADVERTISE, adv_reg); if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) @@ -10695,7 +10714,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags |= TG3_FLAG_SRAM_USE_CONFIG; /* Get eeprom hw config before calling tg3_set_power_state(). - * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be + * In particular, the TG3_FLG2_IS_NIC flag must be * determined before calling tg3_set_power_state() so that * we know whether or not to switch out of Vaux power. * When the flag is set, it means that GPIO1 is used for eeprom @@ -10862,7 +10881,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) || (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM && (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F || - tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)) || + tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F || + tp->pdev->device == PCI_DEVICE_ID_TIGON3_5787F)) || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tp->tg3_flags |= TG3_FLAG_10_100_ONLY; @@ -11912,13 +11932,15 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); - printk(KERN_INFO "%s: Tigon3 [partno(%s) rev %04x PHY(%s)] (%s) %sBaseT Ethernet ", + printk(KERN_INFO "%s: Tigon3 [partno(%s) rev %04x PHY(%s)] (%s) %s Ethernet ", dev->name, tp->board_part_number, tp->pci_chip_rev_id, tg3_phy_string(tp), tg3_bus_string(tp, str), - (tp->tg3_flags & TG3_FLAG_10_100_ONLY) ? "10/100" : "10/100/1000"); + ((tp->tg3_flags & TG3_FLAG_10_100_ONLY) ? "10/100Base-TX" : + ((tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) ? "1000Base-SX" : + "10/100/1000Base-T"))); for (i = 0; i < 6; i++) printk("%2.2x%c", dev->dev_addr[i], diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 92f53000bce..dfaf4ed127b 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2233,6 +2233,7 @@ struct tg3 { #define TG3_FLG2_PCI_EXPRESS 0x00000200 #define TG3_FLG2_ASF_NEW_HANDSHAKE 0x00000400 #define TG3_FLG2_HW_AUTONEG 0x00000800 +#define TG3_FLG2_IS_NIC 0x00001000 #define TG3_FLG2_PHY_SERDES 0x00002000 #define TG3_FLG2_CAPACITIVE_COUPLING 0x00004000 #define TG3_FLG2_FLASH 0x00008000 diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index 46dabdb1207..cec282a6f62 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -5706,7 +5706,7 @@ int __init init_module(void) return found ? 0 : -ENODEV; } -void cleanup_module(void) +void __exit cleanup_module(void) { int i; diff --git a/drivers/net/wd.c b/drivers/net/wd.c index 41f1d677884..7f38012b9c9 100644 --- a/drivers/net/wd.c +++ b/drivers/net/wd.c @@ -538,7 +538,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void +void __exit cleanup_module(void) { int this_dev; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index efcdaf1c5f7..44a22701da9 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -49,6 +49,7 @@ #include <asm/uaccess.h> #include <net/ieee80211.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include "airo.h" diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index 08bc57a4b89..974a8e5bec8 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -1100,15 +1100,13 @@ static struct sta_info * ap_add_sta(struct ap_data *ap, u8 *addr) { struct sta_info *sta; - sta = (struct sta_info *) - kmalloc(sizeof(struct sta_info), GFP_ATOMIC); + sta = kzalloc(sizeof(struct sta_info), GFP_ATOMIC); if (sta == NULL) { PDEBUG(DEBUG_AP, "AP: kmalloc failed\n"); return NULL; } /* initialize STA info data */ - memset(sta, 0, sizeof(struct sta_info)); sta->local = ap->local; skb_queue_head_init(&sta->tx_buf); memcpy(sta->addr, addr, ETH_ALEN); diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ee542ec6d6a..8d8f4b9b8b0 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -563,12 +563,11 @@ static int prism2_config(struct pcmcia_device *link) PDEBUG(DEBUG_FLOW, "prism2_config()\n"); parse = kmalloc(sizeof(cisparse_t), GFP_KERNEL); - hw_priv = kmalloc(sizeof(*hw_priv), GFP_KERNEL); + hw_priv = kzalloc(sizeof(*hw_priv), GFP_KERNEL); if (parse == NULL || hw_priv == NULL) { ret = -ENOMEM; goto failed; } - memset(hw_priv, 0, sizeof(*hw_priv)); tuple.Attributes = 0; tuple.TupleData = buf; diff --git a/drivers/net/wireless/hostap/hostap_download.c b/drivers/net/wireless/hostap/hostap_download.c index ab26b52b3e7..24fc387bba6 100644 --- a/drivers/net/wireless/hostap/hostap_download.c +++ b/drivers/net/wireless/hostap/hostap_download.c @@ -685,14 +685,12 @@ static int prism2_download(local_info_t *local, goto out; } - dl = kmalloc(sizeof(*dl) + param->num_areas * + dl = kzalloc(sizeof(*dl) + param->num_areas * sizeof(struct prism2_download_data_area), GFP_KERNEL); if (dl == NULL) { ret = -ENOMEM; goto out; } - memset(dl, 0, sizeof(*dl) + param->num_areas * - sizeof(struct prism2_download_data_area)); dl->dl_cmd = param->dl_cmd; dl->start_addr = param->start_addr; dl->num_areas = param->num_areas; diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index c19e68636a1..a394a23b9a2 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -347,14 +347,12 @@ static int hfa384x_cmd(struct net_device *dev, u16 cmd, u16 param0, if (signal_pending(current)) return -EINTR; - entry = (struct hostap_cmd_queue *) - kmalloc(sizeof(*entry), GFP_ATOMIC); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) { printk(KERN_DEBUG "%s: hfa384x_cmd - kmalloc failed\n", dev->name); return -ENOMEM; } - memset(entry, 0, sizeof(*entry)); atomic_set(&entry->usecnt, 1); entry->type = CMD_SLEEP; entry->cmd = cmd; @@ -517,14 +515,12 @@ static int hfa384x_cmd_callback(struct net_device *dev, u16 cmd, u16 param0, return -1; } - entry = (struct hostap_cmd_queue *) - kmalloc(sizeof(*entry), GFP_ATOMIC); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) { printk(KERN_DEBUG "%s: hfa384x_cmd_callback - kmalloc " "failed\n", dev->name); return -ENOMEM; } - memset(entry, 0, sizeof(*entry)); atomic_set(&entry->usecnt, 1); entry->type = CMD_CALLBACK; entry->cmd = cmd; @@ -3016,14 +3012,12 @@ static int prism2_set_tim(struct net_device *dev, int aid, int set) iface = netdev_priv(dev); local = iface->local; - new_entry = (struct set_tim_data *) - kmalloc(sizeof(*new_entry), GFP_ATOMIC); + new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC); if (new_entry == NULL) { printk(KERN_DEBUG "%s: prism2_set_tim: kmalloc failed\n", local->dev->name); return -ENOMEM; } - memset(new_entry, 0, sizeof(*new_entry)); new_entry->aid = aid; new_entry->set = set; diff --git a/drivers/net/wireless/hostap/hostap_info.c b/drivers/net/wireless/hostap/hostap_info.c index 5fd2b1ad7f5..b6a02a02da7 100644 --- a/drivers/net/wireless/hostap/hostap_info.c +++ b/drivers/net/wireless/hostap/hostap_info.c @@ -327,11 +327,10 @@ static void prism2_info_hostscanresults(local_info_t *local, ptr = (u8 *) pos; new_count = left / result_size; - results = kmalloc(new_count * sizeof(struct hfa384x_hostscan_result), + results = kcalloc(new_count, sizeof(struct hfa384x_hostscan_result), GFP_ATOMIC); if (results == NULL) return; - memset(results, 0, new_count * sizeof(struct hfa384x_hostscan_result)); for (i = 0; i < new_count; i++) { memcpy(&results[i], ptr, copy_len); diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index d061fb3443f..3b7b8063ff1 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -181,12 +181,10 @@ static int prism2_ioctl_siwencode(struct net_device *dev, struct ieee80211_crypt_data *new_crypt; /* take WEP into use */ - new_crypt = (struct ieee80211_crypt_data *) - kmalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) return -ENOMEM; - memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); new_crypt->ops = ieee80211_get_crypto_ops("WEP"); if (!new_crypt->ops) { request_module("ieee80211_crypt_wep"); @@ -3320,14 +3318,12 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, prism2_crypt_delayed_deinit(local, crypt); - new_crypt = (struct ieee80211_crypt_data *) - kmalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) { ret = -ENOMEM; goto done; } - memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); new_crypt->ops = ops; new_crypt->priv = new_crypt->ops->init(i); if (new_crypt->priv == NULL) { @@ -3538,14 +3534,12 @@ static int prism2_ioctl_set_encryption(local_info_t *local, prism2_crypt_delayed_deinit(local, crypt); - new_crypt = (struct ieee80211_crypt_data *) - kmalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) { ret = -ENOMEM; goto done; } - memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); new_crypt->ops = ops; new_crypt->priv = new_crypt->ops->init(param->u.crypt.idx); if (new_crypt->priv == NULL) { diff --git a/drivers/net/wireless/hostap/hostap_pci.c b/drivers/net/wireless/hostap/hostap_pci.c index d1de9766c83..c4f6020baa9 100644 --- a/drivers/net/wireless/hostap/hostap_pci.c +++ b/drivers/net/wireless/hostap/hostap_pci.c @@ -300,10 +300,9 @@ static int prism2_pci_probe(struct pci_dev *pdev, struct hostap_interface *iface; struct hostap_pci_priv *hw_priv; - hw_priv = kmalloc(sizeof(*hw_priv), GFP_KERNEL); + hw_priv = kzalloc(sizeof(*hw_priv), GFP_KERNEL); if (hw_priv == NULL) return -ENOMEM; - memset(hw_priv, 0, sizeof(*hw_priv)); if (pci_enable_device(pdev)) goto err_out_free; diff --git a/drivers/net/wireless/hostap/hostap_plx.c b/drivers/net/wireless/hostap/hostap_plx.c index bc81b13a5a2..e235e064789 100644 --- a/drivers/net/wireless/hostap/hostap_plx.c +++ b/drivers/net/wireless/hostap/hostap_plx.c @@ -447,10 +447,9 @@ static int prism2_plx_probe(struct pci_dev *pdev, int tmd7160; struct hostap_plx_priv *hw_priv; - hw_priv = kmalloc(sizeof(*hw_priv), GFP_KERNEL); + hw_priv = kzalloc(sizeof(*hw_priv), GFP_KERNEL); if (hw_priv == NULL) return -ENOMEM; - memset(hw_priv, 0, sizeof(*hw_priv)); if (pci_enable_device(pdev)) goto err_out_free; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 1bcd352a813..dd9ba4aad7b 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -6220,7 +6220,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, /* Allocate and initialize the Tx/Rx queues and lists */ if (ipw2100_queues_allocate(priv)) { printk(KERN_WARNING DRV_NAME - "Error calilng ipw2100_queues_allocate.\n"); + "Error calling ipw2100_queues_allocate.\n"); err = -ENOMEM; goto fail; } diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index e82e56bb85e..22cb3fb7502 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -70,7 +70,7 @@ #define VQ #endif -#define IPW2200_VERSION "1.1.4" VK VD VM VP VR VQ +#define IPW2200_VERSION "1.2.0" VK VD VM VP VR VQ #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2200/2915 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" #define DRV_VERSION IPW2200_VERSION @@ -7677,7 +7677,8 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, /* Big bitfield of all the fields we provide in radiotap */ ipw_rt->rt_hdr.it_present = - ((1 << IEEE80211_RADIOTAP_FLAGS) | + ((1 << IEEE80211_RADIOTAP_TSFT) | + (1 << IEEE80211_RADIOTAP_FLAGS) | (1 << IEEE80211_RADIOTAP_RATE) | (1 << IEEE80211_RADIOTAP_CHANNEL) | (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | @@ -7686,10 +7687,14 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, /* Zero the flags, we'll add to them as we go */ ipw_rt->rt_flags = 0; - ipw_rt->rt_tsf = 0ULL; + ipw_rt->rt_tsf = (u64)(frame->parent_tsf[3] << 24 | + frame->parent_tsf[2] << 16 | + frame->parent_tsf[1] << 8 | + frame->parent_tsf[0]); /* Convert signal to DBM */ ipw_rt->rt_dbmsignal = antsignal; + ipw_rt->rt_dbmnoise = frame->noise; /* Convert the channel data and set the flags */ ipw_rt->rt_channel = cpu_to_le16(ieee80211chan2mhz(received_channel)); @@ -7889,7 +7894,8 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, /* Big bitfield of all the fields we provide in radiotap */ ipw_rt->rt_hdr.it_present = - ((1 << IEEE80211_RADIOTAP_FLAGS) | + ((1 << IEEE80211_RADIOTAP_TSFT) | + (1 << IEEE80211_RADIOTAP_FLAGS) | (1 << IEEE80211_RADIOTAP_RATE) | (1 << IEEE80211_RADIOTAP_CHANNEL) | (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | @@ -7898,7 +7904,10 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, /* Zero the flags, we'll add to them as we go */ ipw_rt->rt_flags = 0; - ipw_rt->rt_tsf = 0ULL; + ipw_rt->rt_tsf = (u64)(frame->parent_tsf[3] << 24 | + frame->parent_tsf[2] << 16 | + frame->parent_tsf[1] << 8 | + frame->parent_tsf[0]); /* Convert to DBM */ ipw_rt->rt_dbmsignal = signal; @@ -8297,7 +8306,7 @@ static void ipw_rx(struct ipw_priv *priv) ("Notification: subtype=%02X flags=%02X size=%d\n", pkt->u.notification.subtype, pkt->u.notification.flags, - pkt->u.notification.size); + le16_to_cpu(pkt->u.notification.size)); ipw_rx_notification(priv, &pkt->u.notification); break; } @@ -11145,14 +11154,13 @@ static int ipw_up(struct ipw_priv *priv) return -EIO; if (cmdlog && !priv->cmdlog) { - priv->cmdlog = kmalloc(sizeof(*priv->cmdlog) * cmdlog, + priv->cmdlog = kcalloc(cmdlog, sizeof(*priv->cmdlog), GFP_KERNEL); if (priv->cmdlog == NULL) { IPW_ERROR("Error allocating %d command log entries.\n", cmdlog); return -ENOMEM; } else { - memset(priv->cmdlog, 0, sizeof(*priv->cmdlog) * cmdlog); priv->cmdlog_len = cmdlog; } } diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index a87eb51886c..96606ed1007 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -2141,11 +2141,9 @@ prism54_wpa_bss_ie_add(islpci_private *priv, u8 *bssid, struct islpci_bss_wpa_ie, list); list_del(&bss->list); } else { - bss = kmalloc(sizeof (*bss), GFP_ATOMIC); - if (bss != NULL) { + bss = kzalloc(sizeof (*bss), GFP_ATOMIC); + if (bss != NULL) priv->num_bss_wpa++; - memset(bss, 0, sizeof (*bss)); - } } if (bss != NULL) { memcpy(bss->bssid, bssid, ETH_ALEN); @@ -2686,11 +2684,10 @@ prism2_ioctl_set_generic_element(struct net_device *ndev, return -EINVAL; alen = sizeof(*attach) + len; - attach = kmalloc(alen, GFP_KERNEL); + attach = kzalloc(alen, GFP_KERNEL); if (attach == NULL) return -ENOMEM; - memset(attach, 0, alen); #define WLAN_FC_TYPE_MGMT 0 #define WLAN_FC_STYPE_ASSOC_REQ 0 #define WLAN_FC_STYPE_REASSOC_REQ 2 diff --git a/drivers/net/wireless/prism54/oid_mgt.c b/drivers/net/wireless/prism54/oid_mgt.c index fbc52b6a302..e6cf9df2c20 100644 --- a/drivers/net/wireless/prism54/oid_mgt.c +++ b/drivers/net/wireless/prism54/oid_mgt.c @@ -235,12 +235,10 @@ mgt_init(islpci_private *priv) { int i; - priv->mib = kmalloc(OID_NUM_LAST * sizeof (void *), GFP_KERNEL); + priv->mib = kcalloc(OID_NUM_LAST, sizeof (void *), GFP_KERNEL); if (!priv->mib) return -ENOMEM; - memset(priv->mib, 0, OID_NUM_LAST * sizeof (void *)); - /* Alloc the cache */ for (i = 0; i < OID_NUM_LAST; i++) { if (isl_oid[i].flags & OID_FLAG_CACHED) { diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c index 8be99ebbe1c..77e11ddad83 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.c +++ b/drivers/net/wireless/zd1211rw/zd_chip.c @@ -1673,3 +1673,16 @@ int zd_rfwritev_cr_locked(struct zd_chip *chip, return 0; } + +int zd_chip_set_multicast_hash(struct zd_chip *chip, + struct zd_mc_hash *hash) +{ + struct zd_ioreq32 ioreqs[] = { + { CR_GROUP_HASH_P1, hash->low }, + { CR_GROUP_HASH_P2, hash->high }, + }; + + dev_dbg_f(zd_chip_dev(chip), "hash l 0x%08x h 0x%08x\n", + ioreqs[0].value, ioreqs[1].value); + return zd_iowrite32a(chip, ioreqs, ARRAY_SIZE(ioreqs)); +} diff --git a/drivers/net/wireless/zd1211rw/zd_chip.h b/drivers/net/wireless/zd1211rw/zd_chip.h index ca892b9a644..a4e3cee9b59 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.h +++ b/drivers/net/wireless/zd1211rw/zd_chip.h @@ -390,10 +390,19 @@ #define CR_BSSID_P1 CTL_REG(0x0618) #define CR_BSSID_P2 CTL_REG(0x061C) #define CR_BCN_PLCP_CFG CTL_REG(0x0620) + +/* Group hash table for filtering incoming packets. + * + * The group hash table is 64 bit large and split over two parts. The first + * part is the lower part. The upper 6 bits of the last byte of the target + * address are used as index. Packets are received if the hash table bit is + * set. This is used for multicast handling, but for broadcasts (address + * ff:ff:ff:ff:ff:ff) the highest bit in the second table must also be set. + */ #define CR_GROUP_HASH_P1 CTL_REG(0x0624) #define CR_GROUP_HASH_P2 CTL_REG(0x0628) -#define CR_RX_TIMEOUT CTL_REG(0x062C) +#define CR_RX_TIMEOUT CTL_REG(0x062C) /* Basic rates supported by the BSS. When producing ACK or CTS messages, the * device will use a rate in this table that is less than or equal to the rate * of the incoming frame which prompted the response */ @@ -864,4 +873,36 @@ u8 zd_rx_strength_percent(u8 rssi); u16 zd_rx_rate(const void *rx_frame, const struct rx_status *status); +struct zd_mc_hash { + u32 low; + u32 high; +}; + +static inline void zd_mc_clear(struct zd_mc_hash *hash) +{ + hash->low = 0; + /* The interfaces must always received broadcasts. + * The hash of the broadcast address ff:ff:ff:ff:ff:ff is 63. + */ + hash->high = 0x80000000; +} + +static inline void zd_mc_add_all(struct zd_mc_hash *hash) +{ + hash->low = hash->high = 0xffffffff; +} + +static inline void zd_mc_add_addr(struct zd_mc_hash *hash, u8 *addr) +{ + unsigned int i = addr[5] >> 2; + if (i < 32) { + hash->low |= 1 << i; + } else { + hash->high |= 1 << (i-32); + } +} + +int zd_chip_set_multicast_hash(struct zd_chip *chip, + struct zd_mc_hash *hash); + #endif /* _ZD_CHIP_H */ diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index f1573a9c233..00ca704ece3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -39,6 +39,8 @@ static void housekeeping_init(struct zd_mac *mac); static void housekeeping_enable(struct zd_mac *mac); static void housekeeping_disable(struct zd_mac *mac); +static void set_multicast_hash_handler(struct work_struct *work); + int zd_mac_init(struct zd_mac *mac, struct net_device *netdev, struct usb_interface *intf) @@ -55,6 +57,7 @@ int zd_mac_init(struct zd_mac *mac, softmac_init(ieee80211_priv(netdev)); zd_chip_init(&mac->chip, netdev, intf); housekeeping_init(mac); + INIT_WORK(&mac->set_multicast_hash_work, set_multicast_hash_handler); return 0; } @@ -136,6 +139,7 @@ out: void zd_mac_clear(struct zd_mac *mac) { + flush_workqueue(zd_workqueue); zd_chip_clear(&mac->chip); ZD_ASSERT(!spin_is_locked(&mac->lock)); ZD_MEMCLEAR(mac, sizeof(struct zd_mac)); @@ -256,6 +260,43 @@ int zd_mac_set_mac_address(struct net_device *netdev, void *p) return 0; } +static void set_multicast_hash_handler(struct work_struct *work) +{ + struct zd_mac *mac = container_of(work, struct zd_mac, + set_multicast_hash_work); + struct zd_mc_hash hash; + + spin_lock_irq(&mac->lock); + hash = mac->multicast_hash; + spin_unlock_irq(&mac->lock); + + zd_chip_set_multicast_hash(&mac->chip, &hash); +} + +void zd_mac_set_multicast_list(struct net_device *dev) +{ + struct zd_mc_hash hash; + struct zd_mac *mac = zd_netdev_mac(dev); + struct dev_mc_list *mc; + unsigned long flags; + + if (dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) { + zd_mc_add_all(&hash); + } else { + zd_mc_clear(&hash); + for (mc = dev->mc_list; mc; mc = mc->next) { + dev_dbg_f(zd_mac_dev(mac), "mc addr " MAC_FMT "\n", + MAC_ARG(mc->dmi_addr)); + zd_mc_add_addr(&hash, mc->dmi_addr); + } + } + + spin_lock_irqsave(&mac->lock, flags); + mac->multicast_hash = hash; + spin_unlock_irqrestore(&mac->lock, flags); + queue_work(zd_workqueue, &mac->set_multicast_hash_work); +} + int zd_mac_set_regdomain(struct zd_mac *mac, u8 regdomain) { int r; @@ -618,6 +659,9 @@ int zd_mac_get_range(struct zd_mac *mac, struct iw_range *range) range->we_version_compiled = WIRELESS_EXT; range->we_version_source = 20; + range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | + IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; + ZD_ASSERT(!irqs_disabled()); spin_lock_irq(&mac->lock); regdomain = mac->regdomain; @@ -930,7 +974,8 @@ static int is_data_packet_for_us(struct ieee80211_device *ieee, } return memcmp(hdr->addr1, netdev->dev_addr, ETH_ALEN) == 0 || - is_multicast_ether_addr(hdr->addr1) || + (is_multicast_ether_addr(hdr->addr1) && + memcmp(hdr->addr3, netdev->dev_addr, ETH_ALEN) != 0) || (netdev->flags & IFF_PROMISC); } @@ -1062,10 +1107,8 @@ int zd_mac_rx(struct zd_mac *mac, const u8 *buffer, unsigned int length) memcpy(skb_put(skb, length), buffer, length); r = ieee80211_rx(ieee, skb, &stats); - if (!r) { - ZD_ASSERT(in_irq()); - dev_kfree_skb_irq(skb); - } + if (!r) + dev_kfree_skb_any(skb); return 0; } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.h b/drivers/net/wireless/zd1211rw/zd_mac.h index d4e8b870409..f0cf05dc7d3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.h +++ b/drivers/net/wireless/zd1211rw/zd_mac.h @@ -133,6 +133,8 @@ struct zd_mac { struct iw_statistics iw_stats; struct housekeeping housekeeping; + struct work_struct set_multicast_hash_work; + struct zd_mc_hash multicast_hash; struct delayed_work set_rts_cts_work; struct delayed_work set_basic_rates_work; @@ -189,6 +191,7 @@ int zd_mac_init_hw(struct zd_mac *mac, u8 device_type); int zd_mac_open(struct net_device *netdev); int zd_mac_stop(struct net_device *netdev); int zd_mac_set_mac_address(struct net_device *dev, void *p); +void zd_mac_set_multicast_list(struct net_device *netdev); int zd_mac_rx(struct zd_mac *mac, const u8 *buffer, unsigned int length); diff --git a/drivers/net/wireless/zd1211rw/zd_netdev.c b/drivers/net/wireless/zd1211rw/zd_netdev.c index 60f1b0f6d45..8bda48de31e 100644 --- a/drivers/net/wireless/zd1211rw/zd_netdev.c +++ b/drivers/net/wireless/zd1211rw/zd_netdev.c @@ -242,7 +242,7 @@ struct net_device *zd_netdev_alloc(struct usb_interface *intf) netdev->open = zd_mac_open; netdev->stop = zd_mac_stop; /* netdev->get_stats = */ - /* netdev->set_multicast_list = */ + netdev->set_multicast_list = zd_mac_set_multicast_list; netdev->set_mac_address = zd_mac_set_mac_address; netdev->wireless_handlers = &iw_handler_def; /* netdev->ethtool_ops = */ diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 39c96641bc7..b61c17b3e29 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1975,7 +1975,7 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} /* --- IRQ detection -------------------------------------- */ /* Only if supports ECP mode */ -static int __devinit programmable_irq_support(struct parport *pb) +static int programmable_irq_support(struct parport *pb) { int irq, intrLine; unsigned char oecr = inb (ECONTROL (pb)); @@ -1992,7 +1992,7 @@ static int __devinit programmable_irq_support(struct parport *pb) return irq; } -static int __devinit irq_probe_ECP(struct parport *pb) +static int irq_probe_ECP(struct parport *pb) { int i; unsigned long irqs; @@ -2020,7 +2020,7 @@ static int __devinit irq_probe_ECP(struct parport *pb) * This detection seems that only works in National Semiconductors * This doesn't work in SMC, LGS, and Winbond */ -static int __devinit irq_probe_EPP(struct parport *pb) +static int irq_probe_EPP(struct parport *pb) { #ifndef ADVANCED_DETECT return PARPORT_IRQ_NONE; @@ -2059,7 +2059,7 @@ static int __devinit irq_probe_EPP(struct parport *pb) #endif /* Advanced detection */ } -static int __devinit irq_probe_SPP(struct parport *pb) +static int irq_probe_SPP(struct parport *pb) { /* Don't even try to do this. */ return PARPORT_IRQ_NONE; @@ -2747,6 +2747,7 @@ enum parport_pc_pci_cards { titan_1284p2, avlab_1p, avlab_2p, + oxsemi_952, oxsemi_954, oxsemi_840, aks_0100, @@ -2822,6 +2823,7 @@ static struct parport_pc_pci { /* avlab_2p */ { 2, { { 0, 1}, { 2, 3 },} }, /* The Oxford Semi cards are unusual: 954 doesn't support ECP, * and 840 locks up if you write 1 to bit 2! */ + /* oxsemi_952 */ { 1, { { 0, 1 }, } }, /* oxsemi_954 */ { 1, { { 0, -1 }, } }, /* oxsemi_840 */ { 1, { { 0, -1 }, } }, /* aks_0100 */ { 1, { { 0, -1 }, } }, @@ -2895,6 +2897,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */ { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p}, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954PP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_954 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_12PCI840, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9fc9a34ef24..9168401401b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(msi_lock); static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL }; -static kmem_cache_t* msi_cachep; +static struct kmem_cache* msi_cachep; static int pci_msi_enable = 1; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0eeac60042b..6a3c1e72890 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -873,6 +873,7 @@ void __devinit pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.release = pci_release_dev; pci_dev_get(dev); + set_dev_node(&dev->dev, pcibus_to_node(bus)); dev->dev.dma_mask = &dev->dma_mask; dev->dev.coherent_dma_mask = 0xffffffffull; diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index f9cd831a3f3..606a4674033 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -29,6 +29,7 @@ #include <linux/pci.h> #include <linux/device.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <asm/system.h> #include <asm/irq.h> diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c index 227600cd636..91c047a7e63 100644 --- a/drivers/pnp/card.c +++ b/drivers/pnp/card.c @@ -164,9 +164,17 @@ static DEVICE_ATTR(card_id,S_IRUGO,pnp_show_card_ids,NULL); static int pnp_interface_attach_card(struct pnp_card *card) { - device_create_file(&card->dev,&dev_attr_name); - device_create_file(&card->dev,&dev_attr_card_id); + int rc = device_create_file(&card->dev,&dev_attr_name); + if (rc) return rc; + + rc = device_create_file(&card->dev,&dev_attr_card_id); + if (rc) goto err_name; + return 0; + +err_name: + device_remove_file(&card->dev,&dev_attr_name); + return rc; } /** @@ -306,16 +314,20 @@ found: down_write(&dev->dev.bus->subsys.rwsem); dev->card_link = clink; dev->dev.driver = &drv->link.driver; - if (pnp_bus_type.probe(&dev->dev)) { - dev->dev.driver = NULL; - dev->card_link = NULL; - up_write(&dev->dev.bus->subsys.rwsem); - return NULL; - } - device_bind_driver(&dev->dev); + if (pnp_bus_type.probe(&dev->dev)) + goto err_out; + if (device_bind_driver(&dev->dev)) + goto err_out; + up_write(&dev->dev.bus->subsys.rwsem); return dev; + +err_out: + dev->dev.driver = NULL; + dev->card_link = NULL; + up_write(&dev->dev.bus->subsys.rwsem); + return NULL; } /** diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 9d8b415eca7..ac9fcd499f3 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -461,8 +461,19 @@ static DEVICE_ATTR(id,S_IRUGO,pnp_show_current_ids,NULL); int pnp_interface_attach_device(struct pnp_dev *dev) { - device_create_file(&dev->dev,&dev_attr_options); - device_create_file(&dev->dev,&dev_attr_resources); - device_create_file(&dev->dev,&dev_attr_id); + int rc = device_create_file(&dev->dev,&dev_attr_options); + if (rc) goto err; + rc = device_create_file(&dev->dev,&dev_attr_resources); + if (rc) goto err_opt; + rc = device_create_file(&dev->dev,&dev_attr_id); + if (rc) goto err_res; + return 0; + +err_res: + device_remove_file(&dev->dev,&dev_attr_resources); +err_opt: + device_remove_file(&dev->dev,&dev_attr_options); +err: + return rc; } diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 81a6c83d89a..33adeba1a31 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -61,6 +61,7 @@ #include <linux/dmi.h> #include <linux/delay.h> #include <linux/acpi.h> +#include <linux/freezer.h> #include <asm/page.h> #include <asm/desc.h> @@ -530,7 +531,8 @@ static int __init pnpbios_init(void) if (check_legacy_ioport(PNPBIOS_BASE)) return -ENODEV; #endif - if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table)) { + if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) || + paravirt_enabled()) { printk(KERN_INFO "PnPBIOS: Disabled\n"); return -ENODEV; } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index fc766a7a611..2a63ab2b47f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -154,15 +154,23 @@ config RTC_DRV_DS1672 will be called rtc-ds1672. config RTC_DRV_DS1742 - tristate "Dallas DS1742" + tristate "Dallas DS1742/1743" depends on RTC_CLASS help If you say yes here you get support for the - Dallas DS1742 timekeeping chip. + Dallas DS1742/1743 timekeeping chip. This driver can also be built as a module. If so, the module will be called rtc-ds1742. +config RTC_DRV_OMAP + tristate "TI OMAP1" + depends on RTC_CLASS && ( \ + ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 ) + help + Say "yes" here to support the real time clock on TI OMAP1 chips. + This driver can also be built as a module called rtc-omap. + config RTC_DRV_PCF8563 tristate "Philips PCF8563/Epson RTC8564" depends on RTC_CLASS && I2C diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 3ba5ff6e680..bd4c45d333f 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o +obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 67e816a9a39..dfef1637bfb 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -237,17 +237,22 @@ static int ds1672_probe(struct i2c_adapter *adapter, int address, int kind) /* read control register */ err = ds1672_get_control(client, &control); if (err) - goto exit_detach; + goto exit_devreg; if (control & DS1672_REG_CONTROL_EOSC) dev_warn(&client->dev, "Oscillator not enabled. " "Set time to enable.\n"); /* Register sysfs hooks */ - device_create_file(&client->dev, &dev_attr_control); + err = device_create_file(&client->dev, &dev_attr_control); + if (err) + goto exit_devreg; return 0; +exit_devreg: + rtc_device_unregister(rtc); + exit_detach: i2c_detach_client(client); diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 6273a3d240a..17633bfa848 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -6,6 +6,10 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * Copyright (C) 2006 Torsten Ertbjerg Rasmussen <tr@newtec.dk> + * - nvram size determined from resource + * - this ds1742 driver now supports ds1743. */ #include <linux/bcd.h> @@ -17,20 +21,19 @@ #include <linux/platform_device.h> #include <linux/io.h> -#define DRV_VERSION "0.2" +#define DRV_VERSION "0.3" -#define RTC_REG_SIZE 0x800 -#define RTC_OFFSET 0x7f8 +#define RTC_SIZE 8 -#define RTC_CONTROL (RTC_OFFSET + 0) -#define RTC_CENTURY (RTC_OFFSET + 0) -#define RTC_SECONDS (RTC_OFFSET + 1) -#define RTC_MINUTES (RTC_OFFSET + 2) -#define RTC_HOURS (RTC_OFFSET + 3) -#define RTC_DAY (RTC_OFFSET + 4) -#define RTC_DATE (RTC_OFFSET + 5) -#define RTC_MONTH (RTC_OFFSET + 6) -#define RTC_YEAR (RTC_OFFSET + 7) +#define RTC_CONTROL 0 +#define RTC_CENTURY 0 +#define RTC_SECONDS 1 +#define RTC_MINUTES 2 +#define RTC_HOURS 3 +#define RTC_DAY 4 +#define RTC_DATE 5 +#define RTC_MONTH 6 +#define RTC_YEAR 7 #define RTC_CENTURY_MASK 0x3f #define RTC_SECONDS_MASK 0x7f @@ -48,7 +51,10 @@ struct rtc_plat_data { struct rtc_device *rtc; - void __iomem *ioaddr; + void __iomem *ioaddr_nvram; + void __iomem *ioaddr_rtc; + size_t size_nvram; + size_t size; unsigned long baseaddr; unsigned long last_jiffies; }; @@ -57,7 +63,7 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - void __iomem *ioaddr = pdata->ioaddr; + void __iomem *ioaddr = pdata->ioaddr_rtc; u8 century; century = BIN2BCD((tm->tm_year + 1900) / 100); @@ -82,7 +88,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - void __iomem *ioaddr = pdata->ioaddr; + void __iomem *ioaddr = pdata->ioaddr_rtc; unsigned int year, month, day, hour, minute, second, week; unsigned int century; @@ -127,10 +133,10 @@ static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf, struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - void __iomem *ioaddr = pdata->ioaddr; + void __iomem *ioaddr = pdata->ioaddr_nvram; ssize_t count; - for (count = 0; size > 0 && pos < RTC_OFFSET; count++, size--) + for (count = 0; size > 0 && pos < pdata->size_nvram; count++, size--) *buf++ = readb(ioaddr + pos++); return count; } @@ -141,10 +147,10 @@ static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf, struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - void __iomem *ioaddr = pdata->ioaddr; + void __iomem *ioaddr = pdata->ioaddr_nvram; ssize_t count; - for (count = 0; size > 0 && pos < RTC_OFFSET; count++, size--) + for (count = 0; size > 0 && pos < pdata->size_nvram; count++, size--) writeb(*buf++, ioaddr + pos++); return count; } @@ -155,7 +161,6 @@ static struct bin_attribute ds1742_nvram_attr = { .mode = S_IRUGO | S_IWUGO, .owner = THIS_MODULE, }, - .size = RTC_OFFSET, .read = ds1742_nvram_read, .write = ds1742_nvram_write, }; @@ -175,19 +180,23 @@ static int __init ds1742_rtc_probe(struct platform_device *pdev) pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) { + pdata->size = res->end - res->start + 1; + if (!request_mem_region(res->start, pdata->size, pdev->name)) { ret = -EBUSY; goto out; } pdata->baseaddr = res->start; - ioaddr = ioremap(pdata->baseaddr, RTC_REG_SIZE); + ioaddr = ioremap(pdata->baseaddr, pdata->size); if (!ioaddr) { ret = -ENOMEM; goto out; } - pdata->ioaddr = ioaddr; + pdata->ioaddr_nvram = ioaddr; + pdata->size_nvram = pdata->size - RTC_SIZE; + pdata->ioaddr_rtc = ioaddr + pdata->size_nvram; /* turn RTC on if it was not on */ + ioaddr = pdata->ioaddr_rtc; sec = readb(ioaddr + RTC_SECONDS); if (sec & RTC_STOP) { sec &= RTC_SECONDS_MASK; @@ -208,6 +217,8 @@ static int __init ds1742_rtc_probe(struct platform_device *pdev) pdata->rtc = rtc; pdata->last_jiffies = jiffies; platform_set_drvdata(pdev, pdata); + ds1742_nvram_attr.size = max(ds1742_nvram_attr.size, + pdata->size_nvram); ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr); if (ret) goto out; @@ -215,10 +226,10 @@ static int __init ds1742_rtc_probe(struct platform_device *pdev) out: if (pdata->rtc) rtc_device_unregister(pdata->rtc); - if (ioaddr) - iounmap(ioaddr); + if (pdata->ioaddr_nvram) + iounmap(pdata->ioaddr_nvram); if (pdata->baseaddr) - release_mem_region(pdata->baseaddr, RTC_REG_SIZE); + release_mem_region(pdata->baseaddr, pdata->size); kfree(pdata); return ret; } @@ -229,8 +240,8 @@ static int __devexit ds1742_rtc_remove(struct platform_device *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr); rtc_device_unregister(pdata->rtc); - iounmap(pdata->ioaddr); - release_mem_region(pdata->baseaddr, RTC_REG_SIZE); + iounmap(pdata->ioaddr_nvram); + release_mem_region(pdata->baseaddr, pdata->size); kfree(pdata); return 0; } diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c new file mode 100644 index 00000000000..eac5fb1fc02 --- /dev/null +++ b/drivers/rtc/rtc-omap.c @@ -0,0 +1,572 @@ +/* + * TI OMAP1 Real Time Clock interface for Linux + * + * Copyright (C) 2003 MontaVista Software, Inc. + * Author: George G. Davis <gdavis@mvista.com> or <source@mvista.com> + * + * Copyright (C) 2006 David Brownell (new RTC framework) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/rtc.h> +#include <linux/bcd.h> +#include <linux/platform_device.h> + +#include <asm/io.h> +#include <asm/mach/time.h> + + +/* The OMAP1 RTC is a year/month/day/hours/minutes/seconds BCD clock + * with century-range alarm matching, driven by the 32kHz clock. + * + * The main user-visible ways it differs from PC RTCs are by omitting + * "don't care" alarm fields and sub-second periodic IRQs, and having + * an autoadjust mechanism to calibrate to the true oscillator rate. + * + * Board-specific wiring options include using split power mode with + * RTC_OFF_NOFF used as the reset signal (so the RTC won't be reset), + * and wiring RTC_WAKE_INT (so the RTC alarm can wake the system from + * low power modes). See the BOARD-SPECIFIC CUSTOMIZATION comment. + */ + +#define OMAP_RTC_BASE 0xfffb4800 + +/* RTC registers */ +#define OMAP_RTC_SECONDS_REG 0x00 +#define OMAP_RTC_MINUTES_REG 0x04 +#define OMAP_RTC_HOURS_REG 0x08 +#define OMAP_RTC_DAYS_REG 0x0C +#define OMAP_RTC_MONTHS_REG 0x10 +#define OMAP_RTC_YEARS_REG 0x14 +#define OMAP_RTC_WEEKS_REG 0x18 + +#define OMAP_RTC_ALARM_SECONDS_REG 0x20 +#define OMAP_RTC_ALARM_MINUTES_REG 0x24 +#define OMAP_RTC_ALARM_HOURS_REG 0x28 +#define OMAP_RTC_ALARM_DAYS_REG 0x2c +#define OMAP_RTC_ALARM_MONTHS_REG 0x30 +#define OMAP_RTC_ALARM_YEARS_REG 0x34 + +#define OMAP_RTC_CTRL_REG 0x40 +#define OMAP_RTC_STATUS_REG 0x44 +#define OMAP_RTC_INTERRUPTS_REG 0x48 + +#define OMAP_RTC_COMP_LSB_REG 0x4c +#define OMAP_RTC_COMP_MSB_REG 0x50 +#define OMAP_RTC_OSC_REG 0x54 + +/* OMAP_RTC_CTRL_REG bit fields: */ +#define OMAP_RTC_CTRL_SPLIT (1<<7) +#define OMAP_RTC_CTRL_DISABLE (1<<6) +#define OMAP_RTC_CTRL_SET_32_COUNTER (1<<5) +#define OMAP_RTC_CTRL_TEST (1<<4) +#define OMAP_RTC_CTRL_MODE_12_24 (1<<3) +#define OMAP_RTC_CTRL_AUTO_COMP (1<<2) +#define OMAP_RTC_CTRL_ROUND_30S (1<<1) +#define OMAP_RTC_CTRL_STOP (1<<0) + +/* OMAP_RTC_STATUS_REG bit fields: */ +#define OMAP_RTC_STATUS_POWER_UP (1<<7) +#define OMAP_RTC_STATUS_ALARM (1<<6) +#define OMAP_RTC_STATUS_1D_EVENT (1<<5) +#define OMAP_RTC_STATUS_1H_EVENT (1<<4) +#define OMAP_RTC_STATUS_1M_EVENT (1<<3) +#define OMAP_RTC_STATUS_1S_EVENT (1<<2) +#define OMAP_RTC_STATUS_RUN (1<<1) +#define OMAP_RTC_STATUS_BUSY (1<<0) + +/* OMAP_RTC_INTERRUPTS_REG bit fields: */ +#define OMAP_RTC_INTERRUPTS_IT_ALARM (1<<3) +#define OMAP_RTC_INTERRUPTS_IT_TIMER (1<<2) + + +#define rtc_read(addr) omap_readb(OMAP_RTC_BASE + (addr)) +#define rtc_write(val, addr) omap_writeb(val, OMAP_RTC_BASE + (addr)) + + +/* platform_bus isn't hotpluggable, so for static linkage it'd be safe + * to get rid of probe() and remove() code ... too bad the driver struct + * remembers probe(), that's about 25% of the runtime footprint!! + */ +#ifndef MODULE +#undef __devexit +#undef __devexit_p +#define __devexit __exit +#define __devexit_p __exit_p +#endif + + +/* we rely on the rtc framework to handle locking (rtc->ops_lock), + * so the only other requirement is that register accesses which + * require BUSY to be clear are made with IRQs locally disabled + */ +static void rtc_wait_not_busy(void) +{ + int count = 0; + u8 status; + + /* BUSY may stay active for 1/32768 second (~30 usec) */ + for (count = 0; count < 50; count++) { + status = rtc_read(OMAP_RTC_STATUS_REG); + if ((status & (u8)OMAP_RTC_STATUS_BUSY) == 0) + break; + udelay(1); + } + /* now we have ~15 usec to read/write various registers */ +} + +static irqreturn_t rtc_irq(int irq, void *class_dev) +{ + unsigned long events = 0; + u8 irq_data; + + irq_data = rtc_read(OMAP_RTC_STATUS_REG); + + /* alarm irq? */ + if (irq_data & OMAP_RTC_STATUS_ALARM) { + rtc_write(OMAP_RTC_STATUS_ALARM, OMAP_RTC_STATUS_REG); + events |= RTC_IRQF | RTC_AF; + } + + /* 1/sec periodic/update irq? */ + if (irq_data & OMAP_RTC_STATUS_1S_EVENT) + events |= RTC_IRQF | RTC_UF; + + rtc_update_irq(class_dev, 1, events); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_RTC_INTF_DEV + +static int +omap_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + u8 reg; + + switch (cmd) { + case RTC_AIE_OFF: + case RTC_AIE_ON: + case RTC_UIE_OFF: + case RTC_UIE_ON: + break; + default: + return -ENOIOCTLCMD; + } + + local_irq_disable(); + rtc_wait_not_busy(); + reg = rtc_read(OMAP_RTC_INTERRUPTS_REG); + switch (cmd) { + /* AIE = Alarm Interrupt Enable */ + case RTC_AIE_OFF: + reg &= ~OMAP_RTC_INTERRUPTS_IT_ALARM; + break; + case RTC_AIE_ON: + reg |= OMAP_RTC_INTERRUPTS_IT_ALARM; + break; + /* UIE = Update Interrupt Enable (1/second) */ + case RTC_UIE_OFF: + reg &= ~OMAP_RTC_INTERRUPTS_IT_TIMER; + break; + case RTC_UIE_ON: + reg |= OMAP_RTC_INTERRUPTS_IT_TIMER; + break; + } + rtc_wait_not_busy(); + rtc_write(reg, OMAP_RTC_INTERRUPTS_REG); + local_irq_enable(); + + return 0; +} + +#else +#define omap_rtc_ioctl NULL +#endif + +/* this hardware doesn't support "don't care" alarm fields */ +static int tm2bcd(struct rtc_time *tm) +{ + if (rtc_valid_tm(tm) != 0) + return -EINVAL; + + tm->tm_sec = BIN2BCD(tm->tm_sec); + tm->tm_min = BIN2BCD(tm->tm_min); + tm->tm_hour = BIN2BCD(tm->tm_hour); + tm->tm_mday = BIN2BCD(tm->tm_mday); + + tm->tm_mon = BIN2BCD(tm->tm_mon + 1); + + /* epoch == 1900 */ + if (tm->tm_year < 100 || tm->tm_year > 199) + return -EINVAL; + tm->tm_year = BIN2BCD(tm->tm_year - 100); + + return 0; +} + +static void bcd2tm(struct rtc_time *tm) +{ + tm->tm_sec = BCD2BIN(tm->tm_sec); + tm->tm_min = BCD2BIN(tm->tm_min); + tm->tm_hour = BCD2BIN(tm->tm_hour); + tm->tm_mday = BCD2BIN(tm->tm_mday); + tm->tm_mon = BCD2BIN(tm->tm_mon) - 1; + /* epoch == 1900 */ + tm->tm_year = BCD2BIN(tm->tm_year) + 100; +} + + +static int omap_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + /* we don't report wday/yday/isdst ... */ + local_irq_disable(); + rtc_wait_not_busy(); + + tm->tm_sec = rtc_read(OMAP_RTC_SECONDS_REG); + tm->tm_min = rtc_read(OMAP_RTC_MINUTES_REG); + tm->tm_hour = rtc_read(OMAP_RTC_HOURS_REG); + tm->tm_mday = rtc_read(OMAP_RTC_DAYS_REG); + tm->tm_mon = rtc_read(OMAP_RTC_MONTHS_REG); + tm->tm_year = rtc_read(OMAP_RTC_YEARS_REG); + + local_irq_enable(); + + bcd2tm(tm); + return 0; +} + +static int omap_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + if (tm2bcd(tm) < 0) + return -EINVAL; + local_irq_disable(); + rtc_wait_not_busy(); + + rtc_write(tm->tm_year, OMAP_RTC_YEARS_REG); + rtc_write(tm->tm_mon, OMAP_RTC_MONTHS_REG); + rtc_write(tm->tm_mday, OMAP_RTC_DAYS_REG); + rtc_write(tm->tm_hour, OMAP_RTC_HOURS_REG); + rtc_write(tm->tm_min, OMAP_RTC_MINUTES_REG); + rtc_write(tm->tm_sec, OMAP_RTC_SECONDS_REG); + + local_irq_enable(); + + return 0; +} + +static int omap_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + local_irq_disable(); + rtc_wait_not_busy(); + + alm->time.tm_sec = rtc_read(OMAP_RTC_ALARM_SECONDS_REG); + alm->time.tm_min = rtc_read(OMAP_RTC_ALARM_MINUTES_REG); + alm->time.tm_hour = rtc_read(OMAP_RTC_ALARM_HOURS_REG); + alm->time.tm_mday = rtc_read(OMAP_RTC_ALARM_DAYS_REG); + alm->time.tm_mon = rtc_read(OMAP_RTC_ALARM_MONTHS_REG); + alm->time.tm_year = rtc_read(OMAP_RTC_ALARM_YEARS_REG); + + local_irq_enable(); + + bcd2tm(&alm->time); + alm->pending = !!(rtc_read(OMAP_RTC_INTERRUPTS_REG) + & OMAP_RTC_INTERRUPTS_IT_ALARM); + alm->enabled = alm->pending && device_may_wakeup(dev); + + return 0; +} + +static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + u8 reg; + + /* Much userspace code uses RTC_ALM_SET, thus "don't care" for + * day/month/year specifies alarms up to 24 hours in the future. + * So we need to handle that ... but let's ignore the "don't care" + * values for hours/minutes/seconds. + */ + if (alm->time.tm_mday <= 0 + && alm->time.tm_mon < 0 + && alm->time.tm_year < 0) { + struct rtc_time tm; + unsigned long now, then; + + omap_rtc_read_time(dev, &tm); + rtc_tm_to_time(&tm, &now); + + alm->time.tm_mday = tm.tm_mday; + alm->time.tm_mon = tm.tm_mon; + alm->time.tm_year = tm.tm_year; + rtc_tm_to_time(&alm->time, &then); + + /* sometimes the alarm wraps into tomorrow */ + if (then < now) { + rtc_time_to_tm(now + 24 * 60 * 60, &tm); + alm->time.tm_mday = tm.tm_mday; + alm->time.tm_mon = tm.tm_mon; + alm->time.tm_year = tm.tm_year; + } + } + + if (tm2bcd(&alm->time) < 0) + return -EINVAL; + + local_irq_disable(); + rtc_wait_not_busy(); + + rtc_write(alm->time.tm_year, OMAP_RTC_ALARM_YEARS_REG); + rtc_write(alm->time.tm_mon, OMAP_RTC_ALARM_MONTHS_REG); + rtc_write(alm->time.tm_mday, OMAP_RTC_ALARM_DAYS_REG); + rtc_write(alm->time.tm_hour, OMAP_RTC_ALARM_HOURS_REG); + rtc_write(alm->time.tm_min, OMAP_RTC_ALARM_MINUTES_REG); + rtc_write(alm->time.tm_sec, OMAP_RTC_ALARM_SECONDS_REG); + + reg = rtc_read(OMAP_RTC_INTERRUPTS_REG); + if (alm->enabled) + reg |= OMAP_RTC_INTERRUPTS_IT_ALARM; + else + reg &= ~OMAP_RTC_INTERRUPTS_IT_ALARM; + rtc_write(reg, OMAP_RTC_INTERRUPTS_REG); + + local_irq_enable(); + + return 0; +} + +static struct rtc_class_ops omap_rtc_ops = { + .ioctl = omap_rtc_ioctl, + .read_time = omap_rtc_read_time, + .set_time = omap_rtc_set_time, + .read_alarm = omap_rtc_read_alarm, + .set_alarm = omap_rtc_set_alarm, +}; + +static int omap_rtc_alarm; +static int omap_rtc_timer; + +static int __devinit omap_rtc_probe(struct platform_device *pdev) +{ + struct resource *res, *mem; + struct rtc_device *rtc; + u8 reg, new_ctrl; + + omap_rtc_timer = platform_get_irq(pdev, 0); + if (omap_rtc_timer <= 0) { + pr_debug("%s: no update irq?\n", pdev->name); + return -ENOENT; + } + + omap_rtc_alarm = platform_get_irq(pdev, 1); + if (omap_rtc_alarm <= 0) { + pr_debug("%s: no alarm irq?\n", pdev->name); + return -ENOENT; + } + + /* NOTE: using static mapping for RTC registers */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res && res->start != OMAP_RTC_BASE) { + pr_debug("%s: RTC registers at %08x, expected %08x\n", + pdev->name, (unsigned) res->start, OMAP_RTC_BASE); + return -ENOENT; + } + + if (res) + mem = request_mem_region(res->start, + res->end - res->start + 1, + pdev->name); + else + mem = NULL; + if (!mem) { + pr_debug("%s: RTC registers at %08x are not free\n", + pdev->name, OMAP_RTC_BASE); + return -EBUSY; + } + + rtc = rtc_device_register(pdev->name, &pdev->dev, + &omap_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) { + pr_debug("%s: can't register RTC device, err %ld\n", + pdev->name, PTR_ERR(rtc)); + goto fail; + } + platform_set_drvdata(pdev, rtc); + class_set_devdata(&rtc->class_dev, mem); + + /* clear pending irqs, and set 1/second periodic, + * which we'll use instead of update irqs + */ + rtc_write(0, OMAP_RTC_INTERRUPTS_REG); + + /* clear old status */ + reg = rtc_read(OMAP_RTC_STATUS_REG); + if (reg & (u8) OMAP_RTC_STATUS_POWER_UP) { + pr_info("%s: RTC power up reset detected\n", + pdev->name); + rtc_write(OMAP_RTC_STATUS_POWER_UP, OMAP_RTC_STATUS_REG); + } + if (reg & (u8) OMAP_RTC_STATUS_ALARM) + rtc_write(OMAP_RTC_STATUS_ALARM, OMAP_RTC_STATUS_REG); + + /* handle periodic and alarm irqs */ + if (request_irq(omap_rtc_timer, rtc_irq, SA_INTERRUPT, + rtc->class_dev.class_id, &rtc->class_dev)) { + pr_debug("%s: RTC timer interrupt IRQ%d already claimed\n", + pdev->name, omap_rtc_timer); + goto fail0; + } + if (request_irq(omap_rtc_alarm, rtc_irq, SA_INTERRUPT, + rtc->class_dev.class_id, &rtc->class_dev)) { + pr_debug("%s: RTC alarm interrupt IRQ%d already claimed\n", + pdev->name, omap_rtc_alarm); + goto fail1; + } + + /* On boards with split power, RTC_ON_NOFF won't reset the RTC */ + reg = rtc_read(OMAP_RTC_CTRL_REG); + if (reg & (u8) OMAP_RTC_CTRL_STOP) + pr_info("%s: already running\n", pdev->name); + + /* force to 24 hour mode */ + new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl |= OMAP_RTC_CTRL_STOP; + + /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: + * + * - Boards wired so that RTC_WAKE_INT does something, and muxed + * right (W13_1610_RTC_WAKE_INT is the default after chip reset), + * should initialize the device wakeup flag appropriately. + * + * - Boards wired so RTC_ON_nOFF is used as the reset signal, + * rather than nPWRON_RESET, should forcibly enable split + * power mode. (Some chip errata report that RTC_CTRL_SPLIT + * is write-only, and always reads as zero...) + */ + device_init_wakeup(&pdev->dev, 0); + + if (new_ctrl & (u8) OMAP_RTC_CTRL_SPLIT) + pr_info("%s: split power mode\n", pdev->name); + + if (reg != new_ctrl) + rtc_write(new_ctrl, OMAP_RTC_CTRL_REG); + + return 0; + +fail1: + free_irq(omap_rtc_timer, NULL); +fail0: + rtc_device_unregister(rtc); +fail: + release_resource(mem); + return -EIO; +} + +static int __devexit omap_rtc_remove(struct platform_device *pdev) +{ + struct rtc_device *rtc = platform_get_drvdata(pdev);; + + device_init_wakeup(&pdev->dev, 0); + + /* leave rtc running, but disable irqs */ + rtc_write(0, OMAP_RTC_INTERRUPTS_REG); + + free_irq(omap_rtc_timer, rtc); + free_irq(omap_rtc_alarm, rtc); + + release_resource(class_get_devdata(&rtc->class_dev)); + rtc_device_unregister(rtc); + return 0; +} + +#ifdef CONFIG_PM + +static struct timespec rtc_delta; +static u8 irqstat; + +static int omap_rtc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct rtc_time rtc_tm; + struct timespec time; + + time.tv_nsec = 0; + omap_rtc_read_time(NULL, &rtc_tm); + rtc_tm_to_time(&rtc_tm, &time.tv_sec); + + save_time_delta(&rtc_delta, &time); + irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG); + + /* FIXME the RTC alarm is not currently acting as a wakeup event + * source, and in fact this enable() call is just saving a flag + * that's never used... + */ + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(omap_rtc_alarm); + else + rtc_write(0, OMAP_RTC_INTERRUPTS_REG); + + return 0; +} + +static int omap_rtc_resume(struct platform_device *pdev) +{ + struct rtc_time rtc_tm; + struct timespec time; + + time.tv_nsec = 0; + omap_rtc_read_time(NULL, &rtc_tm); + rtc_tm_to_time(&rtc_tm, &time.tv_sec); + + restore_time_delta(&rtc_delta, &time); + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(omap_rtc_alarm); + else + rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG); + return 0; +} + +#else +#define omap_rtc_suspend NULL +#define omap_rtc_resume NULL +#endif + +static void omap_rtc_shutdown(struct platform_device *pdev) +{ + rtc_write(0, OMAP_RTC_INTERRUPTS_REG); +} + +MODULE_ALIAS("omap_rtc"); +static struct platform_driver omap_rtc_driver = { + .probe = omap_rtc_probe, + .remove = __devexit_p(omap_rtc_remove), + .suspend = omap_rtc_suspend, + .resume = omap_rtc_resume, + .shutdown = omap_rtc_shutdown, + .driver = { + .name = "omap_rtc", + .owner = THIS_MODULE, + }, +}; + +static int __init rtc_init(void) +{ + return platform_driver_register(&omap_rtc_driver); +} +module_init(rtc_init); + +static void __exit rtc_exit(void) +{ + platform_driver_unregister(&omap_rtc_driver); +} +module_exit(rtc_exit); + +MODULE_AUTHOR("George G. Davis (and others)"); +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index a44fe4efa21..e2c7698fdba 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -13,7 +13,7 @@ #include <linux/rtc.h> #include <linux/bcd.h> -#define DRV_VERSION "0.2" +#define DRV_VERSION "0.3" /* Addresses to scan */ static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END }; @@ -39,6 +39,14 @@ static int rs5c372_attach(struct i2c_adapter *adapter); static int rs5c372_detach(struct i2c_client *client); static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind); +struct rs5c372 { + u8 reg_addr; + u8 regs[17]; + struct i2c_msg msg[1]; + struct i2c_client client; + struct rtc_device *rtc; +}; + static struct i2c_driver rs5c372_driver = { .driver = { .name = "rs5c372", @@ -49,18 +57,16 @@ static struct i2c_driver rs5c372_driver = { static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm) { - unsigned char buf[7] = { RS5C372_REG_BASE }; - /* this implements the 1st reading method, according - * to the datasheet. buf[0] is initialized with - * address ptr and transmission format register. + struct rs5c372 *rs5c372 = i2c_get_clientdata(client); + u8 *buf = &(rs5c372->regs[1]); + + /* this implements the 3rd reading method, according + * to the datasheet. rs5c372 defaults to internal + * address 0xF, so 0x0 is in regs[1] */ - struct i2c_msg msgs[] = { - { client->addr, 0, 1, buf }, - { client->addr, I2C_M_RD, 7, buf }, - }; - if ((i2c_transfer(client->adapter, msgs, 2)) != 2) { + if ((i2c_transfer(client->adapter, rs5c372->msg, 1)) != 1) { dev_err(&client->dev, "%s: read error\n", __FUNCTION__); return -EIO; } @@ -114,23 +120,14 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm) static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim) { - unsigned char buf = RS5C372_REG_TRIM; - - struct i2c_msg msgs[] = { - { client->addr, 0, 1, &buf }, - { client->addr, I2C_M_RD, 1, &buf }, - }; - - if ((i2c_transfer(client->adapter, msgs, 2)) != 2) { - dev_err(&client->dev, "%s: read error\n", __FUNCTION__); - return -EIO; - } + struct rs5c372 *rs5c372 = i2c_get_clientdata(client); + u8 tmp = rs5c372->regs[RS5C372_REG_TRIM + 1]; if (osc) - *osc = (buf & RS5C372_TRIM_XSL) ? 32000 : 32768; + *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768; if (trim) { - *trim = buf & RS5C372_TRIM_MASK; + *trim = tmp & RS5C372_TRIM_MASK; dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, *trim); } @@ -201,7 +198,7 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) { int err = 0; struct i2c_client *client; - struct rtc_device *rtc; + struct rs5c372 *rs5c372; dev_dbg(&adapter->dev, "%s\n", __FUNCTION__); @@ -210,10 +207,11 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) goto exit; } - if (!(client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) { + if (!(rs5c372 = kzalloc(sizeof(struct rs5c372), GFP_KERNEL))) { err = -ENOMEM; goto exit; } + client = &rs5c372->client; /* I2C client */ client->addr = address; @@ -222,32 +220,47 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) strlcpy(client->name, rs5c372_driver.driver.name, I2C_NAME_SIZE); + i2c_set_clientdata(client, rs5c372); + + rs5c372->msg[0].addr = address; + rs5c372->msg[0].flags = I2C_M_RD; + rs5c372->msg[0].len = sizeof(rs5c372->regs); + rs5c372->msg[0].buf = rs5c372->regs; + /* Inform the i2c layer */ if ((err = i2c_attach_client(client))) goto exit_kfree; dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - rtc = rtc_device_register(rs5c372_driver.driver.name, &client->dev, - &rs5c372_rtc_ops, THIS_MODULE); + rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name, + &client->dev, &rs5c372_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - err = PTR_ERR(rtc); + if (IS_ERR(rs5c372->rtc)) { + err = PTR_ERR(rs5c372->rtc); goto exit_detach; } - i2c_set_clientdata(client, rtc); - - device_create_file(&client->dev, &dev_attr_trim); - device_create_file(&client->dev, &dev_attr_osc); + err = device_create_file(&client->dev, &dev_attr_trim); + if (err) + goto exit_devreg; + err = device_create_file(&client->dev, &dev_attr_osc); + if (err) + goto exit_trim; return 0; +exit_trim: + device_remove_file(&client->dev, &dev_attr_trim); + +exit_devreg: + rtc_device_unregister(rs5c372->rtc); + exit_detach: i2c_detach_client(client); exit_kfree: - kfree(client); + kfree(rs5c372); exit: return err; @@ -256,16 +269,15 @@ exit: static int rs5c372_detach(struct i2c_client *client) { int err; - struct rtc_device *rtc = i2c_get_clientdata(client); + struct rs5c372 *rs5c372 = i2c_get_clientdata(client); - if (rtc) - rtc_device_unregister(rtc); + if (rs5c372->rtc) + rtc_device_unregister(rs5c372->rtc); if ((err = i2c_detach_client(client))) return err; - kfree(client); - + kfree(rs5c372); return 0; } diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 6ef9c62d503..f50a1b8e160 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -123,11 +123,18 @@ static int test_probe(struct platform_device *plat_dev) err = PTR_ERR(rtc); return err; } - device_create_file(&plat_dev->dev, &dev_attr_irq); + + err = device_create_file(&plat_dev->dev, &dev_attr_irq); + if (err) + goto err; platform_set_drvdata(plat_dev, rtc); return 0; + +err: + rtc_device_unregister(rtc); + return err; } static int __devexit test_remove(struct platform_device *plat_dev) diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 522c69753bb..9a67487d086 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -562,11 +562,19 @@ static int x1205_probe(struct i2c_adapter *adapter, int address, int kind) else dev_err(&client->dev, "couldn't read status\n"); - device_create_file(&client->dev, &dev_attr_atrim); - device_create_file(&client->dev, &dev_attr_dtrim); + err = device_create_file(&client->dev, &dev_attr_atrim); + if (err) goto exit_devreg; + err = device_create_file(&client->dev, &dev_attr_dtrim); + if (err) goto exit_atrim; return 0; +exit_atrim: + device_remove_file(&client->dev, &dev_attr_atrim); + +exit_devreg: + rtc_device_unregister(rtc); + exit_detach: i2c_detach_client(client); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 17fdd8c9f74..cf28ccc5794 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -25,7 +25,7 @@ #include "dasd_int.h" -kmem_cache_t *dasd_page_cache; +struct kmem_cache *dasd_page_cache; EXPORT_SYMBOL_GPL(dasd_page_cache); /* diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 5ecea3e4fde..fdaa471e845 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1215,7 +1215,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, - SLAB_DMA | __GFP_NOWARN); + GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) memcpy(copy + bv->bv_offset, dst, bv->bv_len); if (copy) diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 80926c54822..b857fd5893f 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -308,7 +308,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, - SLAB_DMA | __GFP_NOWARN); + GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) memcpy(copy + bv->bv_offset, dst, bv->bv_len); if (copy) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 9f52004f6fc..dc5dd509434 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -474,7 +474,7 @@ extern struct dasd_profile_info_t dasd_global_profile; extern unsigned int dasd_profile_level; extern struct block_device_operations dasd_device_operations; -extern kmem_cache_t *dasd_page_cache; +extern struct kmem_cache *dasd_page_cache; struct dasd_ccw_req * dasd_kmalloc_request(char *, int, int, struct dasd_device *); diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 74c0eac083e..32933ed54b8 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -1032,9 +1032,9 @@ struct zfcp_data { wwn_t init_wwpn; fcp_lun_t init_fcp_lun; char *driver_version; - kmem_cache_t *fsf_req_qtcb_cache; - kmem_cache_t *sr_buffer_cache; - kmem_cache_t *gid_pn_cache; + struct kmem_cache *fsf_req_qtcb_cache; + struct kmem_cache *sr_buffer_cache; + struct kmem_cache *gid_pn_cache; }; /** diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 277826cdd0c..067f1519eb0 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -109,7 +109,7 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags) ptr = kmalloc(size, GFP_ATOMIC); else ptr = kmem_cache_alloc(zfcp_data.fsf_req_qtcb_cache, - SLAB_ATOMIC); + GFP_ATOMIC); } if (unlikely(!ptr)) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 335a25540c0..68103e508db 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -313,7 +313,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, hostdata->status = memory + STATUS_OFFSET; /* all of these offsets are L1_CACHE_BYTES separated. It is fatal * if this isn't sufficient separation to avoid dma flushing issues */ - BUG_ON(!dma_is_consistent(pScript) && L1_CACHE_BYTES < dma_get_cache_alignment()); + BUG_ON(!dma_is_consistent(hostdata->dev, pScript) && L1_CACHE_BYTES < dma_get_cache_alignment()); hostdata->slots = (struct NCR_700_command_slot *)(memory + SLOTS_OFFSET); hostdata->dev = dev; @@ -362,11 +362,11 @@ NCR_700_detect(struct scsi_host_template *tpnt, for (j = 0; j < PATCHES; j++) script[LABELPATCHES[j]] = bS_to_host(pScript + SCRIPT[LABELPATCHES[j]]); /* now patch up fixed addresses. */ - script_patch_32(script, MessageLocation, + script_patch_32(hostdata->dev, script, MessageLocation, pScript + MSGOUT_OFFSET); - script_patch_32(script, StatusAddress, + script_patch_32(hostdata->dev, script, StatusAddress, pScript + STATUS_OFFSET); - script_patch_32(script, ReceiveMsgAddress, + script_patch_32(hostdata->dev, script, ReceiveMsgAddress, pScript + MSGIN_OFFSET); hostdata->script = script; @@ -821,8 +821,9 @@ process_extended_message(struct Scsi_Host *host, shost_printk(KERN_WARNING, host, "Unexpected SDTR msg\n"); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, + MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -833,8 +834,9 @@ process_extended_message(struct Scsi_Host *host, printk(KERN_INFO "scsi%d: (%d:%d), Unsolicited WDTR after CMD, Rejecting\n", host->host_no, pun, lun); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); resume_offset = hostdata->pScript + Ent_SendMessageWithATN; break; @@ -847,8 +849,9 @@ process_extended_message(struct Scsi_Host *host, printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -929,8 +932,9 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -939,7 +943,7 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata } NCR_700_writel(temp, host, TEMP_REG); /* set us up to receive another message */ - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); return resume_offset; } @@ -1019,9 +1023,9 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, slot->SG[1].ins = bS_to_host(SCRIPT_RETURN); slot->SG[1].pAddr = 0; slot->resume_offset = hostdata->pScript; - dma_cache_sync(slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); - dma_cache_sync(SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE); - + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE); + /* queue the command for reissue */ slot->state = NCR_700_SLOT_QUEUED; slot->flags = NCR_700_FLAG_AUTOSENSE; @@ -1136,11 +1140,12 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->cmd = slot->cmnd; /* re-patch for this command */ - script_patch_32_abs(hostdata->script, CommandAddress, - slot->pCmd); - script_patch_16(hostdata->script, + script_patch_32_abs(hostdata->dev, hostdata->script, + CommandAddress, slot->pCmd); + script_patch_16(hostdata->dev, hostdata->script, CommandCount, slot->cmnd->cmd_len); - script_patch_32_abs(hostdata->script, SGScriptStartAddress, + script_patch_32_abs(hostdata->dev, hostdata->script, + SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); /* Note: setting SXFER only works if we're @@ -1150,13 +1155,13 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, * should therefore always clear ACK */ NCR_700_writeb(NCR_700_get_SXFER(hostdata->cmd->device), host, SXFER_REG); - dma_cache_sync(hostdata->msgin, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(hostdata->msgout, + dma_cache_sync(hostdata->dev, hostdata->msgout, MSG_ARRAY_SIZE, DMA_TO_DEVICE); /* I'm just being paranoid here, the command should * already have been flushed from the cache */ - dma_cache_sync(slot->cmnd->cmnd, + dma_cache_sync(hostdata->dev, slot->cmnd->cmnd, slot->cmnd->cmd_len, DMA_TO_DEVICE); @@ -1220,7 +1225,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->reselection_id = reselection_id; /* just in case we have a stale simple tag message, clear it */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->msgin, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); if(hostdata->tag_negotiated & (1<<reselection_id)) { resume_offset = hostdata->pScript + Ent_GetReselectionWithTag; @@ -1336,7 +1341,7 @@ process_selection(struct Scsi_Host *host, __u32 dsp) hostdata->cmd = NULL; /* clear any stale simple tag message */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); if(id == 0xff) { @@ -1433,29 +1438,30 @@ NCR_700_start_command(struct scsi_cmnd *SCp) NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); } - script_patch_16(hostdata->script, MessageCount, count); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, count); - script_patch_ID(hostdata->script, + script_patch_ID(hostdata->dev, hostdata->script, Device_ID, 1<<scmd_id(SCp)); - script_patch_32_abs(hostdata->script, CommandAddress, + script_patch_32_abs(hostdata->dev, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->script, CommandCount, SCp->cmd_len); + script_patch_16(hostdata->dev, hostdata->script, CommandCount, + SCp->cmd_len); /* finally plumb the beginning of the SG list into the script * */ - script_patch_32_abs(hostdata->script, SGScriptStartAddress, - to32bit(&slot->pSG[0].ins)); + script_patch_32_abs(hostdata->dev, hostdata->script, + SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); NCR_700_clear_fifo(SCp->device->host); if(slot->resume_offset == 0) slot->resume_offset = hostdata->pScript; /* now perform all the writebacks and invalidates */ - dma_cache_sync(hostdata->msgout, count, DMA_TO_DEVICE); - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, + dma_cache_sync(hostdata->dev, hostdata->msgout, count, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); - dma_cache_sync(hostdata->status, 1, DMA_FROM_DEVICE); + dma_cache_sync(hostdata->dev, SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->status, 1, DMA_FROM_DEVICE); /* set the synchronous period/offset */ NCR_700_writeb(NCR_700_get_SXFER(SCp->device), @@ -1631,7 +1637,7 @@ NCR_700_intr(int irq, void *dev_id) slot->SG[i].ins = bS_to_host(SCRIPT_NOP); slot->SG[i].pAddr = 0; } - dma_cache_sync(slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); /* and pretend we disconnected after * the command phase */ resume_offset = hostdata->pScript + Ent_MsgInDuringData; @@ -1897,9 +1903,9 @@ NCR_700_queuecommand(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)) } slot->SG[i].ins = bS_to_host(SCRIPT_RETURN); slot->SG[i].pAddr = 0; - dma_cache_sync(slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); DEBUG((" SETTING %08lx to %x\n", - (&slot->pSG[i].ins), + (&slot->pSG[i].ins), slot->SG[i].ins)); } slot->resume_offset = 0; diff --git a/drivers/scsi/53c700.h b/drivers/scsi/53c700.h index f5c3caf344a..f38822db421 100644 --- a/drivers/scsi/53c700.h +++ b/drivers/scsi/53c700.h @@ -415,31 +415,31 @@ struct NCR_700_Host_Parameters { #define NCR_710_MIN_XFERP 0 #define NCR_700_MIN_PERIOD 25 /* for SDTR message, 100ns */ -#define script_patch_32(script, symbol, value) \ +#define script_patch_32(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ __u32 val = bS_to_cpu((script)[A_##symbol##_used[i]]) + value; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching %s at %d to 0x%lx\n", \ #symbol, A_##symbol##_used[i], (value))); \ } \ } -#define script_patch_32_abs(script, symbol, value) \ +#define script_patch_32_abs(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ (script)[A_##symbol##_used[i]] = bS_to_host(value); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching %s at %d to 0x%lx\n", \ #symbol, A_##symbol##_used[i], (value))); \ } \ } /* Used for patching the SCSI ID in the SELECT instruction */ -#define script_patch_ID(script, symbol, value) \ +#define script_patch_ID(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -447,13 +447,13 @@ struct NCR_700_Host_Parameters { val &= 0xff00ffff; \ val |= ((value) & 0xff) << 16; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching ID field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ } -#define script_patch_16(script, symbol, value) \ +#define script_patch_16(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -461,7 +461,7 @@ struct NCR_700_Host_Parameters { val &= 0xffff0000; \ val |= ((value) & 0xffff); \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching short field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h index 71a031df7a3..32f513b1b78 100644 --- a/drivers/scsi/aic94xx/aic94xx.h +++ b/drivers/scsi/aic94xx/aic94xx.h @@ -56,8 +56,8 @@ /* 2*ITNL timeout + 1 second */ #define AIC94XX_SCB_TIMEOUT (5*HZ) -extern kmem_cache_t *asd_dma_token_cache; -extern kmem_cache_t *asd_ascb_cache; +extern struct kmem_cache *asd_dma_token_cache; +extern struct kmem_cache *asd_ascb_cache; extern char sas_addr_str[2*SAS_ADDR_SIZE + 1]; static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr) diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c index af7e0113436..da94e126ca8 100644 --- a/drivers/scsi/aic94xx/aic94xx_hwi.c +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -1047,7 +1047,7 @@ irqreturn_t asd_hw_isr(int irq, void *dev_id) static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha, gfp_t gfp_flags) { - extern kmem_cache_t *asd_ascb_cache; + extern struct kmem_cache *asd_ascb_cache; struct asd_seq_data *seq = &asd_ha->seq; struct asd_ascb *ascb; unsigned long flags; diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 42302ef05ee..fbc82b00a41 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -450,8 +450,8 @@ static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha) asd_ha->scb_pool = NULL; } -kmem_cache_t *asd_dma_token_cache; -kmem_cache_t *asd_ascb_cache; +struct kmem_cache *asd_dma_token_cache; +struct kmem_cache *asd_ascb_cache; static int asd_create_global_caches(void) { diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 1427a41e844..8f6b5bf580f 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -110,6 +110,7 @@ typedef struct ide_scsi_obj { } idescsi_scsi_t; static DEFINE_MUTEX(idescsi_ref_mutex); +static int idescsi_nocd; /* Set by module param to skip cd */ #define ide_scsi_g(disk) \ container_of((disk)->private_data, struct ide_scsi_obj, driver) @@ -1127,6 +1128,9 @@ static int ide_scsi_probe(ide_drive_t *drive) warned = 1; } + if (idescsi_nocd && drive->media == ide_cdrom) + return -ENODEV; + if (!strstr("ide-scsi", drive->driver_req) || !drive->present || drive->media == ide_disk || @@ -1187,6 +1191,8 @@ static void __exit exit_idescsi_module(void) driver_unregister(&idescsi_driver.gen_driver); } +module_param(idescsi_nocd, int, 0600); +MODULE_PARM_DESC(idescsi_nocd, "Disable handling of CD-ROMs so they may be driven by ide-cd"); module_init(init_idescsi_module); module_exit(exit_idescsi_module); MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index ccd4dafce8e..b318500785e 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6940,7 +6940,7 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { - ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, SLAB_KERNEL, &dma_addr); + ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); if (!ipr_cmd) { ipr_free_cmd_blks(ioa_cfg); diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index d65bc4e0f21..2f0c07fc3f4 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -36,7 +36,7 @@ #include "../scsi_sas_internal.h" -kmem_cache_t *sas_task_cache; +struct kmem_cache *sas_task_cache; /*------------ SAS addr hash -----------*/ void sas_hash_addr(u8 *hashed, const u8 *sas_addr) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index cbe0cad83b6..d03523d3bf3 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -24,7 +24,7 @@ char qla2x00_version_str[40]; /* * SRB allocation cache */ -static kmem_cache_t *srb_cachep; +static struct kmem_cache *srb_cachep; /* * Ioctl related information. diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 969c9e43102..9ef693c8809 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -19,7 +19,7 @@ char qla4xxx_version_str[40]; /* * SRB allocation cache */ -static kmem_cache_t *srb_cachep; +static struct kmem_cache *srb_cachep; /* * Module parameter information and variables diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fafc00deaad..24cffd98ee6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -136,7 +136,7 @@ const char * scsi_device_type(unsigned type) EXPORT_SYMBOL(scsi_device_type); struct scsi_host_cmd_pool { - kmem_cache_t *slab; + struct kmem_cache *slab; unsigned int users; char *name; unsigned int slab_flags; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fb616c69151..1748e27501c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -36,7 +36,7 @@ struct scsi_host_sg_pool { size_t size; char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *pool; }; @@ -241,7 +241,7 @@ struct scsi_io_context { char sense[SCSI_SENSE_BUFFERSIZE]; }; -static kmem_cache_t *scsi_io_context_cache; +static struct kmem_cache *scsi_io_context_cache; static void scsi_end_async(struct request *req, int uptodate) { diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 386dbae17b4..d402aff5f31 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -33,7 +33,7 @@ #include "scsi_tgt_priv.h" static struct workqueue_struct *scsi_tgtd; -static kmem_cache_t *scsi_tgt_cmd_cache; +static struct kmem_cache *scsi_tgt_cmd_cache; /* * TODO: this struct will be killed when the block layer supports large bios diff --git a/drivers/serial/8250_exar_st16c554.c b/drivers/serial/8250_exar_st16c554.c new file mode 100644 index 00000000000..567143ace15 --- /dev/null +++ b/drivers/serial/8250_exar_st16c554.c @@ -0,0 +1,52 @@ +/* + * linux/drivers/serial/8250_exar.c + * + * Written by Paul B Schroeder < pschroeder "at" uplogix "dot" com > + * Based on 8250_boca. + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/serial_8250.h> + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF, \ + } + +static struct plat_serial8250_port exar_data[] = { + PORT(0x100, 5), + PORT(0x108, 5), + PORT(0x110, 5), + PORT(0x118, 5), + { }, +}; + +static struct platform_device exar_device = { + .name = "serial8250", + .id = PLAT8250_DEV_EXAR_ST16C554, + .dev = { + .platform_data = exar_data, + }, +}; + +static int __init exar_init(void) +{ + return platform_device_register(&exar_device); +} + +module_init(exar_init); + +MODULE_AUTHOR("Paul B Schroeder"); +MODULE_DESCRIPTION("8250 serial probe module for Exar cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 71d907c8288..d3d6b82706b 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -464,11 +464,38 @@ static void __devexit serial_pnp_remove(struct pnp_dev *dev) serial8250_unregister_port(line - 1); } +#ifdef CONFIG_PM +static int serial_pnp_suspend(struct pnp_dev *dev, pm_message_t state) +{ + long line = (long)pnp_get_drvdata(dev); + + if (!line) + return -ENODEV; + serial8250_suspend_port(line - 1); + return 0; +} + +static int serial_pnp_resume(struct pnp_dev *dev) +{ + long line = (long)pnp_get_drvdata(dev); + + if (!line) + return -ENODEV; + serial8250_resume_port(line - 1); + return 0; +} +#else +#define serial_pnp_suspend NULL +#define serial_pnp_resume NULL +#endif /* CONFIG_PM */ + static struct pnp_driver serial_pnp_driver = { .name = "serial", - .id_table = pnp_dev_table, .probe = serial_pnp_probe, .remove = __devexit_p(serial_pnp_remove), + .suspend = serial_pnp_suspend, + .resume = serial_pnp_resume, + .id_table = pnp_dev_table, }; static int __init serial8250_pnp_init(void) diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 0b71e7d1890..fc12d5df10e 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -210,6 +210,17 @@ config SERIAL_8250_BOCA To compile this driver as a module, choose M here: the module will be called 8250_boca. +config SERIAL_8250_EXAR_ST16C554 + tristate "Support Exar ST16C554/554D Quad UART" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + The Uplogix Envoy TU301 uses this Exar Quad UART. If you are + tinkering with your Envoy TU301, or have a machine with this UART, + say Y here. + + To compile this driver as a module, choose M here: the module + will be called 8250_exar_st16c554. + config SERIAL_8250_HUB6 tristate "Support Hub6 cards" depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS @@ -511,6 +522,25 @@ config SERIAL_IMX_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) +config SERIAL_UARTLITE + tristate "Xilinx uartlite serial port support" + depends on PPC32 + select SERIAL_CORE + help + Say Y here if you want to use the Xilinx uartlite serial controller. + + To compile this driver as a module, choose M here: the + module will be called uartlite.ko. + +config SERIAL_UARTLITE_CONSOLE + bool "Support for console on Xilinx uartlite serial port" + depends on SERIAL_UARTLITE=y + select SERIAL_CORE_CONSOLE + help + Say Y here if you wish to use a Xilinx uartlite as the system + console (the system console is the device which receives all kernel + messages and warnings and which allows logins in single user mode). + config SERIAL_SUNCORE bool depends on SPARC diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index b4d8a7c182e..df3632cd7df 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o obj-$(CONFIG_SERIAL_8250_FOURPORT) += 8250_fourport.o obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o +obj-$(CONFIG_SERIAL_8250_EXAR_ST16C554) += 8250_exar_st16c554.o obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o @@ -55,4 +56,5 @@ obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o +obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o obj-$(CONFIG_SERIAL_NETX) += netx-serial.o diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index 4213fabc62b..4d3626ef464 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -129,6 +129,8 @@ static void pl010_rx_chars(struct uart_port *port) */ rsr = readb(port->membase + UART01x_RSR) | UART_DUMMY_RSR_RX; if (unlikely(rsr & UART01x_RSR_ANY)) { + writel(0, port->membase + UART01x_ECR); + if (rsr & UART01x_RSR_BE) { rsr &= ~(UART01x_RSR_FE | UART01x_RSR_PE); port->icount.brk++; diff --git a/drivers/serial/dz.c b/drivers/serial/dz.c index 53662b33b84..af1544f3356 100644 --- a/drivers/serial/dz.c +++ b/drivers/serial/dz.c @@ -1,11 +1,13 @@ /* - * dz.c: Serial port driver for DECStations equiped + * dz.c: Serial port driver for DECstations equipped * with the DZ chipset. * * Copyright (C) 1998 Olivier A. D. Lebaillif * * Email: olivier.lebaillif@ifrsys.com * + * Copyright (C) 2004, 2006 Maciej W. Rozycki + * * [31-AUG-98] triemer * Changed IRQ to use Harald's dec internals interrupts.h * removed base_addr code - moving address assignment to setup.c @@ -26,10 +28,16 @@ #undef DEBUG_DZ +#if defined(CONFIG_SERIAL_DZ_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include <linux/delay.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/console.h> +#include <linux/sysrq.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/serial_core.h> @@ -45,14 +53,10 @@ #include <asm/system.h> #include <asm/uaccess.h> -#define CONSOLE_LINE (3) /* for definition of struct console */ - #include "dz.h" -#define DZ_INTR_DEBUG 1 - static char *dz_name = "DECstation DZ serial driver version "; -static char *dz_version = "1.02"; +static char *dz_version = "1.03"; struct dz_port { struct uart_port port; @@ -61,22 +65,6 @@ struct dz_port { static struct dz_port dz_ports[DZ_NB_PORT]; -#ifdef DEBUG_DZ -/* - * debugging code to send out chars via prom - */ -static void debug_console(const char *s, int count) -{ - unsigned i; - - for (i = 0; i < count; i++) { - if (*s == 10) - prom_printf("%c", 13); - prom_printf("%c", *s++); - } -} -#endif - /* * ------------------------------------------------------------ * dz_in () and dz_out () @@ -90,6 +78,7 @@ static inline unsigned short dz_in(struct dz_port *dport, unsigned offset) { volatile unsigned short *addr = (volatile unsigned short *) (dport->port.membase + offset); + return *addr; } @@ -98,6 +87,7 @@ static inline void dz_out(struct dz_port *dport, unsigned offset, { volatile unsigned short *addr = (volatile unsigned short *) (dport->port.membase + offset); + *addr = value; } @@ -144,7 +134,7 @@ static void dz_stop_rx(struct uart_port *uport) spin_lock_irqsave(&dport->port.lock, flags); dport->cflag &= ~DZ_CREAD; - dz_out(dport, DZ_LPR, dport->cflag); + dz_out(dport, DZ_LPR, dport->cflag | dport->port.line); spin_unlock_irqrestore(&dport->port.lock, flags); } @@ -155,14 +145,14 @@ static void dz_enable_ms(struct uart_port *port) /* * ------------------------------------------------------------ - * Here starts the interrupt handling routines. All of the - * following subroutines are declared as inline and are folded - * into dz_interrupt. They were separated out for readability's - * sake. * - * Note: rs_interrupt() is a "fast" interrupt, which means that it + * Here start the interrupt handling routines. All of the following + * subroutines are declared as inline and are folded into + * dz_interrupt. They were separated out for readability's sake. + * + * Note: dz_interrupt() is a "fast" interrupt, which means that it * runs with interrupts turned off. People who may want to modify - * rs_interrupt() should try to keep the interrupt handler as fast as + * dz_interrupt() should try to keep the interrupt handler as fast as * possible. After you are done making modifications, it is not a bad * idea to do: * @@ -180,92 +170,74 @@ static void dz_enable_ms(struct uart_port *port) * This routine deals with inputs from any lines. * ------------------------------------------------------------ */ -static inline void dz_receive_chars(struct dz_port *dport) +static inline void dz_receive_chars(struct dz_port *dport_in, + struct pt_regs *regs) { + struct dz_port *dport; struct tty_struct *tty = NULL; struct uart_icount *icount; - int ignore = 0; - unsigned short status, tmp; + int lines_rx[DZ_NB_PORT] = { [0 ... DZ_NB_PORT - 1] = 0 }; + unsigned short status; unsigned char ch, flag; + int i; - /* this code is going to be a problem... - the call to tty_flip_buffer is going to need - to be rethought... - */ - do { - status = dz_in(dport, DZ_RBUF); - - /* punt so we don't get duplicate characters */ - if (!(status & DZ_DVAL)) - goto ignore_char; - - - ch = UCHAR(status); /* grab the char */ - flag = TTY_NORMAL; + while ((status = dz_in(dport_in, DZ_RBUF)) & DZ_DVAL) { + dport = &dz_ports[LINE(status)]; + tty = dport->port.info->tty; /* point to the proper dev */ -#if 0 - if (info->is_console) { - if (ch == 0) - return; /* it's a break ... */ - } -#endif + ch = UCHAR(status); /* grab the char */ - tty = dport->port.info->tty;/* now tty points to the proper dev */ icount = &dport->port.icount; - - if (!tty) - break; - icount->rx++; - /* keep track of the statistics */ - if (status & (DZ_OERR | DZ_FERR | DZ_PERR)) { - if (status & DZ_PERR) /* parity error */ - icount->parity++; - else if (status & DZ_FERR) /* frame error */ - icount->frame++; - if (status & DZ_OERR) /* overrun error */ - icount->overrun++; - - /* check to see if we should ignore the character - and mask off conditions that should be ignored + flag = TTY_NORMAL; + if (status & DZ_FERR) { /* frame error */ + /* + * There is no separate BREAK status bit, so + * treat framing errors as BREAKs for Magic SysRq + * and SAK; normally, otherwise. */ - - if (status & dport->port.ignore_status_mask) { - if (++ignore > 100) - break; - goto ignore_char; - } - /* mask off the error conditions we want to ignore */ - tmp = status & dport->port.read_status_mask; - - if (tmp & DZ_PERR) { - flag = TTY_PARITY; -#ifdef DEBUG_DZ - debug_console("PERR\n", 5); -#endif - } else if (tmp & DZ_FERR) { + if (uart_handle_break(&dport->port)) + continue; + if (dport->port.flags & UPF_SAK) + flag = TTY_BREAK; + else flag = TTY_FRAME; -#ifdef DEBUG_DZ - debug_console("FERR\n", 5); -#endif - } - if (tmp & DZ_OERR) { -#ifdef DEBUG_DZ - debug_console("OERR\n", 5); -#endif - tty_insert_flip_char(tty, ch, flag); - ch = 0; - flag = TTY_OVERRUN; - } + } else if (status & DZ_OERR) /* overrun error */ + flag = TTY_OVERRUN; + else if (status & DZ_PERR) /* parity error */ + flag = TTY_PARITY; + + /* keep track of the statistics */ + switch (flag) { + case TTY_FRAME: + icount->frame++; + break; + case TTY_PARITY: + icount->parity++; + break; + case TTY_OVERRUN: + icount->overrun++; + break; + case TTY_BREAK: + icount->brk++; + break; + default: + break; } - tty_insert_flip_char(tty, ch, flag); - ignore_char: - ; - } while (status & DZ_DVAL); - if (tty) - tty_flip_buffer_push(tty); + if (uart_handle_sysrq_char(&dport->port, ch, regs)) + continue; + + if ((status & dport->port.ignore_status_mask) == 0) { + uart_insert_char(&dport->port, + status, DZ_OERR, ch, flag); + lines_rx[LINE(status)] = 1; + } + } + for (i = 0; i < DZ_NB_PORT; i++) + if (lines_rx[i]) + tty_flip_buffer_push(dz_ports[i].port.info->tty); } /* @@ -275,26 +247,32 @@ static inline void dz_receive_chars(struct dz_port *dport) * This routine deals with outputs to any lines. * ------------------------------------------------------------ */ -static inline void dz_transmit_chars(struct dz_port *dport) +static inline void dz_transmit_chars(struct dz_port *dport_in) { - struct circ_buf *xmit = &dport->port.info->xmit; + struct dz_port *dport; + struct circ_buf *xmit; + unsigned short status; unsigned char tmp; - if (dport->port.x_char) { /* XON/XOFF chars */ + status = dz_in(dport_in, DZ_CSR); + dport = &dz_ports[LINE(status)]; + xmit = &dport->port.info->xmit; + + if (dport->port.x_char) { /* XON/XOFF chars */ dz_out(dport, DZ_TDR, dport->port.x_char); dport->port.icount.tx++; dport->port.x_char = 0; return; } - /* if nothing to do or stopped or hardware stopped */ + /* If nothing to do or stopped or hardware stopped. */ if (uart_circ_empty(xmit) || uart_tx_stopped(&dport->port)) { dz_stop_tx(&dport->port); return; } /* - * if something to do ... (rember the dz has no output fifo so we go - * one char at a time :-< + * If something to do... (remember the dz has no output fifo, + * so we go one char at a time) :-< */ tmp = xmit->buf[xmit->tail]; xmit->tail = (xmit->tail + 1) & (DZ_XMIT_SIZE - 1); @@ -304,23 +282,29 @@ static inline void dz_transmit_chars(struct dz_port *dport) if (uart_circ_chars_pending(xmit) < DZ_WAKEUP_CHARS) uart_write_wakeup(&dport->port); - /* Are we done */ + /* Are we are done. */ if (uart_circ_empty(xmit)) dz_stop_tx(&dport->port); } /* * ------------------------------------------------------------ - * check_modem_status () + * check_modem_status() * - * Only valid for the MODEM line duh ! + * DS 3100 & 5100: Only valid for the MODEM line, duh! + * DS 5000/200: Valid for the MODEM and PRINTER line. * ------------------------------------------------------------ */ static inline void check_modem_status(struct dz_port *dport) { + /* + * FIXME: + * 1. No status change interrupt; use a timer. + * 2. Handle the 3100/5000 as appropriate. --macro + */ unsigned short status; - /* if not ne modem line just return */ + /* If not the modem line just return. */ if (dport->port.line != DZ_MODEM) return; @@ -341,21 +325,18 @@ static inline void check_modem_status(struct dz_port *dport) */ static irqreturn_t dz_interrupt(int irq, void *dev) { - struct dz_port *dport; + struct dz_port *dport = (struct dz_port *)dev; unsigned short status; /* get the reason why we just got an irq */ - status = dz_in((struct dz_port *)dev, DZ_CSR); - dport = &dz_ports[LINE(status)]; + status = dz_in(dport, DZ_CSR); - if (status & DZ_RDONE) - dz_receive_chars(dport); + if ((status & (DZ_RDONE | DZ_RIE)) == (DZ_RDONE | DZ_RIE)) + dz_receive_chars(dport, regs); - if (status & DZ_TRDY) + if ((status & (DZ_TRDY | DZ_TIE)) == (DZ_TRDY | DZ_TIE)) dz_transmit_chars(dport); - /* FIXME: what about check modem status??? --rmk */ - return IRQ_HANDLED; } @@ -367,13 +348,13 @@ static irqreturn_t dz_interrupt(int irq, void *dev) static unsigned int dz_get_mctrl(struct uart_port *uport) { + /* + * FIXME: Handle the 3100/5000 as appropriate. --macro + */ struct dz_port *dport = (struct dz_port *)uport; unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; if (dport->port.line == DZ_MODEM) { - /* - * CHECKME: This is a guess from the other code... --rmk - */ if (dz_in(dport, DZ_MSR) & DZ_MODEM_DSR) mctrl &= ~TIOCM_DSR; } @@ -383,6 +364,9 @@ static unsigned int dz_get_mctrl(struct uart_port *uport) static void dz_set_mctrl(struct uart_port *uport, unsigned int mctrl) { + /* + * FIXME: Handle the 3100/5000 as appropriate. --macro + */ struct dz_port *dport = (struct dz_port *)uport; unsigned short tmp; @@ -409,13 +393,6 @@ static int dz_startup(struct uart_port *uport) unsigned long flags; unsigned short tmp; - /* The dz lines for the mouse/keyboard must be - * opened using their respective drivers. - */ - if ((dport->port.line == DZ_KEYBOARD) || - (dport->port.line == DZ_MOUSE)) - return -ENODEV; - spin_lock_irqsave(&dport->port.lock, flags); /* enable the interrupt and the scanning */ @@ -442,7 +419,8 @@ static void dz_shutdown(struct uart_port *uport) } /* - * get_lsr_info - get line status register info + * ------------------------------------------------------------------- + * dz_tx_empty() -- get the transmitter empty status * * Purpose: Let user call ioctl() to get info when the UART physically * is emptied. On bus types like RS485, the transmitter must @@ -450,21 +428,28 @@ static void dz_shutdown(struct uart_port *uport) * the transmit shift register is empty, not be done when the * transmit holding register is empty. This functionality * allows an RS485 driver to be written in user space. + * ------------------------------------------------------------------- */ static unsigned int dz_tx_empty(struct uart_port *uport) { struct dz_port *dport = (struct dz_port *)uport; - unsigned short status = dz_in(dport, DZ_LPR); + unsigned short tmp, mask = 1 << dport->port.line; - /* FIXME: this appears to be obviously broken --rmk. */ - return status ? TIOCSER_TEMT : 0; + tmp = dz_in(dport, DZ_TCR); + tmp &= mask; + + return tmp ? 0 : TIOCSER_TEMT; } static void dz_break_ctl(struct uart_port *uport, int break_state) { + /* + * FIXME: Can't access BREAK bits in TDR easily; + * reuse the code for polled TX. --macro + */ struct dz_port *dport = (struct dz_port *)uport; unsigned long flags; - unsigned short tmp, mask = 1 << uport->line; + unsigned short tmp, mask = 1 << dport->port.line; spin_lock_irqsave(&uport->lock, flags); tmp = dz_in(dport, DZ_TCR); @@ -561,7 +546,7 @@ static void dz_set_termios(struct uart_port *uport, struct termios *termios, spin_lock_irqsave(&dport->port.lock, flags); - dz_out(dport, DZ_LPR, cflag); + dz_out(dport, DZ_LPR, cflag | dport->port.line); dport->cflag = cflag; /* setup accept flag */ @@ -650,7 +635,7 @@ static void __init dz_init_ports(void) for (i = 0, dport = dz_ports; i < DZ_NB_PORT; i++, dport++) { spin_lock_init(&dport->port.lock); dport->port.membase = (char *) base; - dport->port.iotype = UPIO_PORT; + dport->port.iotype = UPIO_MEM; dport->port.irq = dec_interrupt[DEC_IRQ_DZ11]; dport->port.line = i; dport->port.fifosize = 1; @@ -662,10 +647,7 @@ static void __init dz_init_ports(void) static void dz_reset(struct dz_port *dport) { dz_out(dport, DZ_CSR, DZ_CLR); - while (dz_in(dport, DZ_CSR) & DZ_CLR); - /* FIXME: cpu_relax? */ - iob(); /* enable scanning */ @@ -673,26 +655,55 @@ static void dz_reset(struct dz_port *dport) } #ifdef CONFIG_SERIAL_DZ_CONSOLE +/* + * ------------------------------------------------------------------- + * dz_console_putchar() -- transmit a character + * + * Polled transmission. This is tricky. We need to mask transmit + * interrupts so that they do not interfere, enable the transmitter + * for the line requested and then wait till the transmit scanner + * requests data for this line. But it may request data for another + * line first, in which case we have to disable its transmitter and + * repeat waiting till our line pops up. Only then the character may + * be transmitted. Finally, the state of the transmitter mask is + * restored. Welcome to the world of PDP-11! + * ------------------------------------------------------------------- + */ static void dz_console_putchar(struct uart_port *uport, int ch) { struct dz_port *dport = (struct dz_port *)uport; unsigned long flags; - int loops = 2500; - unsigned short tmp = (unsigned char)ch; - /* this code sends stuff out to serial device - spinning its - wheels and waiting. */ + unsigned short csr, tcr, trdy, mask; + int loops = 10000; spin_lock_irqsave(&dport->port.lock, flags); + csr = dz_in(dport, DZ_CSR); + dz_out(dport, DZ_CSR, csr & ~DZ_TIE); + tcr = dz_in(dport, DZ_TCR); + tcr |= 1 << dport->port.line; + mask = tcr; + dz_out(dport, DZ_TCR, mask); + iob(); + spin_unlock_irqrestore(&dport->port.lock, flags); - /* spin our wheels */ - while (((dz_in(dport, DZ_CSR) & DZ_TRDY) != DZ_TRDY) && loops--) - /* FIXME: cpu_relax, udelay? --rmk */ - ; + while (loops--) { + trdy = dz_in(dport, DZ_CSR); + if (!(trdy & DZ_TRDY)) + continue; + trdy = (trdy & DZ_TLINE) >> 8; + if (trdy == dport->port.line) + break; + mask &= ~(1 << trdy); + dz_out(dport, DZ_TCR, mask); + iob(); + udelay(2); + } - /* Actually transmit the character. */ - dz_out(dport, DZ_TDR, tmp); + if (loops) /* Cannot send otherwise. */ + dz_out(dport, DZ_TDR, ch); - spin_unlock_irqrestore(&dport->port.lock, flags); + dz_out(dport, DZ_TCR, tcr); + dz_out(dport, DZ_CSR, csr); } /* @@ -703,11 +714,11 @@ static void dz_console_putchar(struct uart_port *uport, int ch) * The console must be locked when we get here. * ------------------------------------------------------------------- */ -static void dz_console_print(struct console *cons, +static void dz_console_print(struct console *co, const char *str, unsigned int count) { - struct dz_port *dport = &dz_ports[CONSOLE_LINE]; + struct dz_port *dport = &dz_ports[co->index]; #ifdef DEBUG_DZ prom_printf((char *) str); #endif @@ -716,49 +727,43 @@ static void dz_console_print(struct console *cons, static int __init dz_console_setup(struct console *co, char *options) { - struct dz_port *dport = &dz_ports[CONSOLE_LINE]; + struct dz_port *dport = &dz_ports[co->index]; int baud = 9600; int bits = 8; int parity = 'n'; int flow = 'n'; - int ret; - unsigned short mask, tmp; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); dz_reset(dport); - ret = uart_set_options(&dport->port, co, baud, parity, bits, flow); - if (ret == 0) { - mask = 1 << dport->port.line; - tmp = dz_in(dport, DZ_TCR); /* read the TX flag */ - if (!(tmp & mask)) { - tmp |= mask; /* set the TX flag */ - dz_out(dport, DZ_TCR, tmp); - } - } - - return ret; + return uart_set_options(&dport->port, co, baud, parity, bits, flow); } -static struct console dz_sercons = -{ +static struct uart_driver dz_reg; +static struct console dz_sercons = { .name = "ttyS", .write = dz_console_print, .device = uart_console_device, .setup = dz_console_setup, - .flags = CON_CONSDEV | CON_PRINTBUFFER, - .index = CONSOLE_LINE, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &dz_reg, }; -void __init dz_serial_console_init(void) +static int __init dz_serial_console_init(void) { - dz_init_ports(); - - register_console(&dz_sercons); + if (!IOASIC) { + dz_init_ports(); + register_console(&dz_sercons); + return 0; + } else + return -ENXIO; } +console_initcall(dz_serial_console_init); + #define SERIAL_DZ_CONSOLE &dz_sercons #else #define SERIAL_DZ_CONSOLE NULL @@ -767,35 +772,29 @@ void __init dz_serial_console_init(void) static struct uart_driver dz_reg = { .owner = THIS_MODULE, .driver_name = "serial", - .dev_name = "ttyS%d", + .dev_name = "ttyS", .major = TTY_MAJOR, .minor = 64, .nr = DZ_NB_PORT, .cons = SERIAL_DZ_CONSOLE, }; -int __init dz_init(void) +static int __init dz_init(void) { - unsigned long flags; int ret, i; + if (IOASIC) + return -ENXIO; + printk("%s%s\n", dz_name, dz_version); dz_init_ports(); - save_flags(flags); - cli(); - #ifndef CONFIG_SERIAL_DZ_CONSOLE /* reset the chip */ dz_reset(&dz_ports[0]); #endif - /* order matters here... the trick is that flags - is updated... in request_irq - to immediatedly obliterate - it is unwise. */ - restore_flags(flags); - if (request_irq(dz_ports[0].port.irq, dz_interrupt, IRQF_DISABLED, "DZ", &dz_ports[0])) panic("Unable to register DZ interrupt"); @@ -810,5 +809,7 @@ int __init dz_init(void) return ret; } +module_init(dz_init); + MODULE_DESCRIPTION("DECstation DZ serial driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/serial/dz.h b/drivers/serial/dz.h index 86ef417382b..9674d4e4987 100644 --- a/drivers/serial/dz.h +++ b/drivers/serial/dz.h @@ -1,20 +1,22 @@ /* - * dz.h: Serial port driver for DECStations equiped + * dz.h: Serial port driver for DECstations equipped * with the DZ chipset. * * Copyright (C) 1998 Olivier A. D. Lebaillif * * Email: olivier.lebaillif@ifrsys.com * + * Copyright (C) 2004, 2006 Maciej W. Rozycki */ #ifndef DZ_SERIAL_H #define DZ_SERIAL_H /* - * Definitions for the Control and Status Received. + * Definitions for the Control and Status Register. */ #define DZ_TRDY 0x8000 /* Transmitter empty */ -#define DZ_TIE 0x4000 /* Transmitter Interrupt Enable */ +#define DZ_TIE 0x4000 /* Transmitter Interrupt Enbl */ +#define DZ_TLINE 0x0300 /* Transmitter Line Number */ #define DZ_RDONE 0x0080 /* Receiver data ready */ #define DZ_RIE 0x0040 /* Receive Interrupt Enable */ #define DZ_MSE 0x0020 /* Master Scan Enable */ @@ -22,32 +24,44 @@ #define DZ_MAINT 0x0008 /* Loop Back Mode */ /* - * Definitions for the Received buffer. + * Definitions for the Receiver Buffer Register. */ -#define DZ_RBUF_MASK 0x00FF /* Data Mask in the Receive Buffer */ -#define DZ_LINE_MASK 0x0300 /* Line Mask in the Receive Buffer */ +#define DZ_RBUF_MASK 0x00FF /* Data Mask */ +#define DZ_LINE_MASK 0x0300 /* Line Mask */ #define DZ_DVAL 0x8000 /* Valid Data indicator */ #define DZ_OERR 0x4000 /* Overrun error indicator */ #define DZ_FERR 0x2000 /* Frame error indicator */ #define DZ_PERR 0x1000 /* Parity error indicator */ -#define LINE(x) (x & DZ_LINE_MASK) >> 8 /* Get the line number from the input buffer */ -#define UCHAR(x) (unsigned char)(x & DZ_RBUF_MASK) +#define LINE(x) ((x & DZ_LINE_MASK) >> 8) /* Get the line number + from the input buffer */ +#define UCHAR(x) ((unsigned char)(x & DZ_RBUF_MASK)) /* - * Definitions for the Transmit Register. + * Definitions for the Transmit Control Register. */ #define DZ_LINE_KEYBOARD 0x0001 #define DZ_LINE_MOUSE 0x0002 #define DZ_LINE_MODEM 0x0004 #define DZ_LINE_PRINTER 0x0008 +#define DZ_MODEM_RTS 0x0800 /* RTS for the modem line (2) */ #define DZ_MODEM_DTR 0x0400 /* DTR for the modem line (2) */ +#define DZ_PRINT_RTS 0x0200 /* RTS for the prntr line (3) */ +#define DZ_PRINT_DTR 0x0100 /* DTR for the prntr line (3) */ +#define DZ_LNENB 0x000f /* Transmitter Line Enable */ /* * Definitions for the Modem Status Register. */ +#define DZ_MODEM_RI 0x0800 /* RI for the modem line (2) */ +#define DZ_MODEM_CD 0x0400 /* CD for the modem line (2) */ #define DZ_MODEM_DSR 0x0200 /* DSR for the modem line (2) */ +#define DZ_MODEM_CTS 0x0100 /* CTS for the modem line (2) */ +#define DZ_PRINT_RI 0x0008 /* RI for the printer line (3) */ +#define DZ_PRINT_CD 0x0004 /* CD for the printer line (3) */ +#define DZ_PRINT_DSR 0x0002 /* DSR for the prntr line (3) */ +#define DZ_PRINT_CTS 0x0001 /* CTS for the prntr line (3) */ /* * Definitions for the Transmit Data Register. diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c index 8eea69f2998..29823bd60fb 100644 --- a/drivers/serial/mpsc.c +++ b/drivers/serial/mpsc.c @@ -555,7 +555,7 @@ mpsc_sdma_start_tx(struct mpsc_port_info *pi) if (!mpsc_sdma_tx_active(pi)) { txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)txre, @@ -931,7 +931,7 @@ mpsc_init_rings(struct mpsc_port_info *pi) } txre->link = cpu_to_be32(pi->txr_p); /* Wrap last back to first */ - dma_cache_sync((void *) pi->dma_region, MPSC_DMA_ALLOC_SIZE, + dma_cache_sync(pi->port.dev, (void *) pi->dma_region, MPSC_DMA_ALLOC_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ @@ -1005,7 +1005,7 @@ mpsc_rx_intr(struct mpsc_port_info *pi) rxre = (struct mpsc_rx_desc *)(pi->rxr + (pi->rxr_posn*MPSC_RXRE_SIZE)); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)rxre, @@ -1029,7 +1029,7 @@ mpsc_rx_intr(struct mpsc_port_info *pi) } bp = pi->rxb + (pi->rxr_posn * MPSC_RXBE_SIZE); - dma_cache_sync((void *) bp, MPSC_RXBE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_RXBE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)bp, @@ -1098,7 +1098,7 @@ next_frame: SDMA_DESC_CMDSTAT_F | SDMA_DESC_CMDSTAT_L); wmb(); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)rxre, @@ -1109,7 +1109,7 @@ next_frame: pi->rxr_posn = (pi->rxr_posn + 1) & (MPSC_RXR_ENTRIES - 1); rxre = (struct mpsc_rx_desc *)(pi->rxr + (pi->rxr_posn * MPSC_RXRE_SIZE)); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)rxre, @@ -1143,7 +1143,7 @@ mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr) SDMA_DESC_CMDSTAT_EI : 0)); wmb(); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)txre, @@ -1192,7 +1192,7 @@ mpsc_copy_tx_data(struct mpsc_port_info *pi) else /* All tx data copied into ring bufs */ return; - dma_cache_sync((void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)bp, @@ -1217,7 +1217,7 @@ mpsc_tx_intr(struct mpsc_port_info *pi) txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)txre, @@ -1235,7 +1235,7 @@ mpsc_tx_intr(struct mpsc_port_info *pi) txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ @@ -1652,7 +1652,7 @@ mpsc_console_write(struct console *co, const char *s, uint count) count--; } - dma_cache_sync((void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)bp, diff --git a/drivers/serial/uartlite.c b/drivers/serial/uartlite.c new file mode 100644 index 00000000000..83690653b78 --- /dev/null +++ b/drivers/serial/uartlite.c @@ -0,0 +1,505 @@ +/* + * uartlite.c: Serial driver for Xilinx uartlite serial controller + * + * Peter Korsgaard <jacmet@sunsite.dk> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/console.h> +#include <linux/serial.h> +#include <linux/serial_core.h> +#include <linux/tty.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <asm/io.h> + +#define ULITE_MAJOR 204 +#define ULITE_MINOR 187 +#define ULITE_NR_UARTS 4 + +/* For register details see datasheet: + http://www.xilinx.com/bvdocs/ipcenter/data_sheet/opb_uartlite.pdf +*/ +#define ULITE_RX 0x00 +#define ULITE_TX 0x04 +#define ULITE_STATUS 0x08 +#define ULITE_CONTROL 0x0c + +#define ULITE_REGION 16 + +#define ULITE_STATUS_RXVALID 0x01 +#define ULITE_STATUS_RXFULL 0x02 +#define ULITE_STATUS_TXEMPTY 0x04 +#define ULITE_STATUS_TXFULL 0x08 +#define ULITE_STATUS_IE 0x10 +#define ULITE_STATUS_OVERRUN 0x20 +#define ULITE_STATUS_FRAME 0x40 +#define ULITE_STATUS_PARITY 0x80 + +#define ULITE_CONTROL_RST_TX 0x01 +#define ULITE_CONTROL_RST_RX 0x02 +#define ULITE_CONTROL_IE 0x10 + + +static struct uart_port ports[ULITE_NR_UARTS]; + +static int ulite_receive(struct uart_port *port, int stat) +{ + struct tty_struct *tty = port->info->tty; + unsigned char ch = 0; + char flag = TTY_NORMAL; + + if ((stat & (ULITE_STATUS_RXVALID | ULITE_STATUS_OVERRUN + | ULITE_STATUS_FRAME)) == 0) + return 0; + + /* stats */ + if (stat & ULITE_STATUS_RXVALID) { + port->icount.rx++; + ch = readb(port->membase + ULITE_RX); + + if (stat & ULITE_STATUS_PARITY) + port->icount.parity++; + } + + if (stat & ULITE_STATUS_OVERRUN) + port->icount.overrun++; + + if (stat & ULITE_STATUS_FRAME) + port->icount.frame++; + + + /* drop byte with parity error if IGNPAR specificed */ + if (stat & port->ignore_status_mask & ULITE_STATUS_PARITY) + stat &= ~ULITE_STATUS_RXVALID; + + stat &= port->read_status_mask; + + if (stat & ULITE_STATUS_PARITY) + flag = TTY_PARITY; + + + stat &= ~port->ignore_status_mask; + + if (stat & ULITE_STATUS_RXVALID) + tty_insert_flip_char(tty, ch, flag); + + if (stat & ULITE_STATUS_FRAME) + tty_insert_flip_char(tty, 0, TTY_FRAME); + + if (stat & ULITE_STATUS_OVERRUN) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + + return 1; +} + +static int ulite_transmit(struct uart_port *port, int stat) +{ + struct circ_buf *xmit = &port->info->xmit; + + if (stat & ULITE_STATUS_TXFULL) + return 0; + + if (port->x_char) { + writeb(port->x_char, port->membase + ULITE_TX); + port->x_char = 0; + port->icount.tx++; + return 1; + } + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return 0; + + writeb(xmit->buf[xmit->tail], port->membase + ULITE_TX); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1); + port->icount.tx++; + + /* wake up */ + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + return 1; +} + +static irqreturn_t ulite_isr(int irq, void *dev_id) +{ + struct uart_port *port = (struct uart_port *)dev_id; + int busy; + + do { + int stat = readb(port->membase + ULITE_STATUS); + busy = ulite_receive(port, stat); + busy |= ulite_transmit(port, stat); + } while (busy); + + tty_flip_buffer_push(port->info->tty); + + return IRQ_HANDLED; +} + +static unsigned int ulite_tx_empty(struct uart_port *port) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(&port->lock, flags); + ret = readb(port->membase + ULITE_STATUS); + spin_unlock_irqrestore(&port->lock, flags); + + return ret & ULITE_STATUS_TXEMPTY ? TIOCSER_TEMT : 0; +} + +static unsigned int ulite_get_mctrl(struct uart_port *port) +{ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void ulite_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* N/A */ +} + +static void ulite_stop_tx(struct uart_port *port) +{ + /* N/A */ +} + +static void ulite_start_tx(struct uart_port *port) +{ + ulite_transmit(port, readb(port->membase + ULITE_STATUS)); +} + +static void ulite_stop_rx(struct uart_port *port) +{ + /* don't forward any more data (like !CREAD) */ + port->ignore_status_mask = ULITE_STATUS_RXVALID | ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; +} + +static void ulite_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +static void ulite_break_ctl(struct uart_port *port, int ctl) +{ + /* N/A */ +} + +static int ulite_startup(struct uart_port *port) +{ + int ret; + + ret = request_irq(port->irq, ulite_isr, + IRQF_DISABLED | IRQF_SAMPLE_RANDOM, "uartlite", port); + if (ret) + return ret; + + writeb(ULITE_CONTROL_RST_RX | ULITE_CONTROL_RST_TX, + port->membase + ULITE_CONTROL); + writeb(ULITE_CONTROL_IE, port->membase + ULITE_CONTROL); + + return 0; +} + +static void ulite_shutdown(struct uart_port *port) +{ + writeb(0, port->membase + ULITE_CONTROL); + readb(port->membase + ULITE_CONTROL); /* dummy */ + free_irq(port->irq, port); +} + +static void ulite_set_termios(struct uart_port *port, struct termios *termios, + struct termios *old) +{ + unsigned long flags; + unsigned int baud; + + spin_lock_irqsave(&port->lock, flags); + + port->read_status_mask = ULITE_STATUS_RXVALID | ULITE_STATUS_OVERRUN + | ULITE_STATUS_TXFULL; + + if (termios->c_iflag & INPCK) + port->read_status_mask |= + ULITE_STATUS_PARITY | ULITE_STATUS_FRAME; + + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; + + /* ignore all characters if CREAD is not set */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= + ULITE_STATUS_RXVALID | ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; + + /* update timeout */ + baud = uart_get_baud_rate(port, termios, old, 0, 460800); + uart_update_timeout(port, termios->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *ulite_type(struct uart_port *port) +{ + return port->type == PORT_UARTLITE ? "uartlite" : NULL; +} + +static void ulite_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, ULITE_REGION); + iounmap(port->membase); + port->membase = 0; +} + +static int ulite_request_port(struct uart_port *port) +{ + if (!request_mem_region(port->mapbase, ULITE_REGION, "uartlite")) { + dev_err(port->dev, "Memory region busy\n"); + return -EBUSY; + } + + port->membase = ioremap(port->mapbase, ULITE_REGION); + if (!port->membase) { + dev_err(port->dev, "Unable to map registers\n"); + release_mem_region(port->mapbase, ULITE_REGION); + return -EBUSY; + } + + return 0; +} + +static void ulite_config_port(struct uart_port *port, int flags) +{ + ulite_request_port(port); + port->type = PORT_UARTLITE; +} + +static int ulite_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + /* we don't want the core code to modify any port params */ + return -EINVAL; +} + +static struct uart_ops ulite_ops = { + .tx_empty = ulite_tx_empty, + .set_mctrl = ulite_set_mctrl, + .get_mctrl = ulite_get_mctrl, + .stop_tx = ulite_stop_tx, + .start_tx = ulite_start_tx, + .stop_rx = ulite_stop_rx, + .enable_ms = ulite_enable_ms, + .break_ctl = ulite_break_ctl, + .startup = ulite_startup, + .shutdown = ulite_shutdown, + .set_termios = ulite_set_termios, + .type = ulite_type, + .release_port = ulite_release_port, + .request_port = ulite_request_port, + .config_port = ulite_config_port, + .verify_port = ulite_verify_port +}; + +#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE +static void ulite_console_wait_tx(struct uart_port *port) +{ + int i; + + /* wait up to 10ms for the character(s) to be sent */ + for (i = 0; i < 10000; i++) { + if (readb(port->membase + ULITE_STATUS) & ULITE_STATUS_TXEMPTY) + break; + udelay(1); + } +} + +static void ulite_console_putchar(struct uart_port *port, int ch) +{ + ulite_console_wait_tx(port); + writeb(ch, port->membase + ULITE_TX); +} + +static void ulite_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &ports[co->index]; + unsigned long flags; + unsigned int ier; + int locked = 1; + + if (oops_in_progress) { + locked = spin_trylock_irqsave(&port->lock, flags); + } else + spin_lock_irqsave(&port->lock, flags); + + /* save and disable interrupt */ + ier = readb(port->membase + ULITE_STATUS) & ULITE_STATUS_IE; + writeb(0, port->membase + ULITE_CONTROL); + + uart_console_write(port, s, count, ulite_console_putchar); + + ulite_console_wait_tx(port); + + /* restore interrupt state */ + if (ier) + writeb(ULITE_CONTROL_IE, port->membase + ULITE_CONTROL); + + if (locked) + spin_unlock_irqrestore(&port->lock, flags); +} + +static int __init ulite_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= ULITE_NR_UARTS) + return -EINVAL; + + port = &ports[co->index]; + + /* not initialized yet? */ + if (!port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver ulite_uart_driver; + +static struct console ulite_console = { + .name = "ttyUL", + .write = ulite_console_write, + .device = uart_console_device, + .setup = ulite_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, /* Specified on the cmdline (e.g. console=ttyUL0 ) */ + .data = &ulite_uart_driver, +}; + +static int __init ulite_console_init(void) +{ + register_console(&ulite_console); + return 0; +} + +console_initcall(ulite_console_init); + +#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */ + +static struct uart_driver ulite_uart_driver = { + .owner = THIS_MODULE, + .driver_name = "uartlite", + .dev_name = "ttyUL", + .major = ULITE_MAJOR, + .minor = ULITE_MINOR, + .nr = ULITE_NR_UARTS, +#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE + .cons = &ulite_console, +#endif +}; + +static int __devinit ulite_probe(struct platform_device *pdev) +{ + struct resource *res, *res2; + struct uart_port *port; + + if (pdev->id < 0 || pdev->id >= ULITE_NR_UARTS) + return -EINVAL; + + if (ports[pdev->id].membase) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + port = &ports[pdev->id]; + + port->fifosize = 16; + port->regshift = 2; + port->iotype = UPIO_MEM; + port->iobase = 1; /* mark port in use */ + port->mapbase = res->start; + port->membase = 0; + port->ops = &ulite_ops; + port->irq = res2->start; + port->flags = UPF_BOOT_AUTOCONF; + port->dev = &pdev->dev; + port->type = PORT_UNKNOWN; + port->line = pdev->id; + + uart_add_one_port(&ulite_uart_driver, port); + platform_set_drvdata(pdev, port); + + return 0; +} + +static int ulite_remove(struct platform_device *pdev) +{ + struct uart_port *port = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + if (port) + uart_remove_one_port(&ulite_uart_driver, port); + + /* mark port as free */ + port->membase = 0; + + return 0; +} + +static struct platform_driver ulite_platform_driver = { + .probe = ulite_probe, + .remove = ulite_remove, + .driver = { + .owner = THIS_MODULE, + .name = "uartlite", + }, +}; + +int __init ulite_init(void) +{ + int ret; + + ret = uart_register_driver(&ulite_uart_driver); + if (ret) + return ret; + + ret = platform_driver_register(&ulite_platform_driver); + if (ret) + uart_unregister_driver(&ulite_uart_driver); + + return ret; +} + +void __exit ulite_exit(void) +{ + platform_driver_unregister(&ulite_platform_driver); + uart_unregister_driver(&ulite_uart_driver); +} + +module_init(ulite_init); +module_exit(ulite_exit); + +MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>"); +MODULE_DESCRIPTION("Xilinx uartlite serial driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c3c0626f550..270e6211c2e 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -360,12 +360,13 @@ spi_alloc_master(struct device *dev, unsigned size) if (!dev) return NULL; - master = kzalloc(size + sizeof *master, SLAB_KERNEL); + master = kzalloc(size + sizeof *master, GFP_KERNEL); if (!master) return NULL; class_device_initialize(&master->cdev); master->cdev.class = &spi_master_class; + kobj_set_kset_s(&master->cdev, spi_master_class.subsys); master->cdev.dev = get_device(dev); spi_master_set_devdata(master, &master[1]); @@ -447,7 +448,9 @@ static int __unregister(struct device *dev, void *unused) */ void spi_unregister_master(struct spi_master *master) { - (void) device_for_each_child(master->cdev.dev, NULL, __unregister); + int dummy; + + dummy = device_for_each_child(master->cdev.dev, NULL, __unregister); class_device_unregister(&master->cdev); } EXPORT_SYMBOL_GPL(spi_unregister_master); @@ -463,15 +466,13 @@ EXPORT_SYMBOL_GPL(spi_unregister_master); */ struct spi_master *spi_busnum_to_master(u16 bus_num) { - if (bus_num) { - char name[8]; - struct kobject *bus; - - snprintf(name, sizeof name, "spi%u", bus_num); - bus = kset_find_obj(&spi_master_class.subsys.kset, name); - if (bus) - return container_of(bus, struct spi_master, cdev.kobj); - } + char name[9]; + struct kobject *bus; + + snprintf(name, sizeof name, "spi%u", bus_num); + bus = kset_find_obj(&spi_master_class.subsys.kset, name); + if (bus) + return container_of(bus, struct spi_master, cdev.kobj); return NULL; } EXPORT_SYMBOL_GPL(spi_busnum_to_master); @@ -607,7 +608,7 @@ static int __init spi_init(void) { int status; - buf = kmalloc(SPI_BUFSIZ, SLAB_KERNEL); + buf = kmalloc(SPI_BUFSIZ, GFP_KERNEL); if (!buf) { status = -ENOMEM; goto err0; diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c index 08c1c57c612..57289b61d0b 100644 --- a/drivers/spi/spi_bitbang.c +++ b/drivers/spi/spi_bitbang.c @@ -196,7 +196,7 @@ int spi_bitbang_setup(struct spi_device *spi) return -EINVAL; if (!cs) { - cs = kzalloc(sizeof *cs, SLAB_KERNEL); + cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) return -ENOMEM; spi->controller_state = cs; diff --git a/drivers/spi/spi_butterfly.c b/drivers/spi/spi_butterfly.c index c2f601f8e4f..312987a0321 100644 --- a/drivers/spi/spi_butterfly.c +++ b/drivers/spi/spi_butterfly.c @@ -251,6 +251,8 @@ static void butterfly_attach(struct parport *p) * setting up a platform device like this is an ugly kluge... */ pdev = platform_device_register_simple("butterfly", -1, NULL, 0); + if (IS_ERR(pdev)) + return; master = spi_alloc_master(&pdev->dev, sizeof *pp); if (!master) { diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index f2d196fa1e8..dae4ef1e8fe 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -64,6 +64,8 @@ #include <linux/kthread.h> #include <linux/version.h> #include <linux/mutex.h> +#include <linux/freezer.h> + #include <asm/unaligned.h> #include "usbatm.h" diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index 840442a25b6..c3915dc2860 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -93,7 +93,7 @@ void hcd_buffer_destroy (struct usb_hcd *hcd) } -/* sometimes alloc/free could use kmalloc with SLAB_DMA, for +/* sometimes alloc/free could use kmalloc with GFP_DMA, for * better sharing and to leverage mm/slab.c intelligence. */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9be41ed1f9a..2651c2e2a89 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -22,6 +22,7 @@ #include <linux/usbdevice_fs.h> #include <linux/kthread.h> #include <linux/mutex.h> +#include <linux/freezer.h> #include <asm/semaphore.h> #include <asm/uaccess.h> @@ -460,7 +461,7 @@ void usb_hub_tt_clear_buffer (struct usb_device *udev, int pipe) * since each TT has "at least two" buffers that can need it (and * there can be many TTs per hub). even if they're uncommon. */ - if ((clear = kmalloc (sizeof *clear, SLAB_ATOMIC)) == NULL) { + if ((clear = kmalloc (sizeof *clear, GFP_ATOMIC)) == NULL) { dev_err (&udev->dev, "can't save CLEAR_TT_BUFFER state\n"); /* FIXME recover somehow ... RESET_TT? */ return; @@ -2371,7 +2372,7 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1) struct usb_qualifier_descriptor *qual; int status; - qual = kmalloc (sizeof *qual, SLAB_KERNEL); + qual = kmalloc (sizeof *qual, GFP_KERNEL); if (qual == NULL) return; @@ -2922,7 +2923,7 @@ static int config_descriptors_changed(struct usb_device *udev) if (len < le16_to_cpu(udev->config[index].desc.wTotalLength)) len = le16_to_cpu(udev->config[index].desc.wTotalLength); } - buf = kmalloc (len, SLAB_KERNEL); + buf = kmalloc (len, GFP_KERNEL); if (buf == NULL) { dev_err(&udev->dev, "no mem to re-read configs after reset\n"); /* assume the worst */ diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 7390b67c609..149aa8bfb1f 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -488,7 +488,7 @@ void usb_sg_wait (struct usb_sg_request *io) int retval; io->urbs [i]->dev = io->dev; - retval = usb_submit_urb (io->urbs [i], SLAB_ATOMIC); + retval = usb_submit_urb (io->urbs [i], GFP_ATOMIC); /* after we submit, let completions or cancelations fire; * we handshake using io->status. diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 8b975d15538..c98316ce838 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -250,7 +250,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/utsname.h> #include <linux/usb_ch9.h> diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c index 64554acad63..31351826f2b 100644 --- a/drivers/usb/gadget/gmidi.c +++ b/drivers/usb/gadget/gmidi.c @@ -1236,7 +1236,7 @@ autoconf_fail: /* ok, we made sense of the hardware ... */ - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { return -ENOMEM; } diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index a3076da3f4e..805a9826842 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1864,7 +1864,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kmalloc (sizeof *dev, SLAB_KERNEL); + dev = kmalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ pr_debug("enomem %s\n", pci_name(pdev)); retval = -ENOMEM; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 86924f9cdd7..3fb1044a4db 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -412,7 +412,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) /* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */ value = -ENOMEM; - kbuf = kmalloc (len, SLAB_KERNEL); + kbuf = kmalloc (len, GFP_KERNEL); if (unlikely (!kbuf)) goto free1; @@ -456,7 +456,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */ value = -ENOMEM; - kbuf = kmalloc (len, SLAB_KERNEL); + kbuf = kmalloc (len, GFP_KERNEL); if (!kbuf) goto free1; if (copy_from_user (kbuf, buf, len)) { @@ -1898,7 +1898,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) buf += 4; length -= 4; - kbuf = kmalloc (length, SLAB_KERNEL); + kbuf = kmalloc (length, GFP_KERNEL); if (!kbuf) return -ENOMEM; if (copy_from_user (kbuf, buf, length)) { diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 0b590831582..3024c679e38 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -2861,7 +2861,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kzalloc (sizeof *dev, SLAB_KERNEL); + dev = kzalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ retval = -ENOMEM; goto done; diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 48a09fd89d1..030d87c28c2 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -2581,7 +2581,7 @@ omap_udc_setup(struct platform_device *odev, struct otg_transceiver *xceiv) /* UDC_PULLUP_EN gates the chip clock */ // OTG_SYSCON_1_REG |= DEV_IDLE_EN; - udc = kzalloc(sizeof(*udc), SLAB_KERNEL); + udc = kzalloc(sizeof(*udc), GFP_KERNEL); if (!udc) return -ENOMEM; diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 0f809dd6849..40710ea1b49 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -1190,7 +1190,7 @@ autoconf_fail: /* ok, we made sense of the hardware ... */ - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; spin_lock_init (&dev->lock); diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 34b7a31cd85..56349d21e6e 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -492,7 +492,7 @@ show_periodic (struct class_device *class_dev, char *buf) unsigned i; __le32 tag; - if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, SLAB_ATOMIC))) + if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, GFP_ATOMIC))) return 0; seen_count = 0; diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index 87eca6aeacf..9325e46a68c 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ -188,7 +188,7 @@ static DEFINE_TIMER(bulk_eot_timer, NULL, 0, 0); #define CHECK_ALIGN(x) if (((__u32)(x)) & 0x00000003) \ {panic("Alignment check (DWORD) failed at %s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);} -#define SLAB_FLAG (in_interrupt() ? SLAB_ATOMIC : SLAB_KERNEL) +#define SLAB_FLAG (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) #define KMALLOC_FLAG (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) /* Most helpful debugging aid */ @@ -275,13 +275,13 @@ static volatile USB_SB_Desc_t TxIntrSB_zout __attribute__ ((aligned (4))); static int zout_buffer[4] __attribute__ ((aligned (4))); /* Cache for allocating new EP and SB descriptors. */ -static kmem_cache_t *usb_desc_cache; +static struct kmem_cache *usb_desc_cache; /* Cache for the registers allocated in the top half. */ -static kmem_cache_t *top_half_reg_cache; +static struct kmem_cache *top_half_reg_cache; /* Cache for the data allocated in the isoc descr top half. */ -static kmem_cache_t *isoc_compl_cache; +static struct kmem_cache *isoc_compl_cache; static struct usb_bus *etrax_usb_bus; @@ -1743,7 +1743,7 @@ static irqreturn_t etrax_usb_tx_interrupt(int irq, void *vhc) *R_DMA_CH8_SUB3_CLR_INTR = IO_STATE(R_DMA_CH8_SUB3_CLR_INTR, clr_descr, do); - comp_data = (usb_isoc_complete_data_t*)kmem_cache_alloc(isoc_compl_cache, SLAB_ATOMIC); + comp_data = (usb_isoc_complete_data_t*)kmem_cache_alloc(isoc_compl_cache, GFP_ATOMIC); assert(comp_data != NULL); INIT_WORK(&comp_data->usb_bh, etrax_usb_isoc_descr_interrupt_bottom_half, comp_data); @@ -3010,7 +3010,7 @@ static void etrax_usb_add_to_isoc_sb_list(struct urb *urb, int epid) if (!urb->iso_frame_desc[i].length) continue; - next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, SLAB_ATOMIC); + next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, GFP_ATOMIC); assert(next_sb_desc != NULL); if (urb->iso_frame_desc[i].length > 0) { @@ -3063,7 +3063,7 @@ static void etrax_usb_add_to_isoc_sb_list(struct urb *urb, int epid) if (TxIsocEPList[epid].sub == 0) { dbg_isoc("Isoc traffic not already running, allocating SB"); - next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, SLAB_ATOMIC); + next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, GFP_ATOMIC); assert(next_sb_desc != NULL); next_sb_desc->command = (IO_STATE(USB_SB_command, tt, in) | @@ -3317,7 +3317,7 @@ static irqreturn_t etrax_usb_hc_interrupt_top_half(int irq, void *vhc) restore_flags(flags); - reg = (usb_interrupt_registers_t *)kmem_cache_alloc(top_half_reg_cache, SLAB_ATOMIC); + reg = (usb_interrupt_registers_t *)kmem_cache_alloc(top_half_reg_cache, GFP_ATOMIC); assert(reg != NULL); diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index 8293c1d4be3..0f47a57dac2 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -505,7 +505,7 @@ show_periodic (struct class_device *class_dev, char *buf) char *next; unsigned i; - if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, SLAB_ATOMIC))) + if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, GFP_ATOMIC))) return 0; seen_count = 0; diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c index 2dbb7741490..7f26f9bdbaf 100644 --- a/drivers/usb/host/ohci-pnx4008.c +++ b/drivers/usb/host/ohci-pnx4008.c @@ -134,7 +134,7 @@ static int isp1301_attach(struct i2c_adapter *adap, int addr, int kind) { struct i2c_client *c; - c = (struct i2c_client *)kzalloc(sizeof(*c), SLAB_KERNEL); + c = (struct i2c_client *)kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 226bf3de8ed..e87692c31be 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -81,7 +81,7 @@ MODULE_PARM_DESC(debug, "Debug level"); static char *errbuf; #define ERRBUF_LEN (32 * 1024) -static kmem_cache_t *uhci_up_cachep; /* urb_priv */ +static struct kmem_cache *uhci_up_cachep; /* urb_priv */ static void suspend_rh(struct uhci_hcd *uhci, enum uhci_rh_state new_state); static void wakeup_rh(struct uhci_hcd *uhci); diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index 06115f22a4f..30b88459ac7 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -498,7 +498,7 @@ static inline struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci, { struct urb_priv *urbp; - urbp = kmem_cache_alloc(uhci_up_cachep, SLAB_ATOMIC); + urbp = kmem_cache_alloc(uhci_up_cachep, GFP_ATOMIC); if (!urbp) return NULL; diff --git a/drivers/usb/input/acecad.c b/drivers/usb/input/acecad.c index 0096373b5f9..909138e5aa0 100644 --- a/drivers/usb/input/acecad.c +++ b/drivers/usb/input/acecad.c @@ -152,7 +152,7 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_ if (!acecad || !input_dev) goto fail1; - acecad->data = usb_buffer_alloc(dev, 8, SLAB_KERNEL, &acecad->data_dma); + acecad->data = usb_buffer_alloc(dev, 8, GFP_KERNEL, &acecad->data_dma); if (!acecad->data) goto fail1; diff --git a/drivers/usb/input/aiptek.c b/drivers/usb/input/aiptek.c index bf428184608..9f52429ce65 100644 --- a/drivers/usb/input/aiptek.c +++ b/drivers/usb/input/aiptek.c @@ -1988,7 +1988,7 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) goto fail1; aiptek->data = usb_buffer_alloc(usbdev, AIPTEK_PACKET_LENGTH, - SLAB_ATOMIC, &aiptek->data_dma); + GFP_ATOMIC, &aiptek->data_dma); if (!aiptek->data) goto fail1; diff --git a/drivers/usb/input/ati_remote.c b/drivers/usb/input/ati_remote.c index ff23318dc30..b724e36f7b9 100644 --- a/drivers/usb/input/ati_remote.c +++ b/drivers/usb/input/ati_remote.c @@ -592,7 +592,7 @@ static void ati_remote_irq_in(struct urb *urb) __FUNCTION__, urb->status); } - retval = usb_submit_urb(urb, SLAB_ATOMIC); + retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&ati_remote->interface->dev, "%s: usb_submit_urb()=%d\n", __FUNCTION__, retval); @@ -604,12 +604,12 @@ static void ati_remote_irq_in(struct urb *urb) static int ati_remote_alloc_buffers(struct usb_device *udev, struct ati_remote *ati_remote) { - ati_remote->inbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, SLAB_ATOMIC, + ati_remote->inbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, GFP_ATOMIC, &ati_remote->inbuf_dma); if (!ati_remote->inbuf) return -1; - ati_remote->outbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, SLAB_ATOMIC, + ati_remote->outbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, GFP_ATOMIC, &ati_remote->outbuf_dma); if (!ati_remote->outbuf) return -1; diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index 4295bab4f1e..f1d0e1d6982 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1079,7 +1079,7 @@ static void hid_irq_in(struct urb *urb) warn("input irq status %d received", urb->status); } - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { clear_bit(HID_IN_RUNNING, &hid->iofl); if (status != -EPERM) { @@ -1864,13 +1864,13 @@ static void hid_find_max_report(struct hid_device *hid, unsigned int type, int * static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid) { - if (!(hid->inbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->inbuf_dma))) + if (!(hid->inbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->inbuf_dma))) return -1; - if (!(hid->outbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->outbuf_dma))) + if (!(hid->outbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->outbuf_dma))) return -1; - if (!(hid->cr = usb_buffer_alloc(dev, sizeof(*(hid->cr)), SLAB_ATOMIC, &hid->cr_dma))) + if (!(hid->cr = usb_buffer_alloc(dev, sizeof(*(hid->cr)), GFP_ATOMIC, &hid->cr_dma))) return -1; - if (!(hid->ctrlbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->ctrlbuf_dma))) + if (!(hid->ctrlbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->ctrlbuf_dma))) return -1; return 0; diff --git a/drivers/usb/input/keyspan_remote.c b/drivers/usb/input/keyspan_remote.c index 50aa8108a50..98bd323369c 100644 --- a/drivers/usb/input/keyspan_remote.c +++ b/drivers/usb/input/keyspan_remote.c @@ -456,7 +456,7 @@ static int keyspan_probe(struct usb_interface *interface, const struct usb_devic remote->in_endpoint = endpoint; remote->toggle = -1; /* Set to -1 so we will always not match the toggle from the first remote message. */ - remote->in_buffer = usb_buffer_alloc(udev, RECV_SIZE, SLAB_ATOMIC, &remote->in_dma); + remote->in_buffer = usb_buffer_alloc(udev, RECV_SIZE, GFP_ATOMIC, &remote->in_dma); if (!remote->in_buffer) { retval = -ENOMEM; goto fail1; diff --git a/drivers/usb/input/mtouchusb.c b/drivers/usb/input/mtouchusb.c index 79a85d46cb1..92c4e07da4c 100644 --- a/drivers/usb/input/mtouchusb.c +++ b/drivers/usb/input/mtouchusb.c @@ -164,7 +164,7 @@ static int mtouchusb_alloc_buffers(struct usb_device *udev, struct mtouch_usb *m dbg("%s - called", __FUNCTION__); mtouch->data = usb_buffer_alloc(udev, MTOUCHUSB_REPORT_DATA_SIZE, - SLAB_ATOMIC, &mtouch->data_dma); + GFP_ATOMIC, &mtouch->data_dma); if (!mtouch->data) return -1; diff --git a/drivers/usb/input/powermate.c b/drivers/usb/input/powermate.c index 0bf91778c40..fea97e5437f 100644 --- a/drivers/usb/input/powermate.c +++ b/drivers/usb/input/powermate.c @@ -277,12 +277,12 @@ static int powermate_input_event(struct input_dev *dev, unsigned int type, unsig static int powermate_alloc_buffers(struct usb_device *udev, struct powermate_device *pm) { pm->data = usb_buffer_alloc(udev, POWERMATE_PAYLOAD_SIZE_MAX, - SLAB_ATOMIC, &pm->data_dma); + GFP_ATOMIC, &pm->data_dma); if (!pm->data) return -1; pm->configcr = usb_buffer_alloc(udev, sizeof(*(pm->configcr)), - SLAB_ATOMIC, &pm->configcr_dma); + GFP_ATOMIC, &pm->configcr_dma); if (!pm->configcr) return -1; diff --git a/drivers/usb/input/touchkitusb.c b/drivers/usb/input/touchkitusb.c index 05c0d1ca39a..2a314b06592 100644 --- a/drivers/usb/input/touchkitusb.c +++ b/drivers/usb/input/touchkitusb.c @@ -248,7 +248,7 @@ static int touchkit_alloc_buffers(struct usb_device *udev, struct touchkit_usb *touchkit) { touchkit->data = usb_buffer_alloc(udev, TOUCHKIT_REPORT_DATA_SIZE, - SLAB_ATOMIC, &touchkit->data_dma); + GFP_ATOMIC, &touchkit->data_dma); if (!touchkit->data) return -1; diff --git a/drivers/usb/input/usbkbd.c b/drivers/usb/input/usbkbd.c index dac88640eab..8505824848f 100644 --- a/drivers/usb/input/usbkbd.c +++ b/drivers/usb/input/usbkbd.c @@ -122,7 +122,7 @@ static void usb_kbd_irq(struct urb *urb) memcpy(kbd->old, kbd->new, 8); resubmit: - i = usb_submit_urb (urb, SLAB_ATOMIC); + i = usb_submit_urb (urb, GFP_ATOMIC); if (i) err ("can't resubmit intr, %s-%s/input0, status %d", kbd->usbdev->bus->bus_name, @@ -196,11 +196,11 @@ static int usb_kbd_alloc_mem(struct usb_device *dev, struct usb_kbd *kbd) return -1; if (!(kbd->led = usb_alloc_urb(0, GFP_KERNEL))) return -1; - if (!(kbd->new = usb_buffer_alloc(dev, 8, SLAB_ATOMIC, &kbd->new_dma))) + if (!(kbd->new = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &kbd->new_dma))) return -1; - if (!(kbd->cr = usb_buffer_alloc(dev, sizeof(struct usb_ctrlrequest), SLAB_ATOMIC, &kbd->cr_dma))) + if (!(kbd->cr = usb_buffer_alloc(dev, sizeof(struct usb_ctrlrequest), GFP_ATOMIC, &kbd->cr_dma))) return -1; - if (!(kbd->leds = usb_buffer_alloc(dev, 1, SLAB_ATOMIC, &kbd->leds_dma))) + if (!(kbd->leds = usb_buffer_alloc(dev, 1, GFP_ATOMIC, &kbd->leds_dma))) return -1; return 0; diff --git a/drivers/usb/input/usbmouse.c b/drivers/usb/input/usbmouse.c index 68a55642c08..64a33e420cf 100644 --- a/drivers/usb/input/usbmouse.c +++ b/drivers/usb/input/usbmouse.c @@ -86,7 +86,7 @@ static void usb_mouse_irq(struct urb *urb) input_sync(dev); resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s/input0, status %d", mouse->usbdev->bus->bus_name, @@ -137,7 +137,7 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i if (!mouse || !input_dev) goto fail1; - mouse->data = usb_buffer_alloc(dev, 8, SLAB_ATOMIC, &mouse->data_dma); + mouse->data = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &mouse->data_dma); if (!mouse->data) goto fail1; diff --git a/drivers/usb/input/usbtouchscreen.c b/drivers/usb/input/usbtouchscreen.c index 49704d4ed0e..7f3c57da9bc 100644 --- a/drivers/usb/input/usbtouchscreen.c +++ b/drivers/usb/input/usbtouchscreen.c @@ -680,7 +680,7 @@ static int usbtouch_probe(struct usb_interface *intf, type->process_pkt = usbtouch_process_pkt; usbtouch->data = usb_buffer_alloc(udev, type->rept_size, - SLAB_KERNEL, &usbtouch->data_dma); + GFP_KERNEL, &usbtouch->data_dma); if (!usbtouch->data) goto out_free; diff --git a/drivers/usb/input/xpad.c b/drivers/usb/input/xpad.c index df97e5c803f..e4bc76ebc83 100644 --- a/drivers/usb/input/xpad.c +++ b/drivers/usb/input/xpad.c @@ -325,7 +325,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id goto fail1; xpad->idata = usb_buffer_alloc(udev, XPAD_PKT_LEN, - SLAB_ATOMIC, &xpad->idata_dma); + GFP_ATOMIC, &xpad->idata_dma); if (!xpad->idata) goto fail1; diff --git a/drivers/usb/input/yealink.c b/drivers/usb/input/yealink.c index 2268ca311ad..caff8e6d744 100644 --- a/drivers/usb/input/yealink.c +++ b/drivers/usb/input/yealink.c @@ -874,17 +874,17 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) /* allocate usb buffers */ yld->irq_data = usb_buffer_alloc(udev, USB_PKT_LEN, - SLAB_ATOMIC, &yld->irq_dma); + GFP_ATOMIC, &yld->irq_dma); if (yld->irq_data == NULL) return usb_cleanup(yld, -ENOMEM); yld->ctl_data = usb_buffer_alloc(udev, USB_PKT_LEN, - SLAB_ATOMIC, &yld->ctl_dma); + GFP_ATOMIC, &yld->ctl_dma); if (!yld->ctl_data) return usb_cleanup(yld, -ENOMEM); yld->ctl_req = usb_buffer_alloc(udev, sizeof(*(yld->ctl_req)), - SLAB_ATOMIC, &yld->ctl_req_dma); + GFP_ATOMIC, &yld->ctl_req_dma); if (yld->ctl_req == NULL) return usb_cleanup(yld, -ENOMEM); diff --git a/drivers/usb/misc/phidgetkit.c b/drivers/usb/misc/phidgetkit.c index 9659c79e187..371bf2b1197 100644 --- a/drivers/usb/misc/phidgetkit.c +++ b/drivers/usb/misc/phidgetkit.c @@ -377,7 +377,7 @@ static void interfacekit_irq(struct urb *urb) schedule_delayed_work(&kit->do_notify, 0); resubmit: - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) err("can't resubmit intr, %s-%s/interfacekit0, status %d", kit->udev->bus->bus_name, @@ -568,7 +568,7 @@ static int interfacekit_probe(struct usb_interface *intf, const struct usb_devic kit->dev_no = -1; kit->ifkit = ifkit; - kit->data = usb_buffer_alloc(dev, URB_INT_SIZE, SLAB_ATOMIC, &kit->data_dma); + kit->data = usb_buffer_alloc(dev, URB_INT_SIZE, GFP_ATOMIC, &kit->data_dma); if (!kit->data) goto out; diff --git a/drivers/usb/misc/phidgetmotorcontrol.c b/drivers/usb/misc/phidgetmotorcontrol.c index 2bb4fa572bb..5727e1ea2f9 100644 --- a/drivers/usb/misc/phidgetmotorcontrol.c +++ b/drivers/usb/misc/phidgetmotorcontrol.c @@ -151,7 +151,7 @@ static void motorcontrol_irq(struct urb *urb) schedule_delayed_work(&mc->do_notify, 0); resubmit: - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) dev_err(&mc->intf->dev, "can't resubmit intr, %s-%s/motorcontrol0, status %d", @@ -338,7 +338,7 @@ static int motorcontrol_probe(struct usb_interface *intf, const struct usb_devic goto out; mc->dev_no = -1; - mc->data = usb_buffer_alloc(dev, URB_INT_SIZE, SLAB_ATOMIC, &mc->data_dma); + mc->data = usb_buffer_alloc(dev, URB_INT_SIZE, GFP_ATOMIC, &mc->data_dma); if (!mc->data) goto out; diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 194065dbb51..fb321864a92 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -213,7 +213,7 @@ static struct urb *simple_alloc_urb ( if (bytes < 0) return NULL; - urb = usb_alloc_urb (0, SLAB_KERNEL); + urb = usb_alloc_urb (0, GFP_KERNEL); if (!urb) return urb; usb_fill_bulk_urb (urb, udev, pipe, NULL, bytes, simple_callback, NULL); @@ -223,7 +223,7 @@ static struct urb *simple_alloc_urb ( urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; if (usb_pipein (pipe)) urb->transfer_flags |= URB_SHORT_NOT_OK; - urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL, + urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb (urb); @@ -315,7 +315,7 @@ static int simple_io ( init_completion (&completion); if (usb_pipeout (urb->pipe)) simple_fill_buf (urb); - if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0) + if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0) break; /* NOTE: no timeouts; can't be broken out of by interrupt */ @@ -374,7 +374,7 @@ alloc_sglist (int nents, int max, int vary) unsigned i; unsigned size = max; - sg = kmalloc (nents * sizeof *sg, SLAB_KERNEL); + sg = kmalloc (nents * sizeof *sg, GFP_KERNEL); if (!sg) return NULL; @@ -382,7 +382,7 @@ alloc_sglist (int nents, int max, int vary) char *buf; unsigned j; - buf = kzalloc (size, SLAB_KERNEL); + buf = kzalloc (size, GFP_KERNEL); if (!buf) { free_sglist (sg, i); return NULL; @@ -428,7 +428,7 @@ static int perform_sglist ( (udev->speed == USB_SPEED_HIGH) ? (INTERRUPT_RATE << 3) : INTERRUPT_RATE, - sg, nents, 0, SLAB_KERNEL); + sg, nents, 0, GFP_KERNEL); if (retval) break; @@ -819,7 +819,7 @@ error: /* resubmit if we need to, else mark this as done */ if ((status == 0) && (ctx->pending < ctx->count)) { - if ((status = usb_submit_urb (urb, SLAB_ATOMIC)) != 0) { + if ((status = usb_submit_urb (urb, GFP_ATOMIC)) != 0) { dbg ("can't resubmit ctrl %02x.%02x, err %d", reqp->bRequestType, reqp->bRequest, status); urb->dev = NULL; @@ -855,7 +855,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) * as with bulk/intr sglists, sglen is the queue depth; it also * controls which subtests run (more tests than sglen) or rerun. */ - urb = kcalloc(param->sglen, sizeof(struct urb *), SLAB_KERNEL); + urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL); if (!urb) return -ENOMEM; for (i = 0; i < param->sglen; i++) { @@ -981,7 +981,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) if (!u) goto cleanup; - reqp = usb_buffer_alloc (udev, sizeof *reqp, SLAB_KERNEL, + reqp = usb_buffer_alloc (udev, sizeof *reqp, GFP_KERNEL, &u->setup_dma); if (!reqp) goto cleanup; @@ -999,7 +999,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) context.urb = urb; spin_lock_irq (&context.lock); for (i = 0; i < param->sglen; i++) { - context.status = usb_submit_urb (urb [i], SLAB_ATOMIC); + context.status = usb_submit_urb (urb [i], GFP_ATOMIC); if (context.status != 0) { dbg ("can't submit urb[%d], status %d", i, context.status); @@ -1041,7 +1041,7 @@ static void unlink1_callback (struct urb *urb) // we "know" -EPIPE (stall) never happens if (!status) - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) { urb->status = status; complete ((struct completion *) urb->context); @@ -1067,7 +1067,7 @@ static int unlink1 (struct usbtest_dev *dev, int pipe, int size, int async) * FIXME want additional tests for when endpoint is STALLing * due to errors, or is just NAKing requests. */ - if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0) { + if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0) { dev_dbg (&dev->intf->dev, "submit fail %d\n", retval); return retval; } @@ -1251,7 +1251,7 @@ static int ctrl_out (struct usbtest_dev *dev, if (length < 1 || length > 0xffff || vary >= length) return -EINVAL; - buf = kmalloc(length, SLAB_KERNEL); + buf = kmalloc(length, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1403,7 +1403,7 @@ static struct urb *iso_alloc_urb ( maxp *= 1 + (0x3 & (le16_to_cpu(desc->wMaxPacketSize) >> 11)); packets = (bytes + maxp - 1) / maxp; - urb = usb_alloc_urb (packets, SLAB_KERNEL); + urb = usb_alloc_urb (packets, GFP_KERNEL); if (!urb) return urb; urb->dev = udev; @@ -1411,7 +1411,7 @@ static struct urb *iso_alloc_urb ( urb->number_of_packets = packets; urb->transfer_buffer_length = bytes; - urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL, + urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb (urb); @@ -1481,7 +1481,7 @@ test_iso_queue (struct usbtest_dev *dev, struct usbtest_param *param, spin_lock_irq (&context.lock); for (i = 0; i < param->sglen; i++) { ++context.pending; - status = usb_submit_urb (urbs [i], SLAB_ATOMIC); + status = usb_submit_urb (urbs [i], GFP_ATOMIC); if (status < 0) { ERROR (dev, "submit iso[%d], error %d\n", i, status); if (i == 0) { @@ -1900,7 +1900,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) } #endif - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; info = (struct usbtest_info *) id->driver_info; @@ -1910,7 +1910,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) dev->intf = intf; /* cacheline-aligned scratch for i/o */ - if ((dev->buf = kmalloc (TBUF_SIZE, SLAB_KERNEL)) == NULL) { + if ((dev->buf = kmalloc (TBUF_SIZE, GFP_KERNEL)) == NULL) { kfree (dev); return -ENOMEM; } diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 7a2346c5328..05cf2c9a8f8 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -50,7 +50,7 @@ struct mon_event_text { #define SLAB_NAME_SZ 30 struct mon_reader_text { - kmem_cache_t *e_slab; + struct kmem_cache *e_slab; int nevents; struct list_head e_list; struct mon_reader r; /* In C, parent class can be placed anywhere */ @@ -63,7 +63,7 @@ struct mon_reader_text { char slab_name[SLAB_NAME_SZ]; }; -static void mon_text_ctor(void *, kmem_cache_t *, unsigned long); +static void mon_text_ctor(void *, struct kmem_cache *, unsigned long); /* * mon_text_submit @@ -147,7 +147,7 @@ static void mon_text_event(struct mon_reader_text *rp, struct urb *urb, stamp = mon_get_timestamp(); if (rp->nevents >= EVENT_MAX || - (ep = kmem_cache_alloc(rp->e_slab, SLAB_ATOMIC)) == NULL) { + (ep = kmem_cache_alloc(rp->e_slab, GFP_ATOMIC)) == NULL) { rp->r.m_bus->cnt_text_lost++; return; } @@ -188,7 +188,7 @@ static void mon_text_error(void *data, struct urb *urb, int error) struct mon_event_text *ep; if (rp->nevents >= EVENT_MAX || - (ep = kmem_cache_alloc(rp->e_slab, SLAB_ATOMIC)) == NULL) { + (ep = kmem_cache_alloc(rp->e_slab, GFP_ATOMIC)) == NULL) { rp->r.m_bus->cnt_text_lost++; return; } @@ -450,7 +450,7 @@ const struct file_operations mon_fops_text = { /* * Slab interface: constructor. */ -static void mon_text_ctor(void *mem, kmem_cache_t *slab, unsigned long sflags) +static void mon_text_ctor(void *mem, struct kmem_cache *slab, unsigned long sflags) { /* * Nothing to initialize. No, really! diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c index 907b820a5fa..4852012735f 100644 --- a/drivers/usb/net/catc.c +++ b/drivers/usb/net/catc.c @@ -345,7 +345,7 @@ static void catc_irq_done(struct urb *urb) } } resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s, status %d", catc->usbdev->bus->bus_name, diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index a77410562e1..49363595451 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -383,7 +383,7 @@ static void nc_ensure_sync(struct usbnet *dev) int status; /* Send a flush */ - urb = usb_alloc_urb(0, SLAB_ATOMIC); + urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return; diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c index b5690b3834e..d48c024cff5 100644 --- a/drivers/usb/net/pegasus.c +++ b/drivers/usb/net/pegasus.c @@ -856,7 +856,7 @@ static void intr_callback(struct urb *urb) pegasus->stats.rx_missed_errors += ((d[3] & 0x7f) << 8) | d[4]; } - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status == -ENODEV) netif_device_detach(pegasus->net); if (status && netif_msg_timer(pegasus)) diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index c2a28d88ef3..99f26b3e502 100644 --- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -469,7 +469,7 @@ static void rndis_unbind(struct usbnet *dev, struct usb_interface *intf) struct rndis_halt *halt; /* try to clear any rndis state/activity (no i/o from stack!) */ - halt = kcalloc(1, sizeof *halt, SLAB_KERNEL); + halt = kcalloc(1, sizeof *halt, GFP_KERNEL); if (halt) { halt->msg_type = RNDIS_MSG_HALT; halt->msg_len = ccpu2(sizeof *halt); diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c index 72171f94ded..c54235f73cb 100644 --- a/drivers/usb/net/rtl8150.c +++ b/drivers/usb/net/rtl8150.c @@ -587,7 +587,7 @@ static void intr_callback(struct urb *urb) } resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status == -ENODEV) netif_device_detach(dev->netdev); else if (status) diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 327f9755567..6e39e998825 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -179,9 +179,9 @@ static int init_status (struct usbnet *dev, struct usb_interface *intf) period = max ((int) dev->status->desc.bInterval, (dev->udev->speed == USB_SPEED_HIGH) ? 7 : 3); - buf = kmalloc (maxp, SLAB_KERNEL); + buf = kmalloc (maxp, GFP_KERNEL); if (buf) { - dev->interrupt = usb_alloc_urb (0, SLAB_KERNEL); + dev->interrupt = usb_alloc_urb (0, GFP_KERNEL); if (!dev->interrupt) { kfree (buf); return -ENOMEM; diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 82cd15b894b..70f93b18292 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -363,7 +363,7 @@ static int mos7720_open(struct usb_serial_port *port, struct file * filp) /* Initialising the write urb pool */ for (j = 0; j < NUM_URBS; ++j) { - urb = usb_alloc_urb(0,SLAB_ATOMIC); + urb = usb_alloc_urb(0,GFP_ATOMIC); mos7720_port->write_urb_pool[j] = urb; if (urb == NULL) { diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 02c89e10b2c..5432c634008 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -826,7 +826,7 @@ static int mos7840_open(struct usb_serial_port *port, struct file *filp) /* Initialising the write urb pool */ for (j = 0; j < NUM_URBS; ++j) { - urb = usb_alloc_urb(0, SLAB_ATOMIC); + urb = usb_alloc_urb(0, GFP_ATOMIC); mos7840_port->write_urb_pool[j] = urb; if (urb == NULL) { @@ -2786,7 +2786,7 @@ static int mos7840_startup(struct usb_serial *serial) i + 1, status); } - mos7840_port->control_urb = usb_alloc_urb(0, SLAB_ATOMIC); + mos7840_port->control_urb = usb_alloc_urb(0, GFP_ATOMIC); mos7840_port->ctrl_buf = kmalloc(16, GFP_KERNEL); } diff --git a/drivers/usb/storage/onetouch.c b/drivers/usb/storage/onetouch.c index 3a158d58441..e565d3d2ab2 100644 --- a/drivers/usb/storage/onetouch.c +++ b/drivers/usb/storage/onetouch.c @@ -76,7 +76,7 @@ static void usb_onetouch_irq(struct urb *urb) input_sync(dev); resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s/input0, status %d", onetouch->udev->bus->bus_name, @@ -154,7 +154,7 @@ int onetouch_connect_input(struct us_data *ss) goto fail1; onetouch->data = usb_buffer_alloc(udev, ONETOUCH_PKT_LEN, - SLAB_ATOMIC, &onetouch->data_dma); + GFP_ATOMIC, &onetouch->data_dma); if (!onetouch->data) goto fail1; diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 47644b5b615..323293a3e61 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -427,7 +427,7 @@ static int usb_stor_bulk_transfer_sglist(struct us_data *us, unsigned int pipe, US_DEBUGP("%s: xfer %u bytes, %d entries\n", __FUNCTION__, length, num_sg); result = usb_sg_init(&us->current_sg, us->pusb_dev, pipe, 0, - sg, num_sg, length, SLAB_NOIO); + sg, num_sg, length, GFP_NOIO); if (result) { US_DEBUGP("usb_sg_init returned %d\n", result); return USB_STOR_XFER_ERROR; diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index b401084b3d2..70644506651 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -49,7 +49,7 @@ #include <linux/sched.h> #include <linux/errno.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c index 0d3643fc629..a454dcb8e21 100644 --- a/drivers/video/geode/gxfb_core.c +++ b/drivers/video/geode/gxfb_core.c @@ -380,7 +380,7 @@ static void gxfb_remove(struct pci_dev *pdev) } static struct pci_device_id gxfb_id_table[] = { - { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_VIDEO, + { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_GX_VIDEO, PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16, 0xff0000, 0 }, { 0, } diff --git a/drivers/w1/Makefile b/drivers/w1/Makefile index 93845a2c7c2..6bb0b54965f 100644 --- a/drivers/w1/Makefile +++ b/drivers/w1/Makefile @@ -2,10 +2,6 @@ # Makefile for the Dallas's 1-wire bus. # -ifeq ($(CONFIG_W1_DS2433_CRC), y) -EXTRA_CFLAGS += -DCONFIG_W1_F23_CRC -endif - obj-$(CONFIG_W1) += wire.o wire-objs := w1.o w1_int.o w1_family.o w1_netlink.o w1_io.o diff --git a/drivers/w1/slaves/Makefile b/drivers/w1/slaves/Makefile index 70e21e2d70c..725dcfdfddb 100644 --- a/drivers/w1/slaves/Makefile +++ b/drivers/w1/slaves/Makefile @@ -2,10 +2,6 @@ # Makefile for the Dallas's 1-wire slaves. # -ifeq ($(CONFIG_W1_SLAVE_DS2433_CRC), y) -EXTRA_CFLAGS += -DCONFIG_W1_F23_CRC -endif - obj-$(CONFIG_W1_SLAVE_THERM) += w1_therm.o obj-$(CONFIG_W1_SLAVE_SMEM) += w1_smem.o obj-$(CONFIG_W1_SLAVE_DS2433) += w1_ds2433.o diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index 2ac238f1480..8ea17a53eed 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -13,7 +13,7 @@ #include <linux/device.h> #include <linux/types.h> #include <linux/delay.h> -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC #include <linux/crc16.h> #define CRC16_INIT 0 @@ -62,7 +62,7 @@ static inline size_t w1_f23_fix_count(loff_t off, size_t count, size_t size) return count; } -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data, int block) { @@ -89,13 +89,13 @@ static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data, return 0; } -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC struct w1_f23_data *data = sl->family_data; int i, min_page, max_page; #else @@ -107,7 +107,7 @@ static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, mutex_lock(&sl->master->mutex); -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC min_page = (off >> W1_PAGE_BITS); max_page = (off + count - 1) >> W1_PAGE_BITS; @@ -119,7 +119,7 @@ static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, } memcpy(buf, &data->memory[off], count); -#else /* CONFIG_W1_F23_CRC */ +#else /* CONFIG_W1_SLAVE_DS2433_CRC */ /* read directly from the EEPROM */ if (w1_reset_select_slave(sl)) { @@ -133,7 +133,7 @@ static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, w1_write_block(sl->master, wrbuf, 3); w1_read_block(sl->master, buf, count); -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ out_up: mutex_unlock(&sl->master->mutex); @@ -208,7 +208,7 @@ static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off, if ((count = w1_f23_fix_count(off, count, W1_EEPROM_SIZE)) == 0) return 0; -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC /* can only write full blocks in cached mode */ if ((off & W1_PAGE_MASK) || (count & W1_PAGE_MASK)) { dev_err(&sl->dev, "invalid offset/count off=%d cnt=%zd\n", @@ -223,7 +223,7 @@ static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off, return -EINVAL; } } -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ mutex_lock(&sl->master->mutex); @@ -262,7 +262,7 @@ static struct bin_attribute w1_f23_bin_attr = { static int w1_f23_add_slave(struct w1_slave *sl) { int err; -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC struct w1_f23_data *data; data = kmalloc(sizeof(struct w1_f23_data), GFP_KERNEL); @@ -271,24 +271,24 @@ static int w1_f23_add_slave(struct w1_slave *sl) memset(data, 0, sizeof(struct w1_f23_data)); sl->family_data = data; -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ err = sysfs_create_bin_file(&sl->dev.kobj, &w1_f23_bin_attr); -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC if (err) kfree(data); -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ return err; } static void w1_f23_remove_slave(struct w1_slave *sl) { -#ifdef CONFIG_W1_F23_CRC +#ifdef CONFIG_W1_SLAVE_DS2433_CRC kfree(sl->family_data); sl->family_data = NULL; -#endif /* CONFIG_W1_F23_CRC */ +#endif /* CONFIG_W1_SLAVE_DS2433_CRC */ sysfs_remove_bin_file(&sl->dev.kobj, &w1_f23_bin_attr); } diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index de3e9791f80..63c07243993 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <asm/atomic.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 5241c600ce2..18f26cdfd88 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -256,7 +256,7 @@ static int v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit) { - u32 fid; + int fid; int err; struct v9fs_fcall *fcall; @@ -310,7 +310,7 @@ static struct v9fs_fid* v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) { int err; - u32 nfid; + int nfid; struct v9fs_fid *ret; struct v9fs_fcall *fcall; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 9ade139086f..5023351a7af 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -36,7 +36,7 @@ void __adfs_error(struct super_block *sb, const char *function, const char *fmt, va_list args; va_start(args, fmt); - vsprintf(error_buf, fmt, args); + vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); printk(KERN_CRIT "ADFS-fs error (device %s)%s%s: %s\n", @@ -212,12 +212,12 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t *adfs_inode_cachep; +static struct kmem_cache *adfs_inode_cachep; static struct inode *adfs_alloc_inode(struct super_block *sb) { struct adfs_inode_info *ei; - ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, SLAB_KERNEL); + ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -228,7 +228,7 @@ static void adfs_destroy_inode(struct inode *inode) kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index ccd624ef427..f4de4b98004 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -445,7 +445,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...) va_list args; va_start(args,fmt); - vsprintf(ErrorBuffer,fmt,args); + vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args); va_end(args); printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id, @@ -461,7 +461,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...) va_list args; va_start(args,fmt); - vsprintf(ErrorBuffer,fmt,args); + vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args); va_end(args); printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id, diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index b0b953683c1..b330009fe42 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -289,12 +289,11 @@ int affs_init_bitmap(struct super_block *sb, int *flags) sbi->s_bmap_count = (sbi->s_partition_size - sbi->s_reserved + sbi->s_bmap_bits - 1) / sbi->s_bmap_bits; size = sbi->s_bmap_count * sizeof(*bm); - bm = sbi->s_bitmap = kmalloc(size, GFP_KERNEL); + bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL); if (!sbi->s_bitmap) { printk(KERN_ERR "AFFS: Bitmap allocation failed\n"); return -ENOMEM; } - memset(sbi->s_bitmap, 0, size); bmap_blk = (__be32 *)sbi->s_root_bh->b_data; blk = sb->s_blocksize / 4 - 49; diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ea72c3a16c..3de93e79994 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -66,12 +66,12 @@ affs_write_super(struct super_block *sb) pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); } -static kmem_cache_t * affs_inode_cachep; +static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) { struct affs_inode_info *ei; - ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, SLAB_KERNEL); + ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->vfs_inode.i_version = 1; @@ -83,7 +83,7 @@ static void affs_destroy_inode(struct inode *inode) kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index f09a794f248..615df2407cb 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/completion.h> +#include <linux/freezer.h> #include "cell.h" #include "server.h" #include "volume.h" diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c index 65bc05ab818..694344e4d3c 100644 --- a/fs/afs/kafstimod.c +++ b/fs/afs/kafstimod.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/completion.h> +#include <linux/freezer.h> #include "cell.h" #include "volume.h" #include "kafstimod.h" diff --git a/fs/afs/server.c b/fs/afs/server.c index 22afaae1a4c..44aff81dc6a 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -55,13 +55,12 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr, _enter("%p,%08x,", cell, ntohl(addr->s_addr)); /* allocate and initialise a server record */ - server = kmalloc(sizeof(struct afs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); if (!server) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(server, 0, sizeof(struct afs_server)); atomic_set(&server->usage, 1); INIT_LIST_HEAD(&server->link); diff --git a/fs/afs/super.c b/fs/afs/super.c index 67d1f5c819e..18d9b77ba40 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -35,7 +35,7 @@ struct afs_mount_params { struct afs_volume *volume; }; -static void afs_i_init_once(void *foo, kmem_cache_t *cachep, +static void afs_i_init_once(void *foo, struct kmem_cache *cachep, unsigned long flags); static int afs_get_sb(struct file_system_type *fs_type, @@ -65,7 +65,7 @@ static struct super_operations afs_super_ops = { .put_super = afs_put_super, }; -static kmem_cache_t *afs_inode_cachep; +static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; /*****************************************************************************/ @@ -242,14 +242,12 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent) kenter(""); /* allocate a superblock info record */ - as = kmalloc(sizeof(struct afs_super_info), GFP_KERNEL); + as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (!as) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(as, 0, sizeof(struct afs_super_info)); - afs_get_volume(params->volume); as->volume = params->volume; @@ -384,7 +382,7 @@ static void afs_put_super(struct super_block *sb) /* * initialise an inode cache slab element prior to any use */ -static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep, +static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, unsigned long flags) { struct afs_vnode *vnode = (struct afs_vnode *) _vnode; @@ -412,7 +410,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) struct afs_vnode *vnode; vnode = (struct afs_vnode *) - kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL); + kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; @@ -47,8 +47,8 @@ unsigned long aio_nr; /* current system wide number of aio requests */ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ /*----end sysctl variables---*/ -static kmem_cache_t *kiocb_cachep; -static kmem_cache_t *kioctx_cachep; +static struct kmem_cache *kiocb_cachep; +static struct kmem_cache *kioctx_cachep; static struct workqueue_struct *aio_wq; @@ -666,17 +666,6 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ssize_t (*retry)(struct kiocb *); ssize_t ret; - if (iocb->ki_retried++ > 1024*1024) { - printk("Maximal retry count. Bytes done %Zd\n", - iocb->ki_nbytes - iocb->ki_left); - return -EAGAIN; - } - - if (!(iocb->ki_retried & 0xff)) { - pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); - } - if (!(retry = iocb->ki_retry)) { printk("aio_run_iocb: iocb->ki_retry = NULL\n"); return 0; @@ -1005,9 +994,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) kunmap_atomic(ring, KM_IRQ1); pr_debug("added to ring %p at [%lu]\n", iocb, tail); - - pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); put_rq: /* everything turned out well, dispose of the aiocb. */ ret = __aio_put_req(ctx, iocb); @@ -1413,7 +1399,6 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb) kiocb->ki_iovec->iov_len = kiocb->ki_left; kiocb->ki_nr_segs = 1; kiocb->ki_cur_seg = 0; - kiocb->ki_nbytes = kiocb->ki_left; return 0; } @@ -1591,7 +1576,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_opcode = iocb->aio_lio_opcode; init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); INIT_LIST_HEAD(&req->ki_wait.task_list); - req->ki_retried = 0; ret = aio_setup_iocb(req); diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 38ede5c9d6f..f968d134280 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -28,10 +28,11 @@ void autofs_kill_sb(struct super_block *sb) /* * In the event of a failure in get_sb_nodev the superblock * info is not present so nothing else has been setup, so - * just exit when we are called from deactivate_super. + * just call kill_anon_super when we are called from + * deactivate_super. */ if (!sbi) - return; + goto out_kill_sb; if ( !sbi->catatonic ) autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ @@ -44,6 +45,7 @@ void autofs_kill_sb(struct super_block *sb) kfree(sb->s_fs_info); +out_kill_sb: DPRINTK(("autofs: shutting down\n")); kill_anon_super(sb); } @@ -209,7 +211,6 @@ fail_iput: fail_free: kfree(sbi); s->s_fs_info = NULL; - kill_anon_super(s); fail_unlock: return -EINVAL; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ce7c0f1dd52..9c48250fd72 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -152,10 +152,11 @@ void autofs4_kill_sb(struct super_block *sb) /* * In the event of a failure in get_sb_nodev the superblock * info is not present so nothing else has been setup, so - * just exit when we are called from deactivate_super. + * just call kill_anon_super when we are called from + * deactivate_super. */ if (!sbi) - return; + goto out_kill_sb; sb->s_fs_info = NULL; @@ -167,6 +168,7 @@ void autofs4_kill_sb(struct super_block *sb) kfree(sbi); +out_kill_sb: DPRINTK("shutting down"); kill_anon_super(sb); } @@ -426,7 +428,6 @@ fail_ino: fail_free: kfree(sbi); s->s_fs_info = NULL; - kill_anon_super(s); fail_unlock: return -EINVAL; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 07f7144f0e2..bce402eee55 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -61,7 +61,7 @@ static const struct super_operations befs_sops = { }; /* slab cache for befs_inode_info objects */ -static kmem_cache_t *befs_inode_cachep; +static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, @@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb) { struct befs_inode_info *bi; bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep, - SLAB_KERNEL); + GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; @@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode) kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index ed27ffb3459..eac175ed9f4 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -228,12 +228,12 @@ static void bfs_write_super(struct super_block *s) unlock_kernel(); } -static kmem_cache_t * bfs_inode_cachep; +static struct kmem_cache * bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) { struct bfs_inode_info *bi; - bi = kmem_cache_alloc(bfs_inode_cachep, SLAB_KERNEL); + bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; @@ -244,7 +244,7 @@ static void bfs_destroy_inode(struct inode *inode) kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct bfs_inode_info *bi = foo; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index cc72bb43061..be5869d3499 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,10 +47,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif - /* * If we don't support core dumping, then supply a NULL so we * don't even try. @@ -243,8 +239,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, if (interp_aout) { argv = sp + 2; envp = argv + argc + 1; - __put_user((elf_addr_t)(unsigned long)argv, sp++); - __put_user((elf_addr_t)(unsigned long)envp, sp++); + if (__put_user((elf_addr_t)(unsigned long)argv, sp++) || + __put_user((elf_addr_t)(unsigned long)envp, sp++)) + return -EFAULT; } else { argv = sp; envp = argv + argc + 1; @@ -254,7 +251,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, p = current->mm->arg_end = current->mm->arg_start; while (argc-- > 0) { size_t len; - __put_user((elf_addr_t)p, argv++); + if (__put_user((elf_addr_t)p, argv++)) + return -EFAULT; len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) return 0; @@ -265,7 +263,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, current->mm->arg_end = current->mm->env_start = p; while (envc-- > 0) { size_t len; - __put_user((elf_addr_t)p, envp++); + if (__put_user((elf_addr_t)p, envp++)) + return -EFAULT; len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) return 0; @@ -545,7 +544,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) unsigned long reloc_func_desc = 0; char passed_fileno[6]; struct files_struct *files; - int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT; + int executable_stack = EXSTACK_DEFAULT; unsigned long def_flags = 0; struct { struct elfhdr elf_ex; @@ -708,7 +707,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) executable_stack = EXSTACK_DISABLE_X; break; } - have_pt_gnu_stack = (i < loc->elf_ex.e_phnum); /* Some simple consistency checks for the interpreter */ if (elf_interpreter) { @@ -856,7 +854,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + if (current->flags & PF_RANDOMIZE) + load_bias = randomize_range(0x10000, + ELF_ET_DYN_BASE, + 0); + else + load_bias = ELF_ET_DYN_BASE; + load_bias = ELF_PAGESTART(load_bias - vaddr); } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f86d5c9ce5e..ed9a61c6beb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -40,9 +40,6 @@ #include <asm/pgalloc.h> typedef char *elf_caddr_t; -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif #if 0 #define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ ) @@ -30,7 +30,7 @@ #define BIO_POOL_SIZE 256 -static kmem_cache_t *bio_slab __read_mostly; +static struct kmem_cache *bio_slab __read_mostly; #define BIOVEC_NR_POOLS 6 @@ -44,7 +44,7 @@ mempool_t *bio_split_pool __read_mostly; struct biovec_slab { int nr_vecs; char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; }; /* diff --git a/fs/block_dev.c b/fs/block_dev.c index 36c0e7af9d0..13816b4d76f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -235,11 +235,11 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); -static kmem_cache_t * bdev_cachep __read_mostly; +static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { - struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL); + struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -253,7 +253,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; diff --git a/fs/buffer.c b/fs/buffer.c index 35527dca1db..517860f2d75 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2908,7 +2908,7 @@ asmlinkage long sys_bdflush(int func, long data) /* * Buffer-head allocation */ -static kmem_cache_t *bh_cachep; +static struct kmem_cache *bh_cachep; /* * Once the number of bh's in the machine exceeds this level, we start @@ -2961,7 +2961,7 @@ void free_buffer_head(struct buffer_head *bh) EXPORT_SYMBOL(free_buffer_head); static void -init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags) +init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) { if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { @@ -2972,7 +2972,6 @@ init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags) } } -#ifdef CONFIG_HOTPLUG_CPU static void buffer_exit_cpu(int cpu) { int i; @@ -2994,7 +2993,6 @@ static int buffer_cpu_notify(struct notifier_block *self, buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init buffer_init(void) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 84976cdbe71..71bc87a37fc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -34,6 +34,7 @@ #include <linux/mempool.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE @@ -81,7 +82,7 @@ extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; -extern kmem_cache_t *cifs_oplock_cachep; +extern struct kmem_cache *cifs_oplock_cachep; static int cifs_read_super(struct super_block *sb, void *data, @@ -232,11 +233,11 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) return generic_permission(inode, mask, NULL); } -static kmem_cache_t *cifs_inode_cachep; -static kmem_cache_t *cifs_req_cachep; -static kmem_cache_t *cifs_mid_cachep; -kmem_cache_t *cifs_oplock_cachep; -static kmem_cache_t *cifs_sm_req_cachep; +static struct kmem_cache *cifs_inode_cachep; +static struct kmem_cache *cifs_req_cachep; +static struct kmem_cache *cifs_mid_cachep; +struct kmem_cache *cifs_oplock_cachep; +static struct kmem_cache *cifs_sm_req_cachep; mempool_t *cifs_sm_req_poolp; mempool_t *cifs_req_poolp; mempool_t *cifs_mid_poolp; @@ -245,7 +246,7 @@ static struct inode * cifs_alloc_inode(struct super_block *sb) { struct cifsInodeInfo *cifs_inode; - cifs_inode = kmem_cache_alloc(cifs_inode_cachep, SLAB_KERNEL); + cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL); if (!cifs_inode) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ @@ -668,7 +669,7 @@ const struct file_operations cifs_dir_ops = { }; static void -cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags) +cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) { struct cifsInodeInfo *cifsi = inode; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71f77914ce9..2caca06b4ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -31,6 +31,7 @@ #include <linux/delay.h> #include <linux/completion.h> #include <linux/pagevec.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/processor.h> #include "cifspdu.h" diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bbc9cd34b6e..aedf683f011 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -153,7 +153,7 @@ cifs_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | SLAB_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS); /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ @@ -192,7 +192,7 @@ cifs_small_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | SLAB_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS); if (ret_buf) { /* No need to clear memory here, cleared in header assemble */ /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 48d47b46b1f..f80007eaebf 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -34,7 +34,7 @@ #include "cifs_debug.h" extern mempool_t *cifs_mid_poolp; -extern kmem_cache_t *cifs_oplock_cachep; +extern struct kmem_cache *cifs_oplock_cachep; static struct mid_q_entry * AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) @@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) } temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, - SLAB_KERNEL | SLAB_NOFS); + GFP_KERNEL | GFP_NOFS); if (temp == NULL) return temp; else { @@ -118,7 +118,7 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon) return NULL; } temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep, - SLAB_KERNEL); + GFP_KERNEL); if (temp == NULL) return temp; else { diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 88d12332116..b64659fa82d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -38,12 +38,12 @@ static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); -static kmem_cache_t * coda_inode_cachep; +static struct kmem_cache * coda_inode_cachep; static struct inode *coda_alloc_inode(struct super_block *sb) { struct coda_inode_info *ei; - ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, SLAB_KERNEL); + ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL); if (!ei) return NULL; memset(&ei->c_fid, 0, sizeof(struct CodaFid)); @@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode) kmem_cache_free(coda_inode_cachep, ITOC(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/compat.c b/fs/compat.c index 06dad665b88..a7e3f162fb1 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -871,7 +871,7 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, retval = -EINVAL; - if (type_page) { + if (type_page && data_page) { if (!strcmp((char *)type_page, SMBFS_NAME)) { do_smb_super_data_conv((void *)data_page); } else if (!strcmp((char *)type_page, NCPFS_NAME)) { @@ -1144,7 +1144,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, lastdirent = buf.previous; if (lastdirent) { typeof(lastdirent->d_off) d_off = file->f_pos; - __put_user_unaligned(d_off, &lastdirent->d_off); + error = -EFAULT; + if (__put_user_unaligned(d_off, &lastdirent->d_off)) + goto out_putf; error = count - buf.count; } @@ -1611,14 +1613,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, nr &= ~1UL; while (nr) { unsigned long h, l; - __get_user(l, ufdset); - __get_user(h, ufdset+1); + if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) + return -EFAULT; ufdset += 2; *fdset++ = h << 32 | l; nr -= 2; } - if (odd) - __get_user(*fdset, ufdset); + if (odd && __get_user(*fdset, ufdset)) + return -EFAULT; } else { /* Tricky, must clear full unsigned long in the * kernel fdset at the end, this makes sure that @@ -1630,14 +1632,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, } static -void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, - unsigned long *fdset) +int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, + unsigned long *fdset) { unsigned long odd; nr = ROUND_UP(nr, __COMPAT_NFDBITS); if (!ufdset) - return; + return 0; odd = nr & 1UL; nr &= ~1UL; @@ -1645,13 +1647,14 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, unsigned long h, l; l = *fdset++; h = l >> 32; - __put_user(l, ufdset); - __put_user(h, ufdset+1); + if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) + return -EFAULT; ufdset += 2; nr -= 2; } - if (odd) - __put_user(*fdset, ufdset); + if (odd && __put_user(*fdset, ufdset)) + return -EFAULT; + return 0; } @@ -1726,10 +1729,10 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, ret = 0; } - compat_set_fd_set(n, inp, fds.res_in); - compat_set_fd_set(n, outp, fds.res_out); - compat_set_fd_set(n, exp, fds.res_ex); - + if (compat_set_fd_set(n, inp, fds.res_in) || + compat_set_fd_set(n, outp, fds.res_out) || + compat_set_fd_set(n, exp, fds.res_ex)) + ret = -EFAULT; out: kfree(bits); out_nofds: diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a91f2628c98..bcc3caf5d82 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -211,8 +211,10 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned lon up_native = compat_alloc_user_space(sizeof(struct video_still_picture)); - put_user(compat_ptr(fp), &up_native->iFrame); - put_user(size, &up_native->size); + err = put_user(compat_ptr(fp), &up_native->iFrame); + err |= put_user(size, &up_native->size); + if (err) + return -EFAULT; err = sys_ioctl(fd, cmd, (unsigned long) up_native); @@ -236,8 +238,10 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned err |= get_user(length, &up->length); up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); - put_user(compat_ptr(palp), &up_native->palette); - put_user(length, &up_native->length); + err = put_user(compat_ptr(palp), &up_native->palette); + err |= put_user(length, &up_native->length); + if (err) + return -EFAULT; err = sys_ioctl(fd, cmd, (unsigned long) up_native); @@ -2043,16 +2047,19 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg) struct serial_struct ss; mm_segment_t oldseg = get_fs(); __u32 udata; + unsigned int base; if (cmd == TIOCSSERIAL) { if (!access_ok(VERIFY_READ, ss32, sizeof(SS32))) return -EFAULT; if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base))) return -EFAULT; - __get_user(udata, &ss32->iomem_base); + if (__get_user(udata, &ss32->iomem_base)) + return -EFAULT; ss.iomem_base = compat_ptr(udata); - __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift); - __get_user(ss.port_high, &ss32->port_high); + if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) || + __get_user(ss.port_high, &ss32->port_high)) + return -EFAULT; ss.iomap_base = 0UL; } set_fs(KERNEL_DS); @@ -2063,12 +2070,12 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg) return -EFAULT; if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base))) return -EFAULT; - __put_user((unsigned long)ss.iomem_base >> 32 ? - 0xffffffff : (unsigned)(unsigned long)ss.iomem_base, - &ss32->iomem_base); - __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift); - __put_user(ss.port_high, &ss32->port_high); - + base = (unsigned long)ss.iomem_base >> 32 ? + 0xffffffff : (unsigned)(unsigned long)ss.iomem_base; + if (__put_user(base, &ss32->iomem_base) || + __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) || + __put_user(ss.port_high, &ss32->port_high)) + return -EFAULT; } return err; } @@ -2397,6 +2404,7 @@ HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc) HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc) HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc) HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc) +HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc) /* ioctls used by appletalk ddp.c */ HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc) diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 3f4ff7a242b..f92cd303d2c 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -49,7 +49,7 @@ struct configfs_dirent { #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) extern struct vfsmount * configfs_mount; -extern kmem_cache_t *configfs_dir_cachep; +extern struct kmem_cache *configfs_dir_cachep; extern int configfs_is_root(struct config_item *item); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 68bd5c93ca5..ed678529ebb 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -38,7 +38,7 @@ struct vfsmount * configfs_mount = NULL; struct super_block * configfs_sb = NULL; -kmem_cache_t *configfs_dir_cachep; +struct kmem_cache *configfs_dir_cachep; static int configfs_mnt_count = 0; static struct super_operations configfs_ops = { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a624c3ec818..0509cedd415 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -481,6 +481,8 @@ static int cramfs_readpage(struct file *file, struct page * page) pgdata = kmap(page); if (compr_len == 0) ; /* hole */ + else if (compr_len > (PAGE_CACHE_SIZE << 1)) + printk(KERN_ERR "cramfs: bad compressed blocksize %u\n", compr_len); else { mutex_lock(&read_mutex); bytes_filled = cramfs_uncompress_block(pgdata, diff --git a/fs/dcache.c b/fs/dcache.c index fd4a428998e..d68631f18df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); -static kmem_cache_t *dentry_cache __read_mostly; +static struct kmem_cache *dentry_cache __read_mostly; #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) @@ -68,15 +68,19 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static void d_callback(struct rcu_head *head) +static void __d_free(struct dentry *dentry) { - struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); - if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); } +static void d_callback(struct rcu_head *head) +{ + struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); + __d_free(dentry); +} + /* * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry * inside dcache_lock. @@ -85,7 +89,11 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - call_rcu(&dentry->d_u.d_rcu, d_callback); + /* if dentry was never inserted into hash, immediate free is OK */ + if (dentry->d_hash.pprev == NULL) + __d_free(dentry); + else + call_rcu(&dentry->d_u.d_rcu, d_callback); } /* @@ -2072,10 +2080,10 @@ static void __init dcache_init(unsigned long mempages) } /* SLAB cache for __getname() consumers */ -kmem_cache_t *names_cachep __read_mostly; +struct kmem_cache *names_cachep __read_mostly; /* SLAB cache for file structures */ -kmem_cache_t *filp_cachep __read_mostly; +struct kmem_cache *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); diff --git a/fs/dcookies.c b/fs/dcookies.c index 0c4b0674854..21af1629f9b 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -37,7 +37,7 @@ struct dcookie_struct { static LIST_HEAD(dcookie_users); static DEFINE_MUTEX(dcookie_mutex); -static kmem_cache_t *dcookie_cache __read_mostly; +static struct kmem_cache *dcookie_cache __read_mostly; static struct list_head *dcookie_hashtable __read_mostly; static size_t hash_size __read_mostly; diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 81b2c6465ee..b5654a284fe 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -1,14 +1,32 @@ menu "Distributed Lock Manager" - depends on INET && IP_SCTP && EXPERIMENTAL + depends on EXPERIMENTAL && INET config DLM tristate "Distributed Lock Manager (DLM)" depends on IPV6 || IPV6=n select CONFIGFS_FS + select IP_SCTP if DLM_SCTP help A general purpose distributed lock manager for kernel or userspace applications. +choice + prompt "Select DLM communications protocol" + depends on DLM + default DLM_TCP + help + The DLM Can use TCP or SCTP for it's network communications. + SCTP supports multi-homed operations whereas TCP doesn't. + However, SCTP seems to have stability problems at the moment. + +config DLM_TCP + bool "TCP/IP" + +config DLM_SCTP + bool "SCTP" + +endchoice + config DLM_DEBUG bool "DLM debugging" depends on DLM diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 1832e0297f7..65388944eba 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -4,7 +4,6 @@ dlm-y := ast.o \ dir.o \ lock.o \ lockspace.o \ - lowcomms.o \ main.o \ member.o \ memory.o \ @@ -17,3 +16,6 @@ dlm-y := ast.o \ util.o dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o +dlm-$(CONFIG_DLM_TCP) += lowcomms-tcp.o + +dlm-$(CONFIG_DLM_SCTP) += lowcomms-sctp.o
\ No newline at end of file diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 1e5cd67e1b7..1ee8195e6fc 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -471,6 +471,7 @@ struct dlm_ls { char *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ uint64_t ls_rcom_seq; + spinlock_t ls_rcom_spin; struct list_head ls_recover_list; spinlock_t ls_recover_list_lock; int ls_recover_list_count; @@ -488,7 +489,8 @@ struct dlm_ls { #define LSFL_RUNNING 1 #define LSFL_RECOVERY_STOP 2 #define LSFL_RCOM_READY 3 -#define LSFL_UEVENT_WAIT 4 +#define LSFL_RCOM_WAIT 4 +#define LSFL_UEVENT_WAIT 5 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 3f2befa4797..30878defaeb 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -2372,6 +2372,7 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) { lkb->lkb_exflags = ms->m_exflags; + lkb->lkb_sbflags = ms->m_sbflags; lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | (ms->m_flags & 0x0000FFFF); } @@ -3028,10 +3029,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) while (1) { if (dlm_locking_stopped(ls)) { - if (!recovery) - dlm_add_requestqueue(ls, nodeid, hd); - error = -EINTR; - goto out; + if (recovery) { + error = -EINTR; + goto out; + } + error = dlm_add_requestqueue(ls, nodeid, hd); + if (error == -EAGAIN) + continue; + else { + error = -EINTR; + goto out; + } } if (lock_recovery_try(ls)) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index f8842ca443c..59012b089e8 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -22,6 +22,7 @@ #include "memory.h" #include "lock.h" #include "recover.h" +#include "requestqueue.h" #ifdef CONFIG_DLM_DEBUG int dlm_create_debug_file(struct dlm_ls *ls); @@ -478,6 +479,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); + spin_lock_init(&ls->ls_rcom_spin); + get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t)); ls->ls_recover_status = 0; ls->ls_recover_seq = 0; ls->ls_recover_args = NULL; @@ -684,6 +687,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) * Free structures on any other lists */ + dlm_purge_requestqueue(ls); kfree(ls->ls_recover_args); dlm_clear_free_entries(ls); dlm_clear_members(ls); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms-sctp.c index 6da6b14d5a6..fe158d7a928 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms-sctp.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -75,13 +75,13 @@ struct nodeinfo { }; static DEFINE_IDR(nodeinfo_idr); -static struct rw_semaphore nodeinfo_lock; -static int max_nodeid; +static DECLARE_RWSEM(nodeinfo_lock); +static int max_nodeid; struct cbuf { - unsigned base; - unsigned len; - unsigned mask; + unsigned int base; + unsigned int len; + unsigned int mask; }; /* Just the one of these, now. But this struct keeps @@ -90,9 +90,9 @@ struct cbuf { #define CF_READ_PENDING 1 struct connection { - struct socket *sock; + struct socket *sock; unsigned long flags; - struct page *rx_page; + struct page *rx_page; atomic_t waiting_requests; struct cbuf cb; int eagain_flag; @@ -102,36 +102,40 @@ struct connection { struct writequeue_entry { struct list_head list; - struct page *page; + struct page *page; int offset; int len; int end; int users; - struct nodeinfo *ni; + struct nodeinfo *ni; }; -#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0) -#define CBUF_EMPTY(cb) ((cb)->len == 0) -#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1)) -#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask) +static void cbuf_add(struct cbuf *cb, int n) +{ + cb->len += n; +} -#define CBUF_INIT(cb, size) \ -do { \ - (cb)->base = (cb)->len = 0; \ - (cb)->mask = ((size)-1); \ -} while(0) +static int cbuf_data(struct cbuf *cb) +{ + return ((cb->base + cb->len) & cb->mask); +} -#define CBUF_EAT(cb, n) \ -do { \ - (cb)->len -= (n); \ - (cb)->base += (n); \ - (cb)->base &= (cb)->mask; \ -} while(0) +static void cbuf_init(struct cbuf *cb, int size) +{ + cb->base = cb->len = 0; + cb->mask = size-1; +} +static void cbuf_eat(struct cbuf *cb, int n) +{ + cb->len -= n; + cb->base += n; + cb->base &= cb->mask; +} /* List of nodes which have writes pending */ -static struct list_head write_nodes; -static spinlock_t write_nodes_lock; +static LIST_HEAD(write_nodes); +static DEFINE_SPINLOCK(write_nodes_lock); /* Maximum number of incoming messages to process before * doing a schedule() @@ -141,8 +145,7 @@ static spinlock_t write_nodes_lock; /* Manage daemons */ static struct task_struct *recv_task; static struct task_struct *send_task; -static wait_queue_head_t lowcomms_recv_wait; -static atomic_t accepting; +static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait); /* The SCTP connection */ static struct connection sctp_con; @@ -161,11 +164,11 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) return error; if (dlm_local_addr[0]->ss_family == AF_INET) { - struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; + struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; } else { - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; memcpy(&ret6->sin6_addr, &in6->sin6_addr, sizeof(in6->sin6_addr)); @@ -174,6 +177,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) return 0; } +/* If alloc is 0 here we will not attempt to allocate a new + nodeinfo struct */ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) { struct nodeinfo *ni; @@ -184,44 +189,45 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) ni = idr_find(&nodeinfo_idr, nodeid); up_read(&nodeinfo_lock); - if (!ni && alloc) { - down_write(&nodeinfo_lock); + if (ni || !alloc) + return ni; - ni = idr_find(&nodeinfo_idr, nodeid); - if (ni) - goto out_up; + down_write(&nodeinfo_lock); - r = idr_pre_get(&nodeinfo_idr, alloc); - if (!r) - goto out_up; + ni = idr_find(&nodeinfo_idr, nodeid); + if (ni) + goto out_up; - ni = kmalloc(sizeof(struct nodeinfo), alloc); - if (!ni) - goto out_up; + r = idr_pre_get(&nodeinfo_idr, alloc); + if (!r) + goto out_up; - r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n); - if (r) { - kfree(ni); - ni = NULL; - goto out_up; - } - if (n != nodeid) { - idr_remove(&nodeinfo_idr, n); - kfree(ni); - ni = NULL; - goto out_up; - } - memset(ni, 0, sizeof(struct nodeinfo)); - spin_lock_init(&ni->lock); - INIT_LIST_HEAD(&ni->writequeue); - spin_lock_init(&ni->writequeue_lock); - ni->nodeid = nodeid; - - if (nodeid > max_nodeid) - max_nodeid = nodeid; - out_up: - up_write(&nodeinfo_lock); + ni = kmalloc(sizeof(struct nodeinfo), alloc); + if (!ni) + goto out_up; + + r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n); + if (r) { + kfree(ni); + ni = NULL; + goto out_up; } + if (n != nodeid) { + idr_remove(&nodeinfo_idr, n); + kfree(ni); + ni = NULL; + goto out_up; + } + memset(ni, 0, sizeof(struct nodeinfo)); + spin_lock_init(&ni->lock); + INIT_LIST_HEAD(&ni->writequeue); + spin_lock_init(&ni->writequeue_lock); + ni->nodeid = nodeid; + + if (nodeid > max_nodeid) + max_nodeid = nodeid; +out_up: + up_write(&nodeinfo_lock); return ni; } @@ -279,13 +285,13 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, in4_addr->sin_port = cpu_to_be16(port); memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero)); memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) - - sizeof(struct sockaddr_in)); + sizeof(struct sockaddr_in)); *addr_len = sizeof(struct sockaddr_in); } else { struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; in6_addr->sin6_port = cpu_to_be16(port); memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) - - sizeof(struct sockaddr_in6)); + sizeof(struct sockaddr_in6)); *addr_len = sizeof(struct sockaddr_in6); } } @@ -324,7 +330,7 @@ static void send_shutdown(sctp_assoc_t associd) cmsg->cmsg_type = SCTP_SNDRCV; cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); outmessage.msg_controllen = cmsg->cmsg_len; - sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + sinfo = CMSG_DATA(cmsg); memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); sinfo->sinfo_flags |= MSG_EOF; @@ -387,7 +393,7 @@ static void process_sctp_notification(struct msghdr *msg, char *buf) if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) { log_print("COMM_UP for invalid assoc ID %d", - (int)sn->sn_assoc_change.sac_assoc_id); + (int)sn->sn_assoc_change.sac_assoc_id); init_failed(); return; } @@ -398,15 +404,18 @@ static void process_sctp_notification(struct msghdr *msg, char *buf) fs = get_fs(); set_fs(get_ds()); ret = sctp_con.sock->ops->getsockopt(sctp_con.sock, - IPPROTO_SCTP, SCTP_PRIMARY_ADDR, - (char*)&prim, &prim_len); + IPPROTO_SCTP, + SCTP_PRIMARY_ADDR, + (char*)&prim, + &prim_len); set_fs(fs); if (ret < 0) { struct nodeinfo *ni; log_print("getsockopt/sctp_primary_addr on " "new assoc %d failed : %d", - (int)sn->sn_assoc_change.sac_assoc_id, ret); + (int)sn->sn_assoc_change.sac_assoc_id, + ret); /* Retry INIT later */ ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id); @@ -426,12 +435,10 @@ static void process_sctp_notification(struct msghdr *msg, char *buf) return; /* Save the assoc ID */ - spin_lock(&ni->lock); ni->assoc_id = sn->sn_assoc_change.sac_assoc_id; - spin_unlock(&ni->lock); log_print("got new/restarted association %d nodeid %d", - (int)sn->sn_assoc_change.sac_assoc_id, nodeid); + (int)sn->sn_assoc_change.sac_assoc_id, nodeid); /* Send any pending writes */ clear_bit(NI_INIT_PENDING, &ni->flags); @@ -507,13 +514,12 @@ static int receive_from_sock(void) sctp_con.rx_page = alloc_page(GFP_ATOMIC); if (sctp_con.rx_page == NULL) goto out_resched; - CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE); + cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE); } memset(&incmsg, 0, sizeof(incmsg)); memset(&msgname, 0, sizeof(msgname)); - memset(incmsg, 0, sizeof(incmsg)); msg.msg_name = &msgname; msg.msg_namelen = sizeof(msgname); msg.msg_flags = 0; @@ -532,17 +538,17 @@ static int receive_from_sock(void) * iov[0] is the bit of the circular buffer between the current end * point (cb.base + cb.len) and the end of the buffer. */ - iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb); + iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb); iov[0].iov_base = page_address(sctp_con.rx_page) + - CBUF_DATA(&sctp_con.cb); + cbuf_data(&sctp_con.cb); iov[1].iov_len = 0; /* * iov[1] is the bit of the circular buffer between the start of the * buffer and the start of the currently used section (cb.base) */ - if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) { - iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb); + if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) { + iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb); iov[1].iov_len = sctp_con.cb.base; iov[1].iov_base = page_address(sctp_con.rx_page); msg.msg_iovlen = 2; @@ -557,7 +563,7 @@ static int receive_from_sock(void) msg.msg_control = incmsg; msg.msg_controllen = sizeof(incmsg); cmsg = CMSG_FIRSTHDR(&msg); - sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + sinfo = CMSG_DATA(cmsg); if (msg.msg_flags & MSG_NOTIFICATION) { process_sctp_notification(&msg, page_address(sctp_con.rx_page)); @@ -583,29 +589,29 @@ static int receive_from_sock(void) if (r == 1) return 0; - CBUF_ADD(&sctp_con.cb, ret); + cbuf_add(&sctp_con.cb, ret); ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), page_address(sctp_con.rx_page), sctp_con.cb.base, sctp_con.cb.len, PAGE_CACHE_SIZE); if (ret < 0) goto out_close; - CBUF_EAT(&sctp_con.cb, ret); + cbuf_eat(&sctp_con.cb, ret); - out: +out: ret = 0; goto out_ret; - out_resched: +out_resched: lowcomms_data_ready(sctp_con.sock->sk, 0); ret = 0; - schedule(); + cond_resched(); goto out_ret; - out_close: +out_close: if (ret != -EAGAIN) log_print("error reading from sctp socket: %d", ret); - out_ret: +out_ret: return ret; } @@ -619,10 +625,12 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num) set_fs(get_ds()); if (num == 1) result = sctp_con.sock->ops->bind(sctp_con.sock, - (struct sockaddr *) addr, addr_len); + (struct sockaddr *) addr, + addr_len); else result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP, - SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len); + SCTP_SOCKOPT_BINDX_ADD, + (char *)addr, addr_len); set_fs(fs); if (result < 0) @@ -719,10 +727,10 @@ static int init_sock(void) return 0; - create_delsock: +create_delsock: sock_release(sock); sctp_con.sock = NULL; - out: +out: return result; } @@ -756,16 +764,13 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) int users = 0; struct nodeinfo *ni; - if (!atomic_read(&accepting)) - return NULL; - ni = nodeid2nodeinfo(nodeid, allocation); if (!ni) return NULL; spin_lock(&ni->writequeue_lock); e = list_entry(ni->writequeue.prev, struct writequeue_entry, list); - if (((struct list_head *) e == &ni->writequeue) || + if ((&e->list == &ni->writequeue) || (PAGE_CACHE_SIZE - e->end < len)) { e = NULL; } else { @@ -776,7 +781,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) spin_unlock(&ni->writequeue_lock); if (e) { - got_one: + got_one: if (users == 0) kmap(e->page); *ppc = page_address(e->page) + offset; @@ -803,9 +808,6 @@ void dlm_lowcomms_commit_buffer(void *arg) int users; struct nodeinfo *ni = e->ni; - if (!atomic_read(&accepting)) - return; - spin_lock(&ni->writequeue_lock); users = --e->users; if (users) @@ -822,7 +824,7 @@ void dlm_lowcomms_commit_buffer(void *arg) } return; - out: +out: spin_unlock(&ni->writequeue_lock); return; } @@ -878,7 +880,7 @@ static void initiate_association(int nodeid) cmsg->cmsg_level = IPPROTO_SCTP; cmsg->cmsg_type = SCTP_SNDRCV; cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); - sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + sinfo = CMSG_DATA(cmsg); memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid); @@ -892,7 +894,7 @@ static void initiate_association(int nodeid) } /* Send a message */ -static int send_to_sock(struct nodeinfo *ni) +static void send_to_sock(struct nodeinfo *ni) { int ret = 0; struct writequeue_entry *e; @@ -903,13 +905,13 @@ static int send_to_sock(struct nodeinfo *ni) struct sctp_sndrcvinfo *sinfo; struct kvec iov; - /* See if we need to init an association before we start + /* See if we need to init an association before we start sending precious messages */ spin_lock(&ni->lock); if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { spin_unlock(&ni->lock); initiate_association(ni->nodeid); - return 0; + return; } spin_unlock(&ni->lock); @@ -923,7 +925,7 @@ static int send_to_sock(struct nodeinfo *ni) cmsg->cmsg_level = IPPROTO_SCTP; cmsg->cmsg_type = SCTP_SNDRCV; cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); - sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + sinfo = CMSG_DATA(cmsg); memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid); sinfo->sinfo_assoc_id = ni->assoc_id; @@ -955,7 +957,7 @@ static int send_to_sock(struct nodeinfo *ni) goto send_error; } else { /* Don't starve people filling buffers */ - schedule(); + cond_resched(); } spin_lock(&ni->writequeue_lock); @@ -964,15 +966,16 @@ static int send_to_sock(struct nodeinfo *ni) if (e->len == 0 && e->users == 0) { list_del(&e->list); + kunmap(e->page); free_entry(e); continue; } } spin_unlock(&ni->writequeue_lock); - out: - return ret; +out: + return; - send_error: +send_error: log_print("Error sending to node %d %d", ni->nodeid, ret); spin_lock(&ni->lock); if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { @@ -982,7 +985,7 @@ static int send_to_sock(struct nodeinfo *ni) } else spin_unlock(&ni->lock); - return ret; + return; } /* Try to send any messages that are pending */ @@ -994,7 +997,7 @@ static void process_output_queue(void) spin_lock_bh(&write_nodes_lock); list_for_each_safe(list, temp, &write_nodes) { struct nodeinfo *ni = - list_entry(list, struct nodeinfo, write_list); + list_entry(list, struct nodeinfo, write_list); clear_bit(NI_WRITE_PENDING, &ni->flags); list_del(&ni->write_list); @@ -1106,7 +1109,7 @@ static int dlm_recvd(void *data) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&lowcomms_recv_wait, &wait); if (!test_bit(CF_READ_PENDING, &sctp_con.flags)) - schedule(); + cond_resched(); remove_wait_queue(&lowcomms_recv_wait, &wait); set_current_state(TASK_RUNNING); @@ -1118,12 +1121,12 @@ static int dlm_recvd(void *data) /* Don't starve out everyone else */ if (++count >= MAX_RX_MSG_COUNT) { - schedule(); + cond_resched(); count = 0; } } while (!kthread_should_stop() && ret >=0); } - schedule(); + cond_resched(); } return 0; @@ -1138,7 +1141,7 @@ static int dlm_sendd(void *data) while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); if (write_list_empty()) - schedule(); + cond_resched(); set_current_state(TASK_RUNNING); if (sctp_con.eagain_flag) { @@ -1166,7 +1169,7 @@ static int daemons_start(void) p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); error = IS_ERR(p); - if (error) { + if (error) { log_print("can't start dlm_recvd %d", error); return error; } @@ -1174,7 +1177,7 @@ static int daemons_start(void) p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); error = IS_ERR(p); - if (error) { + if (error) { log_print("can't start dlm_sendd %d", error); kthread_stop(recv_task); return error; @@ -1197,43 +1200,28 @@ int dlm_lowcomms_start(void) error = daemons_start(); if (error) goto fail_sock; - atomic_set(&accepting, 1); return 0; - fail_sock: +fail_sock: close_connection(); return error; } -/* Set all the activity flags to prevent any socket activity. */ - void dlm_lowcomms_stop(void) { - atomic_set(&accepting, 0); + int i; + sctp_con.flags = 0x7; daemons_stop(); clean_writequeues(); close_connection(); dealloc_nodeinfo(); max_nodeid = 0; -} -int dlm_lowcomms_init(void) -{ - init_waitqueue_head(&lowcomms_recv_wait); - spin_lock_init(&write_nodes_lock); - INIT_LIST_HEAD(&write_nodes); - init_rwsem(&nodeinfo_lock); - return 0; -} - -void dlm_lowcomms_exit(void) -{ - int i; + dlm_local_count = 0; + dlm_local_nodeid = 0; for (i = 0; i < dlm_local_count; i++) kfree(dlm_local_addr[i]); - dlm_local_count = 0; - dlm_local_nodeid = 0; } diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c new file mode 100644 index 00000000000..8f2791fc844 --- /dev/null +++ b/fs/dlm/lowcomms-tcp.c @@ -0,0 +1,1189 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* + * lowcomms.c + * + * This is the "low-level" comms layer. + * + * It is responsible for sending/receiving messages + * from other nodes in the cluster. + * + * Cluster nodes are referred to by their nodeids. nodeids are + * simply 32 bit numbers to the locking module - if they need to + * be expanded for the cluster infrastructure then that is it's + * responsibility. It is this layer's + * responsibility to resolve these into IP address or + * whatever it needs for inter-node communication. + * + * The comms level is two kernel threads that deal mainly with + * the receiving of messages from other nodes and passing them + * up to the mid-level comms layer (which understands the + * message format) for execution by the locking core, and + * a send thread which does all the setting up of connections + * to remote nodes and the sending of data. Threads are not allowed + * to send their own data because it may cause them to wait in times + * of high load. Also, this way, the sending thread can collect together + * messages bound for one node and send them in one block. + * + * I don't see any problem with the recv thread executing the locking + * code on behalf of remote processes as the locking code is + * short, efficient and never waits. + * + */ + + +#include <asm/ioctls.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <linux/pagemap.h> + +#include "dlm_internal.h" +#include "lowcomms.h" +#include "midcomms.h" +#include "config.h" + +struct cbuf { + unsigned int base; + unsigned int len; + unsigned int mask; +}; + +#define NODE_INCREMENT 32 +static void cbuf_add(struct cbuf *cb, int n) +{ + cb->len += n; +} + +static int cbuf_data(struct cbuf *cb) +{ + return ((cb->base + cb->len) & cb->mask); +} + +static void cbuf_init(struct cbuf *cb, int size) +{ + cb->base = cb->len = 0; + cb->mask = size-1; +} + +static void cbuf_eat(struct cbuf *cb, int n) +{ + cb->len -= n; + cb->base += n; + cb->base &= cb->mask; +} + +static bool cbuf_empty(struct cbuf *cb) +{ + return cb->len == 0; +} + +/* Maximum number of incoming messages to process before + doing a cond_resched() +*/ +#define MAX_RX_MSG_COUNT 25 + +struct connection { + struct socket *sock; /* NULL if not connected */ + uint32_t nodeid; /* So we know who we are in the list */ + struct rw_semaphore sock_sem; /* Stop connect races */ + struct list_head read_list; /* On this list when ready for reading */ + struct list_head write_list; /* On this list when ready for writing */ + struct list_head state_list; /* On this list when ready to connect */ + unsigned long flags; /* bit 1,2 = We are on the read/write lists */ +#define CF_READ_PENDING 1 +#define CF_WRITE_PENDING 2 +#define CF_CONNECT_PENDING 3 +#define CF_IS_OTHERCON 4 + struct list_head writequeue; /* List of outgoing writequeue_entries */ + struct list_head listenlist; /* List of allocated listening sockets */ + spinlock_t writequeue_lock; + int (*rx_action) (struct connection *); /* What to do when active */ + struct page *rx_page; + struct cbuf cb; + int retries; + atomic_t waiting_requests; +#define MAX_CONNECT_RETRIES 3 + struct connection *othercon; +}; +#define sock2con(x) ((struct connection *)(x)->sk_user_data) + +/* An entry waiting to be sent */ +struct writequeue_entry { + struct list_head list; + struct page *page; + int offset; + int len; + int end; + int users; + struct connection *con; +}; + +static struct sockaddr_storage dlm_local_addr; + +/* Manage daemons */ +static struct task_struct *recv_task; +static struct task_struct *send_task; + +static wait_queue_t lowcomms_send_waitq_head; +static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq); +static wait_queue_t lowcomms_recv_waitq_head; +static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq); + +/* An array of pointers to connections, indexed by NODEID */ +static struct connection **connections; +static DECLARE_MUTEX(connections_lock); +static kmem_cache_t *con_cache; +static int conn_array_size; + +/* List of sockets that have reads pending */ +static LIST_HEAD(read_sockets); +static DEFINE_SPINLOCK(read_sockets_lock); + +/* List of sockets which have writes pending */ +static LIST_HEAD(write_sockets); +static DEFINE_SPINLOCK(write_sockets_lock); + +/* List of sockets which have connects pending */ +static LIST_HEAD(state_sockets); +static DEFINE_SPINLOCK(state_sockets_lock); + +static struct connection *nodeid2con(int nodeid, gfp_t allocation) +{ + struct connection *con = NULL; + + down(&connections_lock); + if (nodeid >= conn_array_size) { + int new_size = nodeid + NODE_INCREMENT; + struct connection **new_conns; + + new_conns = kzalloc(sizeof(struct connection *) * + new_size, allocation); + if (!new_conns) + goto finish; + + memcpy(new_conns, connections, sizeof(struct connection *) * conn_array_size); + conn_array_size = new_size; + kfree(connections); + connections = new_conns; + + } + + con = connections[nodeid]; + if (con == NULL && allocation) { + con = kmem_cache_zalloc(con_cache, allocation); + if (!con) + goto finish; + + con->nodeid = nodeid; + init_rwsem(&con->sock_sem); + INIT_LIST_HEAD(&con->writequeue); + spin_lock_init(&con->writequeue_lock); + + connections[nodeid] = con; + } + +finish: + up(&connections_lock); + return con; +} + +/* Data available on socket or listen socket received a connect */ +static void lowcomms_data_ready(struct sock *sk, int count_unused) +{ + struct connection *con = sock2con(sk); + + atomic_inc(&con->waiting_requests); + if (test_and_set_bit(CF_READ_PENDING, &con->flags)) + return; + + spin_lock_bh(&read_sockets_lock); + list_add_tail(&con->read_list, &read_sockets); + spin_unlock_bh(&read_sockets_lock); + + wake_up_interruptible(&lowcomms_recv_waitq); +} + +static void lowcomms_write_space(struct sock *sk) +{ + struct connection *con = sock2con(sk); + + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + return; + + spin_lock_bh(&write_sockets_lock); + list_add_tail(&con->write_list, &write_sockets); + spin_unlock_bh(&write_sockets_lock); + + wake_up_interruptible(&lowcomms_send_waitq); +} + +static inline void lowcomms_connect_sock(struct connection *con) +{ + if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) + return; + + spin_lock_bh(&state_sockets_lock); + list_add_tail(&con->state_list, &state_sockets); + spin_unlock_bh(&state_sockets_lock); + + wake_up_interruptible(&lowcomms_send_waitq); +} + +static void lowcomms_state_change(struct sock *sk) +{ + if (sk->sk_state == TCP_ESTABLISHED) + lowcomms_write_space(sk); +} + +/* Make a socket active */ +static int add_sock(struct socket *sock, struct connection *con) +{ + con->sock = sock; + + /* Install a data_ready callback */ + con->sock->sk->sk_data_ready = lowcomms_data_ready; + con->sock->sk->sk_write_space = lowcomms_write_space; + con->sock->sk->sk_state_change = lowcomms_state_change; + + return 0; +} + +/* Add the port number to an IP6 or 4 sockaddr and return the address + length */ +static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, + int *addr_len) +{ + saddr->ss_family = dlm_local_addr.ss_family; + if (saddr->ss_family == AF_INET) { + struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; + in4_addr->sin_port = cpu_to_be16(port); + *addr_len = sizeof(struct sockaddr_in); + } else { + struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; + in6_addr->sin6_port = cpu_to_be16(port); + *addr_len = sizeof(struct sockaddr_in6); + } +} + +/* Close a remote connection and tidy up */ +static void close_connection(struct connection *con, bool and_other) +{ + down_write(&con->sock_sem); + + if (con->sock) { + sock_release(con->sock); + con->sock = NULL; + } + if (con->othercon && and_other) { + /* Will only re-enter once. */ + close_connection(con->othercon, false); + } + if (con->rx_page) { + __free_page(con->rx_page); + con->rx_page = NULL; + } + con->retries = 0; + up_write(&con->sock_sem); +} + +/* Data received from remote end */ +static int receive_from_sock(struct connection *con) +{ + int ret = 0; + struct msghdr msg; + struct iovec iov[2]; + mm_segment_t fs; + unsigned len; + int r; + int call_again_soon = 0; + + down_read(&con->sock_sem); + + if (con->sock == NULL) + goto out; + if (con->rx_page == NULL) { + /* + * This doesn't need to be atomic, but I think it should + * improve performance if it is. + */ + con->rx_page = alloc_page(GFP_ATOMIC); + if (con->rx_page == NULL) + goto out_resched; + cbuf_init(&con->cb, PAGE_CACHE_SIZE); + } + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = iov; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_flags = 0; + + /* + * iov[0] is the bit of the circular buffer between the current end + * point (cb.base + cb.len) and the end of the buffer. + */ + iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); + iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); + iov[1].iov_len = 0; + + /* + * iov[1] is the bit of the circular buffer between the start of the + * buffer and the start of the currently used section (cb.base) + */ + if (cbuf_data(&con->cb) >= con->cb.base) { + iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb); + iov[1].iov_len = con->cb.base; + iov[1].iov_base = page_address(con->rx_page); + msg.msg_iovlen = 2; + } + len = iov[0].iov_len + iov[1].iov_len; + + fs = get_fs(); + set_fs(get_ds()); + r = ret = sock_recvmsg(con->sock, &msg, len, + MSG_DONTWAIT | MSG_NOSIGNAL); + set_fs(fs); + + if (ret <= 0) + goto out_close; + if (ret == len) + call_again_soon = 1; + cbuf_add(&con->cb, ret); + ret = dlm_process_incoming_buffer(con->nodeid, + page_address(con->rx_page), + con->cb.base, con->cb.len, + PAGE_CACHE_SIZE); + if (ret == -EBADMSG) { + printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, " + "iov_len=%u, iov_base[0]=%p, read=%d\n", + page_address(con->rx_page), con->cb.base, con->cb.len, + len, iov[0].iov_base, r); + } + if (ret < 0) + goto out_close; + cbuf_eat(&con->cb, ret); + + if (cbuf_empty(&con->cb) && !call_again_soon) { + __free_page(con->rx_page); + con->rx_page = NULL; + } + +out: + if (call_again_soon) + goto out_resched; + up_read(&con->sock_sem); + return 0; + +out_resched: + lowcomms_data_ready(con->sock->sk, 0); + up_read(&con->sock_sem); + cond_resched(); + return 0; + +out_close: + up_read(&con->sock_sem); + if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { + close_connection(con, false); + /* Reconnect when there is something to send */ + } + + return ret; +} + +/* Listening socket is busy, accept a connection */ +static int accept_from_sock(struct connection *con) +{ + int result; + struct sockaddr_storage peeraddr; + struct socket *newsock; + int len; + int nodeid; + struct connection *newcon; + + memset(&peeraddr, 0, sizeof(peeraddr)); + result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, + IPPROTO_TCP, &newsock); + if (result < 0) + return -ENOMEM; + + down_read(&con->sock_sem); + + result = -ENOTCONN; + if (con->sock == NULL) + goto accept_err; + + newsock->type = con->sock->type; + newsock->ops = con->sock->ops; + + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); + if (result < 0) + goto accept_err; + + /* Get the connected socket's peer */ + memset(&peeraddr, 0, sizeof(peeraddr)); + if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, + &len, 2)) { + result = -ECONNABORTED; + goto accept_err; + } + + /* Get the new node's NODEID */ + make_sockaddr(&peeraddr, 0, &len); + if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { + printk("dlm: connect from non cluster node\n"); + sock_release(newsock); + up_read(&con->sock_sem); + return -1; + } + + log_print("got connection from %d", nodeid); + + /* Check to see if we already have a connection to this node. This + * could happen if the two nodes initiate a connection at roughly + * the same time and the connections cross on the wire. + * TEMPORARY FIX: + * In this case we store the incoming one in "othercon" + */ + newcon = nodeid2con(nodeid, GFP_KERNEL); + if (!newcon) { + result = -ENOMEM; + goto accept_err; + } + down_write(&newcon->sock_sem); + if (newcon->sock) { + struct connection *othercon = newcon->othercon; + + if (!othercon) { + othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); + if (!othercon) { + printk("dlm: failed to allocate incoming socket\n"); + up_write(&newcon->sock_sem); + result = -ENOMEM; + goto accept_err; + } + othercon->nodeid = nodeid; + othercon->rx_action = receive_from_sock; + init_rwsem(&othercon->sock_sem); + set_bit(CF_IS_OTHERCON, &othercon->flags); + newcon->othercon = othercon; + } + othercon->sock = newsock; + newsock->sk->sk_user_data = othercon; + add_sock(newsock, othercon); + } + else { + newsock->sk->sk_user_data = newcon; + newcon->rx_action = receive_from_sock; + add_sock(newsock, newcon); + + } + + up_write(&newcon->sock_sem); + + /* + * Add it to the active queue in case we got data + * beween processing the accept adding the socket + * to the read_sockets list + */ + lowcomms_data_ready(newsock->sk, 0); + up_read(&con->sock_sem); + + return 0; + +accept_err: + up_read(&con->sock_sem); + sock_release(newsock); + + if (result != -EAGAIN) + printk("dlm: error accepting connection from node: %d\n", result); + return result; +} + +/* Connect a new socket to its peer */ +static void connect_to_sock(struct connection *con) +{ + int result = -EHOSTUNREACH; + struct sockaddr_storage saddr; + int addr_len; + struct socket *sock; + + if (con->nodeid == 0) { + log_print("attempt to connect sock 0 foiled"); + return; + } + + down_write(&con->sock_sem); + if (con->retries++ > MAX_CONNECT_RETRIES) + goto out; + + /* Some odd races can cause double-connects, ignore them */ + if (con->sock) { + result = 0; + goto out; + } + + /* Create a socket to communicate with */ + result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, + IPPROTO_TCP, &sock); + if (result < 0) + goto out_err; + + memset(&saddr, 0, sizeof(saddr)); + if (dlm_nodeid_to_addr(con->nodeid, &saddr)) + goto out_err; + + sock->sk->sk_user_data = con; + con->rx_action = receive_from_sock; + + make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); + + add_sock(sock, con); + + log_print("connecting to %d", con->nodeid); + result = + sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, + O_NONBLOCK); + if (result == -EINPROGRESS) + result = 0; + if (result == 0) + goto out; + +out_err: + if (con->sock) { + sock_release(con->sock); + con->sock = NULL; + } + /* + * Some errors are fatal and this list might need adjusting. For other + * errors we try again until the max number of retries is reached. + */ + if (result != -EHOSTUNREACH && result != -ENETUNREACH && + result != -ENETDOWN && result != EINVAL + && result != -EPROTONOSUPPORT) { + lowcomms_connect_sock(con); + result = 0; + } +out: + up_write(&con->sock_sem); + return; +} + +static struct socket *create_listen_sock(struct connection *con, + struct sockaddr_storage *saddr) +{ + struct socket *sock = NULL; + mm_segment_t fs; + int result = 0; + int one = 1; + int addr_len; + + if (dlm_local_addr.ss_family == AF_INET) + addr_len = sizeof(struct sockaddr_in); + else + addr_len = sizeof(struct sockaddr_in6); + + /* Create a socket to communicate with */ + result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + if (result < 0) { + printk("dlm: Can't create listening comms socket\n"); + goto create_out; + } + + fs = get_fs(); + set_fs(get_ds()); + result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + set_fs(fs); + if (result < 0) { + printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n", + result); + } + sock->sk->sk_user_data = con; + con->rx_action = accept_from_sock; + con->sock = sock; + + /* Bind to our port */ + make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); + result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); + if (result < 0) { + printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); + sock_release(sock); + sock = NULL; + con->sock = NULL; + goto create_out; + } + + fs = get_fs(); + set_fs(get_ds()); + + result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&one, sizeof(one)); + set_fs(fs); + if (result < 0) { + printk("dlm: Set keepalive failed: %d\n", result); + } + + result = sock->ops->listen(sock, 5); + if (result < 0) { + printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); + sock_release(sock); + sock = NULL; + goto create_out; + } + +create_out: + return sock; +} + + +/* Listen on all interfaces */ +static int listen_for_all(void) +{ + struct socket *sock = NULL; + struct connection *con = nodeid2con(0, GFP_KERNEL); + int result = -EINVAL; + + /* We don't support multi-homed hosts */ + set_bit(CF_IS_OTHERCON, &con->flags); + + sock = create_listen_sock(con, &dlm_local_addr); + if (sock) { + add_sock(sock, con); + result = 0; + } + else { + result = -EADDRINUSE; + } + + return result; +} + + + +static struct writequeue_entry *new_writequeue_entry(struct connection *con, + gfp_t allocation) +{ + struct writequeue_entry *entry; + + entry = kmalloc(sizeof(struct writequeue_entry), allocation); + if (!entry) + return NULL; + + entry->page = alloc_page(allocation); + if (!entry->page) { + kfree(entry); + return NULL; + } + + entry->offset = 0; + entry->len = 0; + entry->end = 0; + entry->users = 0; + entry->con = con; + + return entry; +} + +void *dlm_lowcomms_get_buffer(int nodeid, int len, + gfp_t allocation, char **ppc) +{ + struct connection *con; + struct writequeue_entry *e; + int offset = 0; + int users = 0; + + con = nodeid2con(nodeid, allocation); + if (!con) + return NULL; + + e = list_entry(con->writequeue.prev, struct writequeue_entry, list); + if ((&e->list == &con->writequeue) || + (PAGE_CACHE_SIZE - e->end < len)) { + e = NULL; + } else { + offset = e->end; + e->end += len; + users = e->users++; + } + spin_unlock(&con->writequeue_lock); + + if (e) { + got_one: + if (users == 0) + kmap(e->page); + *ppc = page_address(e->page) + offset; + return e; + } + + e = new_writequeue_entry(con, allocation); + if (e) { + spin_lock(&con->writequeue_lock); + offset = e->end; + e->end += len; + users = e->users++; + list_add_tail(&e->list, &con->writequeue); + spin_unlock(&con->writequeue_lock); + goto got_one; + } + return NULL; +} + +void dlm_lowcomms_commit_buffer(void *mh) +{ + struct writequeue_entry *e = (struct writequeue_entry *)mh; + struct connection *con = e->con; + int users; + + users = --e->users; + if (users) + goto out; + e->len = e->end - e->offset; + kunmap(e->page); + spin_unlock(&con->writequeue_lock); + + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { + spin_lock_bh(&write_sockets_lock); + list_add_tail(&con->write_list, &write_sockets); + spin_unlock_bh(&write_sockets_lock); + + wake_up_interruptible(&lowcomms_send_waitq); + } + return; + +out: + spin_unlock(&con->writequeue_lock); + return; +} + +static void free_entry(struct writequeue_entry *e) +{ + __free_page(e->page); + kfree(e); +} + +/* Send a message */ +static void send_to_sock(struct connection *con) +{ + int ret = 0; + ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); + const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; + struct writequeue_entry *e; + int len, offset; + + down_read(&con->sock_sem); + if (con->sock == NULL) + goto out_connect; + + sendpage = con->sock->ops->sendpage; + + spin_lock(&con->writequeue_lock); + for (;;) { + e = list_entry(con->writequeue.next, struct writequeue_entry, + list); + if ((struct list_head *) e == &con->writequeue) + break; + + len = e->len; + offset = e->offset; + BUG_ON(len == 0 && e->users == 0); + spin_unlock(&con->writequeue_lock); + + ret = 0; + if (len) { + ret = sendpage(con->sock, e->page, offset, len, + msg_flags); + if (ret == -EAGAIN || ret == 0) + goto out; + if (ret <= 0) + goto send_error; + } + else { + /* Don't starve people filling buffers */ + cond_resched(); + } + + spin_lock(&con->writequeue_lock); + e->offset += ret; + e->len -= ret; + + if (e->len == 0 && e->users == 0) { + list_del(&e->list); + kunmap(e->page); + free_entry(e); + continue; + } + } + spin_unlock(&con->writequeue_lock); +out: + up_read(&con->sock_sem); + return; + +send_error: + up_read(&con->sock_sem); + close_connection(con, false); + lowcomms_connect_sock(con); + return; + +out_connect: + up_read(&con->sock_sem); + lowcomms_connect_sock(con); + return; +} + +static void clean_one_writequeue(struct connection *con) +{ + struct list_head *list; + struct list_head *temp; + + spin_lock(&con->writequeue_lock); + list_for_each_safe(list, temp, &con->writequeue) { + struct writequeue_entry *e = + list_entry(list, struct writequeue_entry, list); + list_del(&e->list); + free_entry(e); + } + spin_unlock(&con->writequeue_lock); +} + +/* Called from recovery when it knows that a node has + left the cluster */ +int dlm_lowcomms_close(int nodeid) +{ + struct connection *con; + + if (!connections) + goto out; + + log_print("closing connection to node %d", nodeid); + con = nodeid2con(nodeid, 0); + if (con) { + clean_one_writequeue(con); + close_connection(con, true); + atomic_set(&con->waiting_requests, 0); + } + return 0; + +out: + return -1; +} + +/* API send message call, may queue the request */ +/* N.B. This is the old interface - use the new one for new calls */ +int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation) +{ + struct writequeue_entry *e; + char *b; + + e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b); + if (e) { + memcpy(b, buf, len); + dlm_lowcomms_commit_buffer(e); + return 0; + } + return -ENOBUFS; +} + +/* Look for activity on active sockets */ +static void process_sockets(void) +{ + struct list_head *list; + struct list_head *temp; + int count = 0; + + spin_lock_bh(&read_sockets_lock); + list_for_each_safe(list, temp, &read_sockets) { + + struct connection *con = + list_entry(list, struct connection, read_list); + list_del(&con->read_list); + clear_bit(CF_READ_PENDING, &con->flags); + + spin_unlock_bh(&read_sockets_lock); + + /* This can reach zero if we are processing requests + * as they come in. + */ + if (atomic_read(&con->waiting_requests) == 0) { + spin_lock_bh(&read_sockets_lock); + continue; + } + + do { + con->rx_action(con); + + /* Don't starve out everyone else */ + if (++count >= MAX_RX_MSG_COUNT) { + cond_resched(); + count = 0; + } + + } while (!atomic_dec_and_test(&con->waiting_requests) && + !kthread_should_stop()); + + spin_lock_bh(&read_sockets_lock); + } + spin_unlock_bh(&read_sockets_lock); +} + +/* Try to send any messages that are pending + */ +static void process_output_queue(void) +{ + struct list_head *list; + struct list_head *temp; + + spin_lock_bh(&write_sockets_lock); + list_for_each_safe(list, temp, &write_sockets) { + struct connection *con = + list_entry(list, struct connection, write_list); + clear_bit(CF_WRITE_PENDING, &con->flags); + list_del(&con->write_list); + + spin_unlock_bh(&write_sockets_lock); + send_to_sock(con); + spin_lock_bh(&write_sockets_lock); + } + spin_unlock_bh(&write_sockets_lock); +} + +static void process_state_queue(void) +{ + struct list_head *list; + struct list_head *temp; + + spin_lock_bh(&state_sockets_lock); + list_for_each_safe(list, temp, &state_sockets) { + struct connection *con = + list_entry(list, struct connection, state_list); + list_del(&con->state_list); + clear_bit(CF_CONNECT_PENDING, &con->flags); + spin_unlock_bh(&state_sockets_lock); + + connect_to_sock(con); + spin_lock_bh(&state_sockets_lock); + } + spin_unlock_bh(&state_sockets_lock); +} + + +/* Discard all entries on the write queues */ +static void clean_writequeues(void) +{ + int nodeid; + + for (nodeid = 1; nodeid < conn_array_size; nodeid++) { + struct connection *con = nodeid2con(nodeid, 0); + + if (con) + clean_one_writequeue(con); + } +} + +static int read_list_empty(void) +{ + int status; + + spin_lock_bh(&read_sockets_lock); + status = list_empty(&read_sockets); + spin_unlock_bh(&read_sockets_lock); + + return status; +} + +/* DLM Transport comms receive daemon */ +static int dlm_recvd(void *data) +{ + init_waitqueue_entry(&lowcomms_recv_waitq_head, current); + add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (read_list_empty()) + cond_resched(); + set_current_state(TASK_RUNNING); + + process_sockets(); + } + + return 0; +} + +static int write_and_state_lists_empty(void) +{ + int status; + + spin_lock_bh(&write_sockets_lock); + status = list_empty(&write_sockets); + spin_unlock_bh(&write_sockets_lock); + + spin_lock_bh(&state_sockets_lock); + if (list_empty(&state_sockets) == 0) + status = 0; + spin_unlock_bh(&state_sockets_lock); + + return status; +} + +/* DLM Transport send daemon */ +static int dlm_sendd(void *data) +{ + init_waitqueue_entry(&lowcomms_send_waitq_head, current); + add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (write_and_state_lists_empty()) + cond_resched(); + set_current_state(TASK_RUNNING); + + process_state_queue(); + process_output_queue(); + } + + return 0; +} + +static void daemons_stop(void) +{ + kthread_stop(recv_task); + kthread_stop(send_task); +} + +static int daemons_start(void) +{ + struct task_struct *p; + int error; + + p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); + error = IS_ERR(p); + if (error) { + log_print("can't start dlm_recvd %d", error); + return error; + } + recv_task = p; + + p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); + error = IS_ERR(p); + if (error) { + log_print("can't start dlm_sendd %d", error); + kthread_stop(recv_task); + return error; + } + send_task = p; + + return 0; +} + +/* + * Return the largest buffer size we can cope with. + */ +int lowcomms_max_buffer_size(void) +{ + return PAGE_CACHE_SIZE; +} + +void dlm_lowcomms_stop(void) +{ + int i; + + /* Set all the flags to prevent any + socket activity. + */ + for (i = 0; i < conn_array_size; i++) { + if (connections[i]) + connections[i]->flags |= 0xFF; + } + + daemons_stop(); + clean_writequeues(); + + for (i = 0; i < conn_array_size; i++) { + if (connections[i]) { + close_connection(connections[i], true); + if (connections[i]->othercon) + kmem_cache_free(con_cache, connections[i]->othercon); + kmem_cache_free(con_cache, connections[i]); + } + } + + kfree(connections); + connections = NULL; + + kmem_cache_destroy(con_cache); +} + +/* This is quite likely to sleep... */ +int dlm_lowcomms_start(void) +{ + int error = 0; + + error = -ENOMEM; + connections = kzalloc(sizeof(struct connection *) * + NODE_INCREMENT, GFP_KERNEL); + if (!connections) + goto out; + + conn_array_size = NODE_INCREMENT; + + if (dlm_our_addr(&dlm_local_addr, 0)) { + log_print("no local IP address has been set"); + goto fail_free_conn; + } + if (!dlm_our_addr(&dlm_local_addr, 1)) { + log_print("This dlm comms module does not support multi-homed clustering"); + goto fail_free_conn; + } + + con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), + __alignof__(struct connection), 0, + NULL, NULL); + if (!con_cache) + goto fail_free_conn; + + + /* Start listening */ + error = listen_for_all(); + if (error) + goto fail_unlisten; + + error = daemons_start(); + if (error) + goto fail_unlisten; + + return 0; + +fail_unlisten: + close_connection(connections[0], false); + kmem_cache_free(con_cache, connections[0]); + kmem_cache_destroy(con_cache); + +fail_free_conn: + kfree(connections); + +out: + return error; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 2d045e0daae..a9a9618c0d3 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -14,8 +14,6 @@ #ifndef __LOWCOMMS_DOT_H__ #define __LOWCOMMS_DOT_H__ -int dlm_lowcomms_init(void); -void dlm_lowcomms_exit(void); int dlm_lowcomms_start(void); void dlm_lowcomms_stop(void); int dlm_lowcomms_close(int nodeid); diff --git a/fs/dlm/main.c b/fs/dlm/main.c index a8da8dc36b2..162fbae58fe 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -16,7 +16,6 @@ #include "lock.h" #include "user.h" #include "memory.h" -#include "lowcomms.h" #include "config.h" #ifdef CONFIG_DLM_DEBUG @@ -47,20 +46,14 @@ static int __init init_dlm(void) if (error) goto out_config; - error = dlm_lowcomms_init(); - if (error) - goto out_debug; - error = dlm_user_init(); if (error) - goto out_lowcomms; + goto out_debug; printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; - out_lowcomms: - dlm_lowcomms_exit(); out_debug: dlm_unregister_debugfs(); out_config: @@ -76,7 +69,6 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { dlm_user_exit(); - dlm_lowcomms_exit(); dlm_config_exit(); dlm_memory_exit(); dlm_lockspace_exit(); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index a3f7de7f3a8..85e2897bd74 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -186,6 +186,14 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) struct dlm_member *memb, *safe; int i, error, found, pos = 0, neg = 0, low = -1; + /* previously removed members that we've not finished removing need to + count as a negative change so the "neg" recovery steps will happen */ + + list_for_each_entry(memb, &ls->ls_nodes_gone, list) { + log_debug(ls, "prev removed member %d", memb->nodeid); + neg++; + } + /* move departed members from ls_nodes to ls_nodes_gone */ list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 989b608fd83..5352b03ff5a 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -15,7 +15,7 @@ #include "config.h" #include "memory.h" -static kmem_cache_t *lkb_cache; +static struct kmem_cache *lkb_cache; int dlm_memory_init(void) diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 518239a8b1e..4cc31be9cd9 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -90,13 +90,28 @@ static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) return 0; } +static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq) +{ + spin_lock(&ls->ls_rcom_spin); + *new_seq = ++ls->ls_rcom_seq; + set_bit(LSFL_RCOM_WAIT, &ls->ls_flags); + spin_unlock(&ls->ls_rcom_spin); +} + +static void disallow_sync_reply(struct dlm_ls *ls) +{ + spin_lock(&ls->ls_rcom_spin); + clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags); + clear_bit(LSFL_RCOM_READY, &ls->ls_flags); + spin_unlock(&ls->ls_rcom_spin); +} + int dlm_rcom_status(struct dlm_ls *ls, int nodeid) { struct dlm_rcom *rc; struct dlm_mhandle *mh; int error = 0; - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); ls->ls_recover_nodeid = nodeid; if (nodeid == dlm_our_nodeid()) { @@ -108,12 +123,14 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); if (error) goto out; - rc->rc_id = ++ls->ls_rcom_seq; + + allow_sync_reply(ls, &rc->rc_id); + memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); send_rcom(ls, mh, rc); error = dlm_wait_function(ls, &rcom_response); - clear_bit(LSFL_RCOM_READY, &ls->ls_flags); + disallow_sync_reply(ls); if (error) goto out; @@ -150,14 +167,21 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) { - if (rc_in->rc_id != ls->ls_rcom_seq) { - log_debug(ls, "reject old reply %d got %llx wanted %llx", - rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq); - return; + spin_lock(&ls->ls_rcom_spin); + if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) || + rc_in->rc_id != ls->ls_rcom_seq) { + log_debug(ls, "reject reply %d from %d seq %llx expect %llx", + rc_in->rc_type, rc_in->rc_header.h_nodeid, + (unsigned long long)rc_in->rc_id, + (unsigned long long)ls->ls_rcom_seq); + goto out; } memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length); set_bit(LSFL_RCOM_READY, &ls->ls_flags); + clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags); wake_up(&ls->ls_wait_general); + out: + spin_unlock(&ls->ls_rcom_spin); } static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) @@ -171,7 +195,6 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) struct dlm_mhandle *mh; int error = 0, len = sizeof(struct dlm_rcom); - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); ls->ls_recover_nodeid = nodeid; if (nodeid == dlm_our_nodeid()) { @@ -185,12 +208,14 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) if (error) goto out; memcpy(rc->rc_buf, last_name, last_len); - rc->rc_id = ++ls->ls_rcom_seq; + + allow_sync_reply(ls, &rc->rc_id); + memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); send_rcom(ls, mh, rc); error = dlm_wait_function(ls, &rcom_response); - clear_bit(LSFL_RCOM_READY, &ls->ls_flags); + disallow_sync_reply(ls); out: return error; } @@ -370,9 +395,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; + struct rcom_config *rf; struct dlm_mhandle *mh; char *mb; - int mb_len = sizeof(struct dlm_rcom); + int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); if (!mh) @@ -391,6 +417,9 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) rc->rc_id = rc_in->rc_id; rc->rc_result = -ESRCH; + rf = (struct rcom_config *) rc->rc_buf; + rf->rf_lvblen = -1; + dlm_rcom_out(rc); dlm_lowcomms_commit_buffer(mh); @@ -412,9 +441,10 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) ls = dlm_find_lockspace_global(hd->h_lockspace); if (!ls) { - log_print("lockspace %x from %d not found", - hd->h_lockspace, nodeid); - send_ls_not_ready(nodeid, rc); + log_print("lockspace %x from %d type %x not found", + hd->h_lockspace, nodeid, rc->rc_type); + if (rc->rc_type == DLM_RCOM_STATUS) + send_ls_not_ready(nodeid, rc); return; } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index a5e6d184872..cf9f6831bab 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -252,6 +252,7 @@ static void recover_list_clear(struct dlm_ls *ls) spin_lock(&ls->ls_recover_list_lock); list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) { list_del_init(&r->res_recover_list); + r->res_recover_locks_count = 0; dlm_put_rsb(r); ls->ls_recover_list_count--; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 362e3eff4dc..650536aa513 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -45,7 +45,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) unsigned long start; int error, neg = 0; - log_debug(ls, "recover %llx", rv->seq); + log_debug(ls, "recover %llx", (unsigned long long)rv->seq); mutex_lock(&ls->ls_recoverd_active); @@ -94,14 +94,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) } /* - * Purge directory-related requests that are saved in requestqueue. - * All dir requests from before recovery are invalid now due to the dir - * rebuild and will be resent by the requesting nodes. - */ - - dlm_purge_requestqueue(ls); - - /* * Wait for all nodes to complete directory rebuild. */ @@ -164,10 +156,31 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) */ dlm_recover_rsbs(ls); + } else { + /* + * Other lockspace members may be going through the "neg" steps + * while also adding us to the lockspace, in which case they'll + * be doing the recover_locks (RS_LOCKS) barrier. + */ + dlm_set_recover_status(ls, DLM_RS_LOCKS); + + error = dlm_recover_locks_wait(ls); + if (error) { + log_error(ls, "recover_locks_wait failed %d", error); + goto fail; + } } dlm_release_root_list(ls); + /* + * Purge directory-related requests that are saved in requestqueue. + * All dir requests from before recovery are invalid now due to the dir + * rebuild and will be resent by the requesting nodes. + */ + + dlm_purge_requestqueue(ls); + dlm_set_recover_status(ls, DLM_RS_DONE); error = dlm_recover_done_wait(ls); if (error) { @@ -199,7 +212,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_astd_wake(); - log_debug(ls, "recover %llx done: %u ms", rv->seq, + log_debug(ls, "recover %llx done: %u ms", + (unsigned long long)rv->seq, jiffies_to_msecs(jiffies - start)); mutex_unlock(&ls->ls_recoverd_active); @@ -207,11 +221,16 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) fail: dlm_release_root_list(ls); - log_debug(ls, "recover %llx error %d", rv->seq, error); + log_debug(ls, "recover %llx error %d", + (unsigned long long)rv->seq, error); mutex_unlock(&ls->ls_recoverd_active); return error; } +/* The dlm_ls_start() that created the rv we take here may already have been + stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP + flag set. */ + static void do_ls_recovery(struct dlm_ls *ls) { struct dlm_recover *rv = NULL; @@ -219,7 +238,8 @@ static void do_ls_recovery(struct dlm_ls *ls) spin_lock(&ls->ls_recover_lock); rv = ls->ls_recover_args; ls->ls_recover_args = NULL; - clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + if (rv && ls->ls_recover_seq == rv->seq) + clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); spin_unlock(&ls->ls_recover_lock); if (rv) { diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 7b2b089634a..65008d79c96 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -30,26 +30,36 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. */ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) { struct rq_entry *e; int length = hd->h_length; - - if (dlm_is_removed(ls, nodeid)) - return; + int rv = 0; e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); if (!e) { log_print("dlm_add_requestqueue: out of memory\n"); - return; + return 0; } e->nodeid = nodeid; memcpy(e->request, hd, length); + /* We need to check dlm_locking_stopped() after taking the mutex to + avoid a race where dlm_recoverd enables locking and runs + process_requestqueue between our earlier dlm_locking_stopped check + and this addition to the requestqueue. */ + mutex_lock(&ls->ls_requestqueue_mutex); - list_add_tail(&e->list, &ls->ls_requestqueue); + if (dlm_locking_stopped(ls)) + list_add_tail(&e->list, &ls->ls_requestqueue); + else { + log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); + kfree(e); + rv = -EAGAIN; + } mutex_unlock(&ls->ls_requestqueue_mutex); + return rv; } int dlm_process_requestqueue(struct dlm_ls *ls) @@ -120,6 +130,10 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) { uint32_t type = ms->m_type; + /* the ls is being cleaned up and freed by release_lockspace */ + if (!ls->ls_count) + return 1; + if (dlm_is_removed(ls, nodeid)) return 1; diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 349f0d292d9..6a53ea03335 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -13,7 +13,7 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls); diff --git a/fs/dnotify.c b/fs/dnotify.c index 2b0442db67e..1f26a2b9eee 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -23,7 +23,7 @@ int dir_notify_enable __read_mostly = 1; -static kmem_cache_t *dn_cache __read_mostly; +static struct kmem_cache *dn_cache __read_mostly; static void redo_inode_mask(struct inode *inode) { @@ -77,7 +77,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) inode = filp->f_dentry->d_inode; if (!S_ISDIR(inode->i_mode)) return -ENOTDIR; - dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL); + dn = kmem_cache_alloc(dn_cache, GFP_KERNEL); if (dn == NULL) return -ENOMEM; spin_lock(&inode->i_lock); diff --git a/fs/dquot.c b/fs/dquot.c index 9af789567e5..f9cd5e23ebd 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -131,7 +131,7 @@ static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; /* SLAB cache for dquot structures */ -static kmem_cache_t *dquot_cachep; +static struct kmem_cache *dquot_cachep; int register_quota_format(struct quota_format_type *fmt) { @@ -600,7 +600,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; - dquot = kmem_cache_alloc(dquot_cachep, SLAB_NOFS); + dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS); if(!dquot) return NODQUOT; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f63a7755fe8..7196f50fe15 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -628,7 +628,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page) num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; base_extent = (page->index * num_extents_per_page); lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache, - SLAB_KERNEL); + GFP_KERNEL); if (!lower_page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " @@ -1334,7 +1334,7 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, goto out; } /* Released in this function */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER); if (!page_virt) { ecryptfs_printk(KERN_ERR, "Out of memory\n"); rc = -ENOMEM; @@ -1493,7 +1493,7 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; /* Read the first page from the underlying file */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); if (!page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n"); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a92ef05eff8..42099e779a5 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -250,7 +250,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) int lower_flags; /* Released in ecryptfs_release or end of function if failure */ - file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL); + file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); if (!file_info) { ecryptfs_printk(KERN_ERR, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index dfcc68484f4..8a1945a84c3 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -369,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, BUG_ON(!atomic_read(&lower_dentry->d_count)); ecryptfs_set_dentry_private(dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_dentry_to_private(dentry)) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " @@ -404,7 +404,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, /* Released in this function */ page_virt = (char *)kmem_cache_alloc(ecryptfs_header_cache_2, - SLAB_USER); + GFP_USER); if (!page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, @@ -795,7 +795,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) /* Released at out_free: label */ ecryptfs_set_file_private(&fake_ecryptfs_file, kmem_cache_alloc(ecryptfs_file_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { rc = -ENOMEM; goto out; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index c3746f56d16..745c0f1bfbb 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -207,7 +207,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ auth_tok_list_item = - kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL); + kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL); if (!auth_tok_list_item) { ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); rc = -ENOMEM; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a78d87d14ba..3ede12b2593 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -378,7 +378,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) /* Released in ecryptfs_put_super() */ ecryptfs_set_superblock_private(sb, kmem_cache_alloc(ecryptfs_sb_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_superblock_to_private(sb)) { ecryptfs_printk(KERN_WARNING, "Out of memory\n"); rc = -ENOMEM; @@ -402,7 +402,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) /* through deactivate_super(sb) from get_sb_nodev() */ ecryptfs_set_dentry_private(sb->s_root, kmem_cache_alloc(ecryptfs_dentry_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_dentry_to_private(sb->s_root)) { ecryptfs_printk(KERN_ERR, "dentry_info_cache alloc failed\n"); @@ -546,7 +546,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) } static struct ecryptfs_cache_info { - kmem_cache_t **cache; + struct kmem_cache **cache; const char *name; size_t size; void (*ctor)(void*, struct kmem_cache *, unsigned long); @@ -691,7 +691,7 @@ static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); -struct ecryptfs_version_str_map_elem { +static struct ecryptfs_version_str_map_elem { u32 flag; char *str; } ecryptfs_version_str_map[] = { diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 825757ae486..eaa5daaf106 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -50,7 +50,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb) struct inode *inode = NULL; ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, - SLAB_KERNEL); + GFP_KERNEL); if (unlikely(!ecryptfs_inode)) goto out; ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); diff --git a/fs/efs/super.c b/fs/efs/super.c index b3f50651eb6..dfebf21289f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -52,12 +52,12 @@ static struct pt_types sgi_pt_types[] = { }; -static kmem_cache_t * efs_inode_cachep; +static struct kmem_cache * efs_inode_cachep; static struct inode *efs_alloc_inode(struct super_block *sb) { struct efs_inode_info *ei; - ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, SLAB_KERNEL); + ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -68,7 +68,7 @@ static void efs_destroy_inode(struct inode *inode) kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ae228ec54e9..88a6f8d0b88 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -283,10 +283,10 @@ static struct mutex epmutex; static struct poll_safewake psw; /* Slab cache used to allocate "struct epitem" */ -static kmem_cache_t *epi_cache __read_mostly; +static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ -static kmem_cache_t *pwq_cache __read_mostly; +static struct kmem_cache *pwq_cache __read_mostly; /* Virtual fs used to allocate inodes for eventpoll files */ static struct vfsmount *eventpoll_mnt __read_mostly; @@ -961,7 +961,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; - if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) { + if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; @@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct ep_pqueue epq; error = -ENOMEM; - if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL))) + if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) goto eexit_1; /* Item initialization follow here ... */ diff --git a/fs/exec.c b/fs/exec.c index d993ea1a81a..add0e03c3ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -404,7 +404,7 @@ int setup_arg_pages(struct linux_binprm *bprm, bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; @@ -1515,7 +1515,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) ispipe = 1; } else file = filp_open(corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + 0600); if (IS_ERR(file)) goto fail_unlock; inode = file->f_dentry->d_inode; diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 1dfba77eab1..e3cf8c81507 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -44,6 +44,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~EXT2_DIRSYNC_FL; + mutex_lock(&inode->i_mutex); oldflags = ei->i_flags; /* @@ -53,13 +54,16 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } flags = flags & EXT2_FL_USER_MODIFIABLE; flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; ei->i_flags = flags; + mutex_unlock(&inode->i_mutex); ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d8b9abd95d0..255cef5f742 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -135,12 +135,12 @@ static void ext2_put_super (struct super_block * sb) return; } -static kmem_cache_t * ext2_inode_cachep; +static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); + ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -156,7 +156,7 @@ static void ext2_destroy_inode(struct inode *inode) kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; @@ -1090,8 +1090,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) { struct super_block *sb = dentry->d_sb; struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; unsigned long overhead; int i; + u64 fsid; if (test_opt (sb, MINIX_DF)) overhead = 0; @@ -1104,7 +1106,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) * All of the blocks before first_data_block are * overhead */ - overhead = le32_to_cpu(sbi->s_es->s_first_data_block); + overhead = le32_to_cpu(es->s_first_data_block); /* * Add the overhead attributed to the superblock and @@ -1125,14 +1127,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_type = EXT2_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead; + buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; buf->f_bfree = ext2_count_free_blocks(sb); - buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count)) + buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); + if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) buf->f_bavail = 0; - buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); - buf->f_ffree = ext2_count_free_inodes (sb); + buf->f_files = le32_to_cpu(es->s_inodes_count); + buf->f_ffree = ext2_count_free_inodes(sb); buf->f_namelen = EXT2_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; return 0; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index af52a7f8b29..247efd0b51d 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -342,12 +342,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) return; - lock_super(sb); - EXT2_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); + EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); sb->s_dirt = 1; mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - unlock_super(sb); } /* diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile index 704cd44a40c..e77766a8b3f 100644 --- a/fs/ext3/Makefile +++ b/fs/ext3/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o + ioctl.o namei.o super.o symlink.o hash.o resize.o ext3_jbd.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index b41a7d7e20f..22161740ba2 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -144,7 +144,7 @@ restart: printk("Block Allocation Reservation Windows Map (%s):\n", fn); while (n) { - rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " "start: %lu, end: %lu\n", @@ -730,7 +730,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, here = 0; p = ((char *)bh->b_data) + (here >> 3); - r = memscan(p, 0, (maxblocks - here + 7) >> 3); + r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); next = (r - ((char *)bh->b_data)) << 3; if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) @@ -949,7 +949,7 @@ static int find_next_reservable_window( prev = rsv; next = rb_next(&rsv->rsv_node); - rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node); + rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node); /* * Reached the last reservation, we can just append to the @@ -1148,7 +1148,7 @@ retry: * check if the first free block is within the * free space we just reserved */ - if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) + if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) return 0; /* success */ /* * if the first free bit we found is out of the reservable space @@ -1193,7 +1193,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, if (!next) my_rsv->rsv_end += size; else { - next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node); if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) my_rsv->rsv_end += size; @@ -1271,7 +1271,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, } /* * grp_goal is a group relative block number (if there is a goal) - * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb) + * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb) * first block is a filesystem wide block number * first block is the block number of the first block in this group */ @@ -1307,10 +1307,14 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) grp_goal = -1; - } else if (grp_goal > 0 && - (my_rsv->rsv_end-grp_goal+1) < *count) - try_to_extend_reservation(my_rsv, sb, - *count-my_rsv->rsv_end + grp_goal - 1); + } else if (grp_goal >= 0) { + int curr = my_rsv->rsv_end - + (grp_goal + group_first_block) + 1; + + if (curr < *count) + try_to_extend_reservation(my_rsv, sb, + *count - curr); + } if ((my_rsv->rsv_start > group_last_block) || (my_rsv->rsv_end < group_first_block)) { @@ -1511,10 +1515,8 @@ retry_alloc: if (group_no >= ngroups) group_no = 0; gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) { - *errp = -EIO; - goto out; - } + if (!gdp) + goto io_error; free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); /* * skip this group if the number of @@ -1548,6 +1550,7 @@ retry_alloc: */ if (my_rsv) { my_rsv = NULL; + windowsz = 0; group_no = goal_group; goto retry_alloc; } diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index d0b54f30b91..5a9313ecd4e 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -154,6 +154,9 @@ static int ext3_readdir(struct file * filp, ext3_error (sb, "ext3_readdir", "directory #%lu contains a hole at offset %lu", inode->i_ino, (unsigned long)filp->f_pos); + /* corrupt size? Maybe no more blocks to read */ + if (filp->f_pos > inode->i_blocks << 9) + break; filp->f_pos += sb->s_blocksize - offset; continue; } diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c new file mode 100644 index 00000000000..e1f91fd26a9 --- /dev/null +++ b/fs/ext3/ext3_jbd.c @@ -0,0 +1,59 @@ +/* + * Interface between ext3 and JBD + */ + +#include <linux/ext3_jbd.h> + +int __ext3_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_undo_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_write_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_forget(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_revoke(const char *where, handle_t *handle, + unsigned long blocknr, struct buffer_head *bh) +{ + int err = journal_revoke(handle, blocknr, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_get_create_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_dirty_metadata(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 03ba5bcab18..beaf25f5112 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1148,37 +1148,102 @@ static int do_journal_get_write_access(handle_t *handle, return ext3_journal_get_write_access(handle, bh); } +/* + * The idea of this helper function is following: + * if prepare_write has allocated some blocks, but not all of them, the + * transaction must include the content of the newly allocated blocks. + * This content is expected to be set to zeroes by block_prepare_write(). + * 2006/10/14 SAW + */ +static int ext3_prepare_failure(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct address_space *mapping; + struct buffer_head *bh, *head, *next; + unsigned block_start, block_end; + unsigned blocksize; + int ret; + handle_t *handle = ext3_journal_current_handle(); + + mapping = page->mapping; + if (ext3_should_writeback_data(mapping->host)) { + /* optimization: no constraints about data */ +skip: + return ext3_journal_stop(handle); + } + + head = page_buffers(page); + blocksize = head->b_size; + for ( bh = head, block_start = 0; + bh != head || !block_start; + block_start = block_end, bh = next) + { + next = bh->b_this_page; + block_end = block_start + blocksize; + if (block_end <= from) + continue; + if (block_start >= to) { + block_start = to; + break; + } + if (!buffer_mapped(bh)) + /* prepare_write failed on this bh */ + break; + if (ext3_should_journal_data(mapping->host)) { + ret = do_journal_get_write_access(handle, bh); + if (ret) { + ext3_journal_stop(handle); + return ret; + } + } + /* + * block_start here becomes the first block where the current iteration + * of prepare_write failed. + */ + } + if (block_start <= from) + goto skip; + + /* commit allocated and zeroed buffers */ + return mapping->a_ops->commit_write(file, page, from, block_start); +} + static int ext3_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + int ret, ret2; + int needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext3_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext3_get_block); else ret = block_prepare_write(page, from, to, ext3_get_block); if (ret) - goto prepare_write_failed; + goto failure; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); + if (ret) + /* fatal error, just put the handle and return */ + journal_stop(handle); } -prepare_write_failed: - if (ret) - ext3_journal_stop(handle); + return ret; + +failure: + ret2 = ext3_prepare_failure(file, page, from, to); + if (ret2 < 0) + return ret2; if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; -out: + /* retry number exceeded, or other error like -EDQUOT */ return ret; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 906731a20f1..60d2f9dbdb0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file, dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0)); for (; de < top; de = ext3_next_entry(de)) { + if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, + (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) + +((char *)de - bh->b_data))) { + /* On error, skip the f_pos to the next block. */ + dir_file->f_pos = (dir_file->f_pos | + (dir->i_sb->s_blocksize - 1)) + 1; + brelse (bh); + return count; + } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && diff --git a/fs/ext3/super.c b/fs/ext3/super.c index afc2d4f42d7..580b8a6ca97 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -436,7 +436,7 @@ static void ext3_put_super (struct super_block * sb) return; } -static kmem_cache_t *ext3_inode_cachep; +static struct kmem_cache *ext3_inode_cachep; /* * Called inside transaction, so use GFP_NOFS @@ -445,7 +445,7 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) { struct ext3_inode_info *ei; - ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); if (!ei) return NULL; #ifdef CONFIG_EXT3_FS_POSIX_ACL @@ -462,7 +462,7 @@ static void ext3_destroy_inode(struct inode *inode) kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; @@ -1264,6 +1264,12 @@ static void ext3_orphan_cleanup (struct super_block * sb, return; } + if (bdev_read_only(sb->s_bdev)) { + printk(KERN_ERR "EXT3-fs: write access " + "unavailable, skipping orphan cleanup.\n"); + return; + } + if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " @@ -2387,6 +2393,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) struct ext3_super_block *es = sbi->s_es; ext3_fsblk_t overhead; int i; + u64 fsid; if (test_opt (sb, MINIX_DF)) overhead = 0; @@ -2433,6 +2440,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); buf->f_namelen = EXT3_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; return 0; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index f86f2482f01..99857a400f4 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -459,14 +459,11 @@ static void ext3_xattr_update_super_block(handle_t *handle, if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) return; - lock_super(sb); if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { - EXT3_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); sb->s_dirt = 1; ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); } - unlock_super(sb); } /* diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a6acb96ebeb..ae6e7e502ac 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ + ext4_jbd2.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5d45582f951..c4dd1103ccf 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -165,7 +165,7 @@ restart: printk("Block Allocation Reservation Windows Map (%s):\n", fn); while (n) { - rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " "start: %llu, end: %llu\n", @@ -747,7 +747,7 @@ find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, here = 0; p = ((char *)bh->b_data) + (here >> 3); - r = memscan(p, 0, (maxblocks - here + 7) >> 3); + r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); next = (r - ((char *)bh->b_data)) << 3; if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) @@ -966,7 +966,7 @@ static int find_next_reservable_window( prev = rsv; next = rb_next(&rsv->rsv_node); - rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node); + rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); /* * Reached the last reservation, we can just append to the @@ -1165,7 +1165,7 @@ retry: * check if the first free block is within the * free space we just reserved */ - if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) + if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) return 0; /* success */ /* * if the first free bit we found is out of the reservable space @@ -1210,7 +1210,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, if (!next) my_rsv->rsv_end += size; else { - next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node); + next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) my_rsv->rsv_end += size; @@ -1288,7 +1288,7 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, } /* * grp_goal is a group relative block number (if there is a goal) - * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb) + * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) * first block is a filesystem wide block number * first block is the block number of the first block in this group */ @@ -1324,10 +1324,14 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) grp_goal = -1; - } else if (grp_goal > 0 && - (my_rsv->rsv_end-grp_goal+1) < *count) - try_to_extend_reservation(my_rsv, sb, - *count-my_rsv->rsv_end + grp_goal - 1); + } else if (grp_goal >= 0) { + int curr = my_rsv->rsv_end - + (grp_goal + group_first_block) + 1; + + if (curr < *count) + try_to_extend_reservation(my_rsv, sb, + *count - curr); + } if ((my_rsv->rsv_start > group_last_block) || (my_rsv->rsv_end < group_first_block)) { @@ -1525,10 +1529,8 @@ retry_alloc: if (group_no >= ngroups) group_no = 0; gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) { - *errp = -EIO; - goto out; - } + if (!gdp) + goto io_error; free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); /* * skip this group if the number of @@ -1562,6 +1564,7 @@ retry_alloc: */ if (my_rsv) { my_rsv = NULL; + windowsz = 0; group_no = goal_group; goto retry_alloc; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f8595787a70..f2ed3e7fb9f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -153,6 +153,9 @@ static int ext4_readdir(struct file * filp, ext4_error (sb, "ext4_readdir", "directory #%lu contains a hole at offset %lu", inode->i_ino, (unsigned long)filp->f_pos); + /* corrupt size? Maybe no more blocks to read */ + if (filp->f_pos > inode->i_blocks << 9) + break; filp->f_pos += sb->s_blocksize - offset; continue; } diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c new file mode 100644 index 00000000000..d6afe4e2734 --- /dev/null +++ b/fs/ext4/ext4_jbd2.c @@ -0,0 +1,59 @@ +/* + * Interface between ext4 and JBD + */ + +#include <linux/ext4_jbd2.h> + +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_undo_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_write_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh) +{ + int err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_get_create_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_dirty_metadata(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2608dce18f3..dc2724fa762 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -48,7 +48,7 @@ * ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ -static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) +static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; @@ -61,7 +61,7 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) * idx_pblock: * combine low and high parts of a leaf physical block number into ext4_fsblk_t */ -static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) +static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; @@ -75,7 +75,7 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) * stores a large physical block number into an extent struct, * breaking it into parts */ -static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) +static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); @@ -86,7 +86,7 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb * stores a large physical block number into an index struct, * breaking it into parts */ -static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) +static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); @@ -186,7 +186,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, depth = path->p_depth; /* try to predict block placement */ - if ((ex = path[depth].p_ext)) + ex = path[depth].p_ext; + if (ex) return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); /* it looks like index is empty; @@ -215,7 +216,7 @@ ext4_ext_new_block(handle_t *handle, struct inode *inode, return newblock; } -static inline int ext4_ext_space_block(struct inode *inode) +static int ext4_ext_space_block(struct inode *inode) { int size; @@ -228,7 +229,7 @@ static inline int ext4_ext_space_block(struct inode *inode) return size; } -static inline int ext4_ext_space_block_idx(struct inode *inode) +static int ext4_ext_space_block_idx(struct inode *inode) { int size; @@ -241,7 +242,7 @@ static inline int ext4_ext_space_block_idx(struct inode *inode) return size; } -static inline int ext4_ext_space_root(struct inode *inode) +static int ext4_ext_space_root(struct inode *inode) { int size; @@ -255,7 +256,7 @@ static inline int ext4_ext_space_root(struct inode *inode) return size; } -static inline int ext4_ext_space_root_idx(struct inode *inode) +static int ext4_ext_space_root_idx(struct inode *inode) { int size; @@ -476,13 +477,12 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) /* account possible depth increase */ if (!path) { - path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2), + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), GFP_NOFS); if (!path) return ERR_PTR(-ENOMEM); alloc = 1; } - memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); path[0].p_hdr = eh; /* walk through the tree */ @@ -543,7 +543,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, struct ext4_extent_idx *ix; int len, err; - if ((err = ext4_ext_get_access(handle, inode, curp))) + err = ext4_ext_get_access(handle, inode, curp); + if (err) return err; BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); @@ -641,10 +642,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * We need this to handle errors and free blocks * upon them. */ - ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); + ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); if (!ablocks) return -ENOMEM; - memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth); /* allocate all needed blocks */ ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); @@ -665,7 +665,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } lock_buffer(bh); - if ((err = ext4_journal_get_create_access(handle, bh))) + err = ext4_journal_get_create_access(handle, bh); + if (err) goto cleanup; neh = ext_block_hdr(bh); @@ -702,18 +703,21 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - if ((err = ext4_journal_dirty_metadata(handle, bh))) + err = ext4_journal_dirty_metadata(handle, bh); + if (err) goto cleanup; brelse(bh); bh = NULL; /* correct old leaf */ if (m) { - if ((err = ext4_ext_get_access(handle, inode, path + depth))) + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) goto cleanup; path[depth].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); - if ((err = ext4_ext_dirty(handle, inode, path + depth))) + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) goto cleanup; } @@ -736,7 +740,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } lock_buffer(bh); - if ((err = ext4_journal_get_create_access(handle, bh))) + err = ext4_journal_get_create_access(handle, bh); + if (err) goto cleanup; neh = ext_block_hdr(bh); @@ -780,7 +785,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - if ((err = ext4_journal_dirty_metadata(handle, bh))) + err = ext4_journal_dirty_metadata(handle, bh); + if (err) goto cleanup; brelse(bh); bh = NULL; @@ -800,9 +806,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } /* insert new index */ - if (err) - goto cleanup; - err = ext4_ext_insert_index(handle, inode, path + at, le32_to_cpu(border), newblock); @@ -857,7 +860,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, } lock_buffer(bh); - if ((err = ext4_journal_get_create_access(handle, bh))) { + err = ext4_journal_get_create_access(handle, bh); + if (err) { unlock_buffer(bh); goto out; } @@ -877,11 +881,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - if ((err = ext4_journal_dirty_metadata(handle, bh))) + err = ext4_journal_dirty_metadata(handle, bh); + if (err) goto out; /* create index in new top-level index: num,max,pointer */ - if ((err = ext4_ext_get_access(handle, inode, curp))) + err = ext4_ext_get_access(handle, inode, curp); + if (err) goto out; curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; @@ -1073,27 +1079,31 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, */ k = depth - 1; border = path[depth].p_ext->ee_block; - if ((err = ext4_ext_get_access(handle, inode, path + k))) + err = ext4_ext_get_access(handle, inode, path + k); + if (err) return err; path[k].p_idx->ei_block = border; - if ((err = ext4_ext_dirty(handle, inode, path + k))) + err = ext4_ext_dirty(handle, inode, path + k); + if (err) return err; while (k--) { /* change all left-side indexes */ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) break; - if ((err = ext4_ext_get_access(handle, inode, path + k))) + err = ext4_ext_get_access(handle, inode, path + k); + if (err) break; path[k].p_idx->ei_block = border; - if ((err = ext4_ext_dirty(handle, inode, path + k))) + err = ext4_ext_dirty(handle, inode, path + k); + if (err) break; } return err; } -static int inline +static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { @@ -1145,7 +1155,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, le16_to_cpu(newext->ee_len), le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len), ext_pblock(ex)); - if ((err = ext4_ext_get_access(handle, inode, path + depth))) + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) return err; ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) + le16_to_cpu(newext->ee_len)); @@ -1195,7 +1206,8 @@ repeat: has_space: nearex = path[depth].p_ext; - if ((err = ext4_ext_get_access(handle, inode, path + depth))) + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) goto cleanup; if (!nearex) { @@ -1383,7 +1395,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, return err; } -static inline void +static void ext4_ext_put_in_cache(struct inode *inode, __u32 block, __u32 len, __u32 start, int type) { @@ -1401,7 +1413,7 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block, * calculate boundaries of the gap that the requested block fits into * and cache this gap */ -static inline void +static void ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, unsigned long block) { @@ -1442,7 +1454,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); } -static inline int +static int ext4_ext_in_cache(struct inode *inode, unsigned long block, struct ext4_extent *ex) { @@ -1489,10 +1501,12 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, path--; leaf = idx_pblock(path->p_idx); BUG_ON(path->p_hdr->eh_entries == 0); - if ((err = ext4_ext_get_access(handle, inode, path))) + err = ext4_ext_get_access(handle, inode, path); + if (err) return err; path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); - if ((err = ext4_ext_dirty(handle, inode, path))) + err = ext4_ext_dirty(handle, inode, path); + if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); bh = sb_find_get_block(inode->i_sb, leaf); @@ -1509,7 +1523,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, * the caller should calculate credits under truncate_mutex and * pass the actual path. */ -int inline ext4_ext_calc_credits_for_insert(struct inode *inode, +int ext4_ext_calc_credits_for_insert(struct inode *inode, struct ext4_ext_path *path) { int depth, needed; @@ -1534,16 +1548,17 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode, /* * tree can be full, so it would need to grow in depth: - * allocation + old root + new root + * we need one credit to modify old root, credits for + * new root will be added in split accounting */ - needed += 2 + 1 + 1; + needed += 1; /* * Index split can happen, we would need: * allocate intermediate indexes (bitmap + group) * + change two blocks at each level, but root (already included) */ - needed = (depth * 2) + (depth * 2); + needed += (depth * 2) + (depth * 2); /* any allocation modifies superblock */ needed += 1; @@ -1718,7 +1733,7 @@ out: * ext4_ext_more_to_rm: * returns 1 if current index has to be freed (even partial) */ -static int inline +static int ext4_ext_more_to_rm(struct ext4_ext_path *path) { BUG_ON(path->p_idx == NULL); @@ -1756,12 +1771,11 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ - path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; } - memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); path[0].p_hdr = ext_inode_hdr(inode); if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { err = -EIO; @@ -1932,7 +1946,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, mutex_lock(&EXT4_I(inode)->truncate_mutex); /* check in cache */ - if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) { + goal = ext4_ext_in_cache(inode, iblock, &newex); + if (goal) { if (goal == EXT4_EXT_CACHE_GAP) { if (!create) { /* block isn't allocated yet and @@ -1971,7 +1986,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ BUG_ON(path[depth].p_ext == NULL && depth != 0); - if ((ex = path[depth].p_ext)) { + ex = path[depth].p_ext; + if (ex) { unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len = le16_to_cpu(ex->ee_len); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0a60ec5a16d..1d85d4ec959 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1147,37 +1147,102 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } +/* + * The idea of this helper function is following: + * if prepare_write has allocated some blocks, but not all of them, the + * transaction must include the content of the newly allocated blocks. + * This content is expected to be set to zeroes by block_prepare_write(). + * 2006/10/14 SAW + */ +static int ext4_prepare_failure(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct address_space *mapping; + struct buffer_head *bh, *head, *next; + unsigned block_start, block_end; + unsigned blocksize; + int ret; + handle_t *handle = ext4_journal_current_handle(); + + mapping = page->mapping; + if (ext4_should_writeback_data(mapping->host)) { + /* optimization: no constraints about data */ +skip: + return ext4_journal_stop(handle); + } + + head = page_buffers(page); + blocksize = head->b_size; + for ( bh = head, block_start = 0; + bh != head || !block_start; + block_start = block_end, bh = next) + { + next = bh->b_this_page; + block_end = block_start + blocksize; + if (block_end <= from) + continue; + if (block_start >= to) { + block_start = to; + break; + } + if (!buffer_mapped(bh)) + /* prepare_write failed on this bh */ + break; + if (ext4_should_journal_data(mapping->host)) { + ret = do_journal_get_write_access(handle, bh); + if (ret) { + ext4_journal_stop(handle); + return ret; + } + } + /* + * block_start here becomes the first block where the current iteration + * of prepare_write failed. + */ + } + if (block_start <= from) + goto skip; + + /* commit allocated and zeroed buffers */ + return mapping->a_ops->commit_write(file, page, from, block_start); +} + static int ext4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, needed_blocks = ext4_writepage_trans_blocks(inode); + int ret, ret2; + int needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext4_get_block); else ret = block_prepare_write(page, from, to, ext4_get_block); if (ret) - goto prepare_write_failed; + goto failure; if (ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); + if (ret) + /* fatal error, just put the handle and return */ + journal_stop(handle); } -prepare_write_failed: - if (ret) - ext4_journal_stop(handle); + return ret; + +failure: + ret2 = ext4_prepare_failure(file, page, from, to); + if (ret2 < 0) + return ret2; if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; -out: + /* retry number exceeded, or other error like -EDQUOT */ return ret; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8b1bd03d20f..859990eac50 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file, dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0)); for (; de < top; de = ext4_next_entry(de)) { + if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, + (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + +((char *)de - bh->b_data))) { + /* On error, skip the f_pos to the next block. */ + dir_file->f_pos = (dir_file->f_pos | + (dir->i_sb->s_blocksize - 1)) + 1; + brelse (bh); + return count; + } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b4b022aa2bc..486a641ca71 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -486,7 +486,7 @@ static void ext4_put_super (struct super_block * sb) return; } -static kmem_cache_t *ext4_inode_cachep; +static struct kmem_cache *ext4_inode_cachep; /* * Called inside transaction, so use GFP_NOFS @@ -495,7 +495,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) { struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL @@ -513,7 +513,7 @@ static void ext4_destroy_inode(struct inode *inode) kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; @@ -1321,6 +1321,12 @@ static void ext4_orphan_cleanup (struct super_block * sb, return; } + if (bdev_read_only(sb->s_bdev)) { + printk(KERN_ERR "EXT4-fs: write access " + "unavailable, skipping orphan cleanup.\n"); + return; + } + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " @@ -2460,6 +2466,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) struct ext4_super_block *es = sbi->s_es; ext4_fsblk_t overhead; int i; + u64 fsid; if (test_opt (sb, MINIX_DF)) overhead = 0; @@ -2506,6 +2513,10 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); buf->f_namelen = EXT4_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; return 0; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 63233cd946a..dc969c357aa 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -459,14 +459,11 @@ static void ext4_xattr_update_super_block(handle_t *handle, if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) return; - lock_super(sb); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { - EXT4_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR); + EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); sb->s_dirt = 1; ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); } - unlock_super(sb); } /* diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 82cc4f59e3b..05c2941c74f 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -34,9 +34,9 @@ static inline int fat_max_cache(struct inode *inode) return FAT_MAX_CACHE; } -static kmem_cache_t *fat_cache_cachep; +static struct kmem_cache *fat_cache_cachep; -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct fat_cache *cache = (struct fat_cache *)foo; @@ -63,7 +63,7 @@ void fat_cache_destroy(void) static inline struct fat_cache *fat_cache_alloc(struct inode *inode) { - return kmem_cache_alloc(fat_cache_cachep, SLAB_KERNEL); + return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); } static inline void fat_cache_free(struct fat_cache *cache) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 78945b53b0f..a9e4688582a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -477,12 +477,12 @@ static void fat_put_super(struct super_block *sb) kfree(sbi); } -static kmem_cache_t *fat_inode_cachep; +static struct kmem_cache *fat_inode_cachep; static struct inode *fat_alloc_inode(struct super_block *sb) { struct msdos_inode_info *ei; - ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -493,7 +493,7 @@ static void fat_destroy_inode(struct inode *inode) kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/fcntl.c b/fs/fcntl.c index e4f26165f12..4740d35e52c 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -553,7 +553,7 @@ int send_sigurg(struct fown_struct *fown) } static DEFINE_RWLOCK(fasync_lock); -static kmem_cache_t *fasync_cache __read_mostly; +static struct kmem_cache *fasync_cache __read_mostly; /* * fasync_helper() is used by some character device drivers (mainly mice) @@ -567,7 +567,7 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap int result = 0; if (on) { - new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL); + new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); if (!new) return -ENOMEM; } diff --git a/fs/file.c b/fs/file.c index 3787e82f54c..51aef675470 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,7 +21,6 @@ struct fdtable_defer { spinlock_t lock; struct work_struct wq; - struct timer_list timer; struct fdtable *next; }; @@ -75,22 +74,6 @@ static void __free_fdtable(struct fdtable *fdt) kfree(fdt); } -static void fdtable_timer(unsigned long data) -{ - struct fdtable_defer *fddef = (struct fdtable_defer *)data; - - spin_lock(&fddef->lock); - /* - * If someone already emptied the queue return. - */ - if (!fddef->next) - goto out; - if (!schedule_work(&fddef->wq)) - mod_timer(&fddef->timer, 5); -out: - spin_unlock(&fddef->lock); -} - static void free_fdtable_work(struct work_struct *work) { struct fdtable_defer *f = @@ -144,13 +127,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu) spin_lock(&fddef->lock); fdt->next = fddef->next; fddef->next = fdt; - /* - * vmallocs are handled from the workqueue context. - * If the per-cpu workqueue is running, then we - * defer work scheduling through a timer. - */ - if (!schedule_work(&fddef->wq)) - mod_timer(&fddef->timer, 5); + /* vmallocs are handled from the workqueue context */ + schedule_work(&fddef->wq); spin_unlock(&fddef->lock); put_cpu_var(fdtable_defer_list); } @@ -354,9 +332,6 @@ static void __devinit fdtable_defer_list_init(int cpu) struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); spin_lock_init(&fddef->lock); INIT_WORK(&fddef->wq, free_fdtable_work); - init_timer(&fddef->timer); - fddef->timer.data = (unsigned long)fddef; - fddef->timer.function = fdtable_timer; fddef->next = NULL; } diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 4786d51ad3b..0b7ae897cb7 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -46,7 +46,7 @@ extern const struct address_space_operations vxfs_immed_aops; extern struct inode_operations vxfs_immed_symlink_iops; -kmem_cache_t *vxfs_inode_cachep; +struct kmem_cache *vxfs_inode_cachep; #ifdef DIAGNOSTIC @@ -103,7 +103,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino) struct vxfs_inode_info *vip; struct vxfs_dinode *dip; - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL))) + if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) goto fail; dip = (struct vxfs_dinode *)(bp->b_data + offset); memcpy(vip, dip, sizeof(*vip)); @@ -145,7 +145,7 @@ __vxfs_iget(ino_t ino, struct inode *ilistp) struct vxfs_dinode *dip; caddr_t kaddr = (char *)page_address(pp); - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL))) + if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) goto fail; dip = (struct vxfs_dinode *)(kaddr + offset); memcpy(vip, dip, sizeof(*vip)); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 66571eafbb1..357764d85ff 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,7 +19,7 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); -static kmem_cache_t *fuse_req_cachep; +static struct kmem_cache *fuse_req_cachep; static struct fuse_conn *fuse_get_conn(struct file *file) { @@ -41,7 +41,7 @@ static void fuse_request_init(struct fuse_req *req) struct fuse_req *fuse_request_alloc(void) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL); + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); if (req) fuse_request_init(req); return req; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c71a6c092ad..1cabdb229ad 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -141,9 +141,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) struct fuse_req *forget_req; struct dentry *parent; - /* Doesn't hurt to "reset" the validity timeout */ - fuse_invalidate_entry_cache(entry); - /* For negative dentries, always do a fresh lookup */ if (!inode) return 0; @@ -1027,6 +1024,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { unsigned long limit; is_truncate = 1; + if (IS_SWAPFILE(inode)) + return -ETXTBSY; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { send_sig(SIGXFSZ, current, 0); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 763a50daf1c..128f79c4080 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -754,6 +754,42 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) return err; } +static sector_t fuse_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_bmap_in inarg; + struct fuse_bmap_out outarg; + int err; + + if (!inode->i_sb->s_bdev || fc->no_bmap) + return 0; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return 0; + + memset(&inarg, 0, sizeof(inarg)); + inarg.block = block; + inarg.blocksize = inode->i_sb->s_blocksize; + req->in.h.opcode = FUSE_BMAP; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) + fc->no_bmap = 1; + + return err ? 0 : outarg.block; +} + static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -787,6 +823,7 @@ static const struct address_space_operations fuse_file_aops = { .commit_write = fuse_commit_write, .readpages = fuse_readpages, .set_page_dirty = fuse_set_page_dirty, + .bmap = fuse_bmap, }; void fuse_init_file_inode(struct inode *inode) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91edb8932d9..b98b20de740 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -298,6 +298,9 @@ struct fuse_conn { reply, before any other request, and never cleared */ unsigned conn_error : 1; + /** Connection successful. Only set in INIT */ + unsigned conn_init : 1; + /** Do readpages asynchronously? Only set in INIT */ unsigned async_read : 1; @@ -339,6 +342,9 @@ struct fuse_conn { /** Is interrupt not implemented by fs? */ unsigned no_interrupt : 1; + /** Is bmap not implemented by fs? */ + unsigned no_bmap : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -365,6 +371,9 @@ struct fuse_conn { /** Key for lock owner ID scrambling */ u32 scramble_key[4]; + + /** Reserved request for the DESTROY message */ + struct fuse_req *destroy_req; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fc420357037..12450d2b320 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -22,7 +22,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); -static kmem_cache_t *fuse_inode_cachep; +static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); @@ -39,6 +39,7 @@ struct fuse_mount_data { unsigned group_id_present : 1; unsigned flags; unsigned max_read; + unsigned blksize; }; static struct inode *fuse_alloc_inode(struct super_block *sb) @@ -46,7 +47,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) struct inode *inode; struct fuse_inode *fi; - inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL); + inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); if (!inode) return NULL; @@ -205,10 +206,23 @@ static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb)); } +static void fuse_send_destroy(struct fuse_conn *fc) +{ + struct fuse_req *req = fc->destroy_req; + if (req && fc->conn_init) { + fc->destroy_req = NULL; + req->in.h.opcode = FUSE_DESTROY; + req->force = 1; + request_send(fc, req); + fuse_put_request(fc, req); + } +} + static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); + fuse_send_destroy(fc); spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; @@ -274,6 +288,7 @@ enum { OPT_DEFAULT_PERMISSIONS, OPT_ALLOW_OTHER, OPT_MAX_READ, + OPT_BLKSIZE, OPT_ERR }; @@ -285,14 +300,16 @@ static match_table_t tokens = { {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, {OPT_ALLOW_OTHER, "allow_other"}, {OPT_MAX_READ, "max_read=%u"}, + {OPT_BLKSIZE, "blksize=%u"}, {OPT_ERR, NULL} }; -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) { char *p; memset(d, 0, sizeof(struct fuse_mount_data)); d->max_read = ~0; + d->blksize = 512; while ((p = strsep(&opt, ",")) != NULL) { int token; @@ -345,6 +362,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) d->max_read = value; break; + case OPT_BLKSIZE: + if (!is_bdev || match_int(&args[0], &value)) + return 0; + d->blksize = value; + break; + default: return 0; } @@ -400,6 +423,8 @@ static struct fuse_conn *new_conn(void) void fuse_conn_put(struct fuse_conn *fc) { if (atomic_dec_and_test(&fc->count)) { + if (fc->destroy_req) + fuse_request_free(fc->destroy_req); mutex_destroy(&fc->inst_mutex); kfree(fc); } @@ -456,6 +481,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + fc->conn_init = 1; } fuse_put_request(fc, req); fc->blocked = 0; @@ -500,15 +526,23 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) struct dentry *root_dentry; struct fuse_req *init_req; int err; + int is_bdev = sb->s_bdev != NULL; if (sb->s_flags & MS_MANDLOCK) return -EINVAL; - if (!parse_fuse_opt((char *) data, &d)) + if (!parse_fuse_opt((char *) data, &d, is_bdev)) return -EINVAL; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + if (is_bdev) { +#ifdef CONFIG_BLOCK + if (!sb_set_blocksize(sb, d.blksize)) + return -EINVAL; +#endif + } else { + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + } sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -547,6 +581,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!init_req) goto err_put_root; + if (is_bdev) { + fc->destroy_req = fuse_request_alloc(); + if (!fc->destroy_req) + goto err_put_root; + } + mutex_lock(&fuse_mutex); err = -EINVAL; if (file->private_data) @@ -598,10 +638,47 @@ static struct file_system_type fuse_fs_type = { .kill_sb = kill_anon_super, }; +#ifdef CONFIG_BLOCK +static int fuse_get_sb_blk(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, + mnt); +} + +static struct file_system_type fuseblk_fs_type = { + .owner = THIS_MODULE, + .name = "fuseblk", + .get_sb = fuse_get_sb_blk, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static inline int register_fuseblk(void) +{ + return register_filesystem(&fuseblk_fs_type); +} + +static inline void unregister_fuseblk(void) +{ + unregister_filesystem(&fuseblk_fs_type); +} +#else +static inline int register_fuseblk(void) +{ + return 0; +} + +static inline void unregister_fuseblk(void) +{ +} +#endif + static decl_subsys(fuse, NULL, NULL); static decl_subsys(connections, NULL, NULL); -static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, +static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct inode * inode = foo; @@ -617,24 +694,34 @@ static int __init fuse_fs_init(void) err = register_filesystem(&fuse_fs_type); if (err) - printk("fuse: failed to register filesystem\n"); - else { - fuse_inode_cachep = kmem_cache_create("fuse_inode", - sizeof(struct fuse_inode), - 0, SLAB_HWCACHE_ALIGN, - fuse_inode_init_once, NULL); - if (!fuse_inode_cachep) { - unregister_filesystem(&fuse_fs_type); - err = -ENOMEM; - } - } + goto out; + err = register_fuseblk(); + if (err) + goto out_unreg; + + fuse_inode_cachep = kmem_cache_create("fuse_inode", + sizeof(struct fuse_inode), + 0, SLAB_HWCACHE_ALIGN, + fuse_inode_init_once, NULL); + err = -ENOMEM; + if (!fuse_inode_cachep) + goto out_unreg2; + + return 0; + + out_unreg2: + unregister_fuseblk(); + out_unreg: + unregister_filesystem(&fuse_fs_type); + out: return err; } static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); + unregister_fuseblk(); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 8c27de8b956..c0791cbacad 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -2,6 +2,7 @@ config GFS2_FS tristate "GFS2 file system support" depends on EXPERIMENTAL select FS_POSIX_ACL + select CRC32 help A cluster filesystem. diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 5f959b8ce40..6e80844367e 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -74,11 +74,11 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) { if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) return -EOPNOTSUPP; - if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) + if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER)) return -EPERM; - if (S_ISLNK(ip->i_di.di_mode)) + if (S_ISLNK(ip->i_inode.i_mode)) return -EOPNOTSUPP; - if (!access && !S_ISDIR(ip->i_di.di_mode)) + if (!access && !S_ISDIR(ip->i_inode.i_mode)) return -EACCES; return 0; @@ -145,14 +145,14 @@ out: } /** - * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something + * gfs2_check_acl - Check an ACL to see if we're allowed to do something * @inode: the file we want to do something to * @mask: what we want to do * * Returns: errno */ -int gfs2_check_acl_locked(struct inode *inode, int mask) +int gfs2_check_acl(struct inode *inode, int mask) { struct posix_acl *acl = NULL; int error; @@ -170,21 +170,6 @@ int gfs2_check_acl_locked(struct inode *inode, int mask) return -EAGAIN; } -int gfs2_check_acl(struct inode *inode, int mask) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder i_gh; - int error; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (!error) { - error = gfs2_check_acl_locked(inode, mask); - gfs2_glock_dq_uninit(&i_gh); - } - - return error; -} - static int munge_mode(struct gfs2_inode *ip, mode_t mode) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -198,10 +183,10 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_assert_withdraw(sdp, - (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); - ip->i_di.di_mode = mode; + (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); + ip->i_inode.i_mode = mode; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -215,12 +200,12 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct posix_acl *acl = NULL, *clone; struct gfs2_ea_request er; - mode_t mode = ip->i_di.di_mode; + mode_t mode = ip->i_inode.i_mode; int error; if (!sdp->sd_args.ar_posix_acl) return 0; - if (S_ISLNK(ip->i_di.di_mode)) + if (S_ISLNK(ip->i_inode.i_mode)) return 0; memset(&er, 0, sizeof(struct gfs2_ea_request)); @@ -232,7 +217,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) return error; if (!acl) { mode &= ~current->fs->umask; - if (mode != ip->i_di.di_mode) + if (mode != ip->i_inode.i_mode) error = munge_mode(ip, mode); return error; } @@ -244,7 +229,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) posix_acl_release(acl); acl = clone; - if (S_ISDIR(ip->i_di.di_mode)) { + if (S_ISDIR(ip->i_inode.i_mode)) { er.er_name = GFS2_POSIX_ACL_DEFAULT; er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; error = gfs2_system_eaops.eo_set(ip, &er); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 05c294fe0d7..6751930bfb6 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -31,7 +31,6 @@ int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, struct gfs2_ea_request *er, int *remove, mode_t *mode); int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); -int gfs2_check_acl_locked(struct inode *inode, int mask); int gfs2_check_acl(struct inode *inode, int mask); int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 06e9a8cb45e..8240c1ff94f 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -38,8 +38,8 @@ struct metapath { }; typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh, - struct buffer_head *bh, u64 *top, - u64 *bottom, unsigned int height, + struct buffer_head *bh, __be64 *top, + __be64 *bottom, unsigned int height, void *data); struct strip_mine { @@ -163,6 +163,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) if (ip->i_di.di_size) { *(__be64 *)(di + 1) = cpu_to_be64(block); ip->i_di.di_blocks++; + gfs2_set_inode_blocks(&ip->i_inode); di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); } @@ -230,7 +231,7 @@ static int build_height(struct inode *inode, unsigned height) struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; struct gfs2_dinode *di; int error; - u64 *bp; + __be64 *bp; u64 bn; unsigned n; @@ -255,7 +256,7 @@ static int build_height(struct inode *inode, unsigned height) GFS2_FORMAT_IN); gfs2_buffer_clear_tail(blocks[n], sizeof(struct gfs2_meta_header)); - bp = (u64 *)(blocks[n]->b_data + + bp = (__be64 *)(blocks[n]->b_data + sizeof(struct gfs2_meta_header)); *bp = cpu_to_be64(blocks[n+1]->b_blocknr); brelse(blocks[n]); @@ -272,6 +273,7 @@ static int build_height(struct inode *inode, unsigned height) *(__be64 *)(di + 1) = cpu_to_be64(bn); ip->i_di.di_height += new_height; ip->i_di.di_blocks += new_height; + gfs2_set_inode_blocks(&ip->i_inode); di->di_height = cpu_to_be16(ip->i_di.di_height); di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); brelse(dibh); @@ -360,15 +362,15 @@ static void find_metapath(struct gfs2_inode *ip, u64 block, * metadata tree. */ -static inline u64 *metapointer(struct buffer_head *bh, int *boundary, +static inline __be64 *metapointer(struct buffer_head *bh, int *boundary, unsigned int height, const struct metapath *mp) { unsigned int head_size = (height > 0) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); - u64 *ptr; + __be64 *ptr; *boundary = 0; - ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height]; - if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size)) + ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; + if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size)) *boundary = 1; return ptr; } @@ -394,7 +396,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, int *new, u64 *block) { int boundary; - u64 *ptr = metapointer(bh, &boundary, height, mp); + __be64 *ptr = metapointer(bh, &boundary, height, mp); if (*ptr) { *block = be64_to_cpu(*ptr); @@ -415,17 +417,35 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, *ptr = cpu_to_be64(*block); ip->i_di.di_blocks++; + gfs2_set_inode_blocks(&ip->i_inode); *new = 1; return 0; } +static inline void bmap_lock(struct inode *inode, int create) +{ + struct gfs2_inode *ip = GFS2_I(inode); + if (create) + down_write(&ip->i_rw_mutex); + else + down_read(&ip->i_rw_mutex); +} + +static inline void bmap_unlock(struct inode *inode, int create) +{ + struct gfs2_inode *ip = GFS2_I(inode); + if (create) + up_write(&ip->i_rw_mutex); + else + up_read(&ip->i_rw_mutex); +} + /** - * gfs2_block_pointers - Map a block from an inode to a disk block + * gfs2_block_map - Map a block from an inode to a disk block * @inode: The inode * @lblock: The logical block number - * @map_bh: The bh to be mapped - * @mp: metapath to use + * @bh_map: The bh to be mapped * * Find the block number on the current device which corresponds to an * inode's block. If the block had to be created, "new" will be set. @@ -433,8 +453,8 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, * Returns: errno */ -static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, - struct buffer_head *bh_map, struct metapath *mp) +int gfs2_block_map(struct inode *inode, u64 lblock, int create, + struct buffer_head *bh_map) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -448,57 +468,61 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, u64 dblock = 0; int boundary; unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; + struct metapath mp; + u64 size; BUG_ON(maxlen == 0); if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) return 0; + bmap_lock(inode, create); + clear_buffer_mapped(bh_map); + clear_buffer_new(bh_map); + clear_buffer_boundary(bh_map); bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; - - height = calc_tree_height(ip, (lblock + 1) * bsize); - if (ip->i_di.di_height < height) { - if (!create) - return 0; - - error = build_height(inode, height); - if (error) - return error; + size = (lblock + 1) * bsize; + + if (size > ip->i_di.di_size) { + height = calc_tree_height(ip, size); + if (ip->i_di.di_height < height) { + if (!create) + goto out_ok; + + error = build_height(inode, height); + if (error) + goto out_fail; + } } - find_metapath(ip, lblock, mp); + find_metapath(ip, lblock, &mp); end_of_metadata = ip->i_di.di_height - 1; - error = gfs2_meta_inode_buffer(ip, &bh); if (error) - return error; + goto out_fail; for (x = 0; x < end_of_metadata; x++) { - lookup_block(ip, bh, x, mp, create, &new, &dblock); + lookup_block(ip, bh, x, &mp, create, &new, &dblock); brelse(bh); if (!dblock) - return 0; + goto out_ok; error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh); if (error) - return error; + goto out_fail; } - boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock); - clear_buffer_mapped(bh_map); - clear_buffer_new(bh_map); - clear_buffer_boundary(bh_map); - + boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock); if (dblock) { map_bh(bh_map, inode->i_sb, dblock); if (boundary) - set_buffer_boundary(bh); + set_buffer_boundary(bh_map); if (new) { struct buffer_head *dibh; error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } set_buffer_new(bh_map); @@ -507,8 +531,8 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, while(--maxlen && !buffer_boundary(bh_map)) { u64 eblock; - mp->mp_list[end_of_metadata]++; - boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock); + mp.mp_list[end_of_metadata]++; + boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock); if (eblock != ++dblock) break; bh_map->b_size += (1 << inode->i_blkbits); @@ -518,43 +542,15 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, } out_brelse: brelse(bh); - return 0; -} - - -static inline void bmap_lock(struct inode *inode, int create) -{ - struct gfs2_inode *ip = GFS2_I(inode); - if (create) - down_write(&ip->i_rw_mutex); - else - down_read(&ip->i_rw_mutex); -} - -static inline void bmap_unlock(struct inode *inode, int create) -{ - struct gfs2_inode *ip = GFS2_I(inode); - if (create) - up_write(&ip->i_rw_mutex); - else - up_read(&ip->i_rw_mutex); -} - -int gfs2_block_map(struct inode *inode, u64 lblock, int create, - struct buffer_head *bh) -{ - struct metapath mp; - int ret; - - bmap_lock(inode, create); - ret = gfs2_block_pointers(inode, lblock, create, bh, &mp); +out_ok: + error = 0; +out_fail: bmap_unlock(inode, create); - return ret; + return error; } int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) { - struct metapath mp; struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; int ret; int create = *new; @@ -564,9 +560,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi BUG_ON(!new); bh.b_size = 1 << (inode->i_blkbits + 5); - bmap_lock(inode, create); - ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp); - bmap_unlock(inode, create); + ret = gfs2_block_map(inode, lblock, create, &bh); *extlen = bh.b_size >> inode->i_blkbits; *dblock = bh.b_blocknr; if (buffer_new(&bh)) @@ -600,7 +594,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *bh = NULL; - u64 *top, *bottom; + __be64 *top, *bottom; u64 bn; int error; int mh_size = sizeof(struct gfs2_meta_header); @@ -611,17 +605,17 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, return error; dibh = bh; - top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; - bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; + top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; + bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; } else { error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); if (error) return error; - top = (u64 *)(bh->b_data + mh_size) + + top = (__be64 *)(bh->b_data + mh_size) + (first ? mp->mp_list[height] : 0); - bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; + bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; } error = bc(ip, dibh, bh, top, bottom, height, data); @@ -660,7 +654,7 @@ out: */ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, - struct buffer_head *bh, u64 *top, u64 *bottom, + struct buffer_head *bh, __be64 *top, __be64 *bottom, unsigned int height, void *data) { struct strip_mine *sm = data; @@ -668,7 +662,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, struct gfs2_rgrp_list rlist; u64 bn, bstart; u32 blen; - u64 *p; + __be64 *p; unsigned int rg_blocks = 0; int metadata; unsigned int revokes = 0; @@ -770,6 +764,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (!ip->i_di.di_blocks) gfs2_consist_inode(ip); ip->i_di.di_blocks--; + gfs2_set_inode_blocks(&ip->i_inode); } if (bstart) { if (metadata) @@ -778,9 +773,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_free_data(ip, bstart, blen); } - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); @@ -819,7 +814,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -853,14 +848,14 @@ static int do_grow(struct gfs2_inode *ip, u64 size) } ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out_end_trans: @@ -968,9 +963,9 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); error = 1; @@ -980,10 +975,10 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (!error) { ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); } } @@ -1053,11 +1048,11 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_num.no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); } - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: @@ -1109,7 +1104,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size) { int error; - if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode))) + if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) return -EINVAL; if (size > ip->i_di.di_size) diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index cab1f68d468..683cb5bda87 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -112,6 +112,7 @@ int gfs2_logd(void *data) struct gfs2_sbd *sdp = data; struct gfs2_holder ji_gh; unsigned long t; + int need_flush; while (!kthread_should_stop()) { /* Advance the log tail */ @@ -120,8 +121,10 @@ int gfs2_logd(void *data) gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; gfs2_ail1_empty(sdp, DIO_ALL); - - if (time_after_eq(jiffies, t)) { + gfs2_log_lock(sdp); + need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); + gfs2_log_unlock(sdp); + if (need_flush || time_after_eq(jiffies, t)) { gfs2_log_flush(sdp, NULL); sdp->sd_log_flush_time = jiffies; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index e24af28b1a1..0fdcb7713cd 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -131,8 +131,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -229,10 +229,10 @@ out: if (ip->i_di.di_size < offset + copied) ip->i_di.di_size = offset + copied; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); return copied; @@ -340,10 +340,15 @@ fail: return (copied) ? copied : error; } +static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) +{ + return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; +} + static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, const struct qstr *name, int ret) { - if (dent->de_inum.no_addr != 0 && + if (!gfs2_dirent_sentinel(dent) && be32_to_cpu(dent->de_hash) == name->hash && be16_to_cpu(dent->de_name_len) == name->len && memcmp(dent+1, name->name, name->len) == 0) @@ -388,7 +393,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); unsigned totlen = be16_to_cpu(dent->de_rec_len); - if (!dent->de_inum.no_addr) + if (gfs2_dirent_sentinel(dent)) actual = GFS2_DIRENT_SIZE(0); if (totlen - actual >= required) return 1; @@ -405,7 +410,7 @@ static int gfs2_dirent_gather(const struct gfs2_dirent *dent, void *opaque) { struct dirent_gather *g = opaque; - if (dent->de_inum.no_addr) { + if (!gfs2_dirent_sentinel(dent)) { g->pdent[g->offset++] = dent; } return 0; @@ -433,10 +438,10 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, if (unlikely(offset + size > len)) goto error; msg = "zero inode number"; - if (unlikely(!first && !dent->de_inum.no_addr)) + if (unlikely(!first && gfs2_dirent_sentinel(dent))) goto error; msg = "name length is greater than space in dirent"; - if (dent->de_inum.no_addr && + if (!gfs2_dirent_sentinel(dent) && unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > size)) goto error; @@ -598,7 +603,7 @@ static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, return ret; /* Only the first dent could ever have de_inum.no_addr == 0 */ - if (!tmp->de_inum.no_addr) { + if (gfs2_dirent_sentinel(tmp)) { gfs2_consist_inode(dip); return -EIO; } @@ -621,7 +626,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, { u16 cur_rec_len, prev_rec_len; - if (!cur->de_inum.no_addr) { + if (gfs2_dirent_sentinel(cur)) { gfs2_consist_inode(dip); return; } @@ -633,7 +638,8 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, out the inode number and return. */ if (!prev) { - cur->de_inum.no_addr = 0; /* No endianess worries */ + cur->de_inum.no_addr = 0; + cur->de_inum.no_formal_ino = 0; return; } @@ -664,7 +670,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, struct gfs2_dirent *ndent; unsigned offset = 0, totlen; - if (dent->de_inum.no_addr) + if (!gfs2_dirent_sentinel(dent)) offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); totlen = be16_to_cpu(dent->de_rec_len); BUG_ON(offset + name->len > totlen); @@ -713,12 +719,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { - u64 leaf_no; + __be64 leaf_no; int error; error = gfs2_dir_read_data(dip, (char *)&leaf_no, - index * sizeof(u64), - sizeof(u64), 0); + index * sizeof(__be64), + sizeof(__be64), 0); if (error != sizeof(u64)) return (error < 0) ? error : -EIO; @@ -837,7 +843,8 @@ static int dir_make_exhash(struct inode *inode) struct gfs2_leaf *leaf; int y; u32 x; - u64 *lp, bn; + __be64 *lp; + u64 bn; int error; error = gfs2_meta_inode_buffer(dip, &dibh); @@ -893,20 +900,20 @@ static int dir_make_exhash(struct inode *inode) gfs2_trans_add_bh(dip->i_gl, dibh, 1); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); - lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); + lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); for (x = sdp->sd_hash_ptrs; x--; lp++) *lp = cpu_to_be64(bn); dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; dip->i_di.di_blocks++; + gfs2_set_inode_blocks(&dip->i_inode); dip->i_di.di_flags |= GFS2_DIF_EXHASH; - dip->i_di.di_payload_format = 0; for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; dip->i_di.di_depth = y; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); @@ -929,7 +936,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) struct gfs2_leaf *nleaf, *oleaf; struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new; u32 start, len, half_len, divider; - u64 bn, *lp, leaf_no; + u64 bn, leaf_no; + __be64 *lp; u32 index; int x, moved = 0; int error; @@ -974,7 +982,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) /* Change the pointers. Don't bother distinguishing stuffed from non-stuffed. This code is complicated enough already. */ - lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL); + lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL); /* Change the pointers */ for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); @@ -1000,7 +1008,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) if (dirent_next(dip, obh, &next)) next = NULL; - if (dent->de_inum.no_addr && + if (!gfs2_dirent_sentinel(dent) && be32_to_cpu(dent->de_hash) < divider) { struct qstr str; str.name = (char*)(dent+1); @@ -1037,7 +1045,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { dip->i_di.di_blocks++; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_set_inode_blocks(&dip->i_inode); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); } @@ -1117,7 +1126,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(sdp, !error)) { dip->i_di.di_depth++; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); } @@ -1194,7 +1203,7 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, int *copied) { const struct gfs2_dirent *dent, *dent_next; - struct gfs2_inum inum; + struct gfs2_inum_host inum; u64 off, off_next; unsigned int x, y; int run = 0; @@ -1341,7 +1350,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, u32 hsize, len = 0; u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 hash, index; - u64 *lp; + __be64 *lp; int copied = 0; int error = 0; unsigned depth = 0; @@ -1365,7 +1374,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, if (ht_offset_cur != ht_offset) { error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(u64), + ht_offset * sizeof(__be64), sdp->sd_hash_bsize, 1); if (error != sdp->sd_hash_bsize) { if (error >= 0) @@ -1456,7 +1465,7 @@ out: */ int gfs2_dir_search(struct inode *dir, const struct qstr *name, - struct gfs2_inum *inum, unsigned int *type) + struct gfs2_inum_host *inum, unsigned int *type) { struct buffer_head *bh; struct gfs2_dirent *dent; @@ -1515,7 +1524,8 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) return error; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_blocks++; - gfs2_dinode_out(&ip->i_di, bh->b_data); + gfs2_set_inode_blocks(&ip->i_inode); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); return 0; } @@ -1531,7 +1541,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inum *inum, unsigned type) + const struct gfs2_inum_host *inum, unsigned type) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1558,8 +1568,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, break; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, bh->b_data); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; break; @@ -1644,8 +1654,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_consist_inode(dip); gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; - dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&dip->i_di, bh->b_data); + dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1666,7 +1676,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) */ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum *inum, unsigned int new_type) + struct gfs2_inum_host *inum, unsigned int new_type) { struct buffer_head *bh; struct gfs2_dirent *dent; @@ -1692,8 +1702,8 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, gfs2_trans_add_bh(dip->i_gl, bh, 1); } - dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&dip->i_di, bh->b_data); + dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; } @@ -1715,7 +1725,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) u32 hsize, len; u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 index = 0; - u64 *lp; + __be64 *lp; u64 leaf_no; int error = 0; @@ -1735,7 +1745,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) if (ht_offset_cur != ht_offset) { error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(u64), + ht_offset * sizeof(__be64), sdp->sd_hash_bsize, 1); if (error != sdp->sd_hash_bsize) { if (error >= 0) @@ -1859,6 +1869,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (!dip->i_di.di_blocks) gfs2_consist_inode(dip); dip->i_di.di_blocks--; + gfs2_set_inode_blocks(&dip->i_inode); } error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); @@ -1873,7 +1884,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_end_trans; gfs2_trans_add_bh(dip->i_gl, dibh, 1); - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_end_trans: diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 371233419b0..b21b33668a5 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -31,17 +31,17 @@ struct gfs2_inum; typedef int (*gfs2_filldir_t) (void *opaque, const char *name, unsigned int length, u64 offset, - struct gfs2_inum *inum, unsigned int type); + struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_search(struct inode *dir, const struct qstr *filename, - struct gfs2_inum *inum, unsigned int *type); + struct gfs2_inum_host *inum, unsigned int *type); int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inum *inum, unsigned int type); + const struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, gfs2_filldir_t filldir); int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum *new_inum, unsigned int new_type); + struct gfs2_inum_host *new_inum, unsigned int new_type); int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index 92c54e9b0dc..cd747c00f67 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -120,7 +120,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) { if (!(er->er_flags & GFS2_ERF_MODE)) { - er->er_mode = ip->i_di.di_mode; + er->er_mode = ip->i_inode.i_mode; er->er_flags |= GFS2_ERF_MODE; } error = gfs2_acl_validate_set(ip, 1, er, diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index a65a4ccfd4d..ebebbdcd705 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -112,7 +112,7 @@ fail: static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) { struct buffer_head *bh, *eabh; - u64 *eablk, *end; + __be64 *eablk, *end; int error; error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh); @@ -129,7 +129,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) goto out; } - eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header)); + eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)); end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs; for (; eablk < end; eablk++) { @@ -224,7 +224,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_rgrpd *rgd; struct gfs2_holder rg_gh; struct buffer_head *dibh; - u64 *dataptrs, bn = 0; + __be64 *dataptrs; + u64 bn = 0; u64 bstart = 0; unsigned int blen = 0; unsigned int blks = 0; @@ -280,6 +281,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (!ip->i_di.di_blocks) gfs2_consist_inode(ip); ip->i_di.di_blocks--; + gfs2_set_inode_blocks(&ip->i_inode); } if (bstart) gfs2_free_meta(ip, bstart, blen); @@ -299,9 +301,9 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -444,7 +446,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - u64 *dataptrs = GFS2_EA2DATAPTRS(ea); + __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; @@ -597,6 +599,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) ea->ea_num_ptrs = 0; ip->i_di.di_blocks++; + gfs2_set_inode_blocks(&ip->i_inode); return 0; } @@ -629,7 +632,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, ea->ea_num_ptrs = 0; memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len); } else { - u64 *dataptr = GFS2_EA2DATAPTRS(ea); + __be64 *dataptr = GFS2_EA2DATAPTRS(ea); const char *data = er->er_data; unsigned int data_len = er->er_data_len; unsigned int copy; @@ -648,6 +651,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); ip->i_di.di_blocks++; + gfs2_set_inode_blocks(&ip->i_inode); copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : data_len; @@ -686,7 +690,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -710,13 +714,13 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (!error) { if (er->er_flags & GFS2_ERF_MODE) { gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), - (ip->i_di.di_mode & S_IFMT) == + (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); - ip->i_di.di_mode = er->er_mode; + ip->i_inode.i_mode = er->er_mode; } - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -846,12 +850,12 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (er->er_flags & GFS2_ERF_MODE) { gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), - (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT)); - ip->i_di.di_mode = er->er_mode; + (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); + ip->i_inode.i_mode = er->er_mode; } - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -931,12 +935,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *indbh, *newbh; - u64 *eablk; + __be64 *eablk; int error; int mh_size = sizeof(struct gfs2_meta_header); if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) { - u64 *end; + __be64 *end; error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh); @@ -948,7 +952,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out; } - eablk = (u64 *)(indbh->b_data + mh_size); + eablk = (__be64 *)(indbh->b_data + mh_size); end = eablk + sdp->sd_inptrs; for (; eablk < end; eablk++) @@ -971,11 +975,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(indbh, mh_size); - eablk = (u64 *)(indbh->b_data + mh_size); + eablk = (__be64 *)(indbh->b_data + mh_size); *eablk = cpu_to_be64(ip->i_di.di_eattr); ip->i_di.di_eattr = blk; ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; ip->i_di.di_blocks++; + gfs2_set_inode_blocks(&ip->i_inode); eablk++; } @@ -1129,9 +1134,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1202,7 +1207,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - u64 *dataptrs = GFS2_EA2DATAPTRS(ea); + __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error; @@ -1284,9 +1289,8 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, if (!error) { error = inode_setattr(&ip->i_inode, attr); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1300,7 +1304,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrp_list rlist; struct buffer_head *indbh, *dibh; - u64 *eablk, *end; + __be64 *eablk, *end; unsigned int rg_blocks = 0; u64 bstart = 0; unsigned int blen = 0; @@ -1319,7 +1323,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) goto out; } - eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); + eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); end = eablk + sdp->sd_inptrs; for (; eablk < end; eablk++) { @@ -1363,7 +1367,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) gfs2_trans_add_bh(ip->i_gl, indbh, 1); - eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); + eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); bstart = 0; blen = 0; @@ -1387,6 +1391,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) if (!ip->i_di.di_blocks) gfs2_consist_inode(ip); ip->i_di.di_blocks--; + gfs2_set_inode_blocks(&ip->i_inode); } if (bstart) gfs2_free_meta(ip, bstart, blen); @@ -1396,7 +1401,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1441,11 +1446,12 @@ static int ea_dealloc_block(struct gfs2_inode *ip) if (!ip->i_di.di_blocks) gfs2_consist_inode(ip); ip->i_di.di_blocks--; + gfs2_set_inode_blocks(&ip->i_inode); error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h index ffa65947d68..c82dbe01d71 100644 --- a/fs/gfs2/eattr.h +++ b/fs/gfs2/eattr.h @@ -19,7 +19,7 @@ struct iattr; #define GFS2_EA_SIZE(ea) \ ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \ - (sizeof(u64) * (ea)->ea_num_ptrs)), 8) + (sizeof(__be64) * (ea)->ea_num_ptrs)), 8) #define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs) #define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST) @@ -29,13 +29,13 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8) #define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \ - sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8) + sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8) #define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1)) #define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len) #define GFS2_EA2DATAPTRS(ea) \ -((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8))) +((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8))) #define GFS2_EA2NEXT(ea) \ ((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea))) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 55f5333dae9..438146904b5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -96,7 +96,7 @@ static inline rwlock_t *gl_lock_addr(unsigned int x) return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; } #else /* not SMP, so no spinlocks required */ -static inline rwlock_t *gl_lock_addr(x) +static inline rwlock_t *gl_lock_addr(unsigned int x) { return NULL; } @@ -769,7 +769,7 @@ restart: } else { spin_unlock(&gl->gl_spin); - new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL); + new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS); if (!new_gh) return; set_bit(HIF_DEMOTE, &new_gh->gh_iflags); @@ -785,21 +785,6 @@ out: gfs2_holder_put(new_gh); } -void gfs2_glock_inode_squish(struct inode *inode) -{ - struct gfs2_holder gh; - struct gfs2_glock *gl = GFS2_I(inode)->i_gl; - gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh); - set_bit(HIF_DEMOTE, &gh.gh_iflags); - spin_lock(&gl->gl_spin); - gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders)); - list_add_tail(&gh.gh_list, &gl->gl_waiters2); - run_queue(gl); - spin_unlock(&gl->gl_spin); - wait_for_completion(&gh.gh_wait); - gfs2_holder_uninit(&gh); -} - /** * state_change - record that the glock is now in a different state * @gl: the glock @@ -847,12 +832,12 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) { if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA | DIO_DATA); + glops->go_inval(gl, DIO_METADATA); } else if (gl->gl_state == LM_ST_DEFERRED) { /* We might not want to do this here. Look at moving to the inode glops. */ if (glops->go_inval) - glops->go_inval(gl, DIO_DATA); + glops->go_inval(gl, 0); } /* Deal with each possible exit condition */ @@ -954,7 +939,7 @@ void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) gfs2_assert_warn(sdp, state != gl->gl_state); if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE); + glops->go_sync(gl); gfs2_glock_hold(gl); gl->gl_req_bh = xmote_bh; @@ -995,7 +980,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) state_change(gl, LM_ST_UNLOCKED); if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA | DIO_DATA); + glops->go_inval(gl, DIO_METADATA); if (gh) { spin_lock(&gl->gl_spin); @@ -1041,7 +1026,7 @@ void gfs2_glock_drop_th(struct gfs2_glock *gl) gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE); + glops->go_sync(gl); gfs2_glock_hold(gl); gl->gl_req_bh = drop_bh; @@ -1244,9 +1229,6 @@ restart: clear_bit(GLF_PREFETCH, &gl->gl_flags); - if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP)) - dump_glock(gl); - return error; } @@ -1923,7 +1905,7 @@ out: static void scan_glock(struct gfs2_glock *gl) { - if (gl->gl_ops == &gfs2_inode_glops) + if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) return; if (gfs2_glmutex_trylock(gl)) { @@ -2078,7 +2060,7 @@ static int dump_inode(struct gfs2_inode *ip) printk(KERN_INFO " num = %llu %llu\n", (unsigned long long)ip->i_num.no_formal_ino, (unsigned long long)ip->i_num.no_addr); - printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode)); + printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); printk(KERN_INFO " i_flags ="); for (x = 0; x < 32; x++) if (test_bit(x, &ip->i_flags)) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2b2a889ee2c..fb39108fc05 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -27,8 +27,6 @@ #define GL_ATIME 0x00000200 #define GL_NOCACHE 0x00000400 #define GL_NOCANCEL 0x00001000 -#define GL_AOP 0x00004000 -#define GL_DUMP 0x00008000 #define GLR_TRYFAILED 13 #define GLR_CANCELED 14 @@ -108,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, unsigned int state, int flags); -void gfs2_glock_inode_squish(struct inode *inode); /** * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 41a6b6818a5..b068d10bcb6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -92,7 +92,7 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) ip = gl->gl_object; inode = &ip->i_inode; - if (!ip || !S_ISREG(ip->i_di.di_mode)) + if (!ip || !S_ISREG(inode->i_mode)) return; if (!test_bit(GIF_PAGED, &ip->i_flags)) @@ -107,89 +107,20 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) } /** - * gfs2_page_inval - Invalidate all pages associated with a glock - * @gl: the glock - * - */ - -static void gfs2_page_inval(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip; - struct inode *inode; - - ip = gl->gl_object; - inode = &ip->i_inode; - if (!ip || !S_ISREG(ip->i_di.di_mode)) - return; - - truncate_inode_pages(inode->i_mapping, 0); - gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages); - clear_bit(GIF_PAGED, &ip->i_flags); -} - -/** - * gfs2_page_wait - Wait for writeback of data - * @gl: the glock - * - * Syncs data (not metadata) for a regular file. - * No-op for all other types. - */ - -static void gfs2_page_wait(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip = gl->gl_object; - struct inode *inode = &ip->i_inode; - struct address_space *mapping = inode->i_mapping; - int error; - - if (!S_ISREG(ip->i_di.di_mode)) - return; - - error = filemap_fdatawait(mapping); - - /* Put back any errors cleared by filemap_fdatawait() - so they can be caught by someone who can pass them - up to user space. */ - - if (error == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else if (error) - set_bit(AS_EIO, &mapping->flags); - -} - -static void gfs2_page_writeback(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip = gl->gl_object; - struct inode *inode = &ip->i_inode; - struct address_space *mapping = inode->i_mapping; - - if (!S_ISREG(ip->i_di.di_mode)) - return; - - filemap_fdatawrite(mapping); -} - -/** * meta_go_sync - sync out the metadata for this glock * @gl: the glock - * @flags: DIO_* * * Called when demoting or unlocking an EX glock. We must flush * to disk all dirty buffers/pages relating to this glock, and must not * not return to caller to demote/unlock the glock until I/O is complete. */ -static void meta_go_sync(struct gfs2_glock *gl, int flags) +static void meta_go_sync(struct gfs2_glock *gl) { - if (!(flags & DIO_METADATA)) - return; - if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { gfs2_log_flush(gl->gl_sbd, gl); gfs2_meta_sync(gl); - if (flags & DIO_RELEASE) - gfs2_ail_empty_gl(gl); + gfs2_ail_empty_gl(gl); } } @@ -264,31 +195,31 @@ static void inode_go_drop_th(struct gfs2_glock *gl) /** * inode_go_sync - Sync the dirty data and/or metadata for an inode glock * @gl: the glock protecting the inode - * @flags: * */ -static void inode_go_sync(struct gfs2_glock *gl, int flags) +static void inode_go_sync(struct gfs2_glock *gl) { - int meta = (flags & DIO_METADATA); - int data = (flags & DIO_DATA); + struct gfs2_inode *ip = gl->gl_object; + + if (ip && !S_ISREG(ip->i_inode.i_mode)) + ip = NULL; if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - if (meta && data) { - gfs2_page_writeback(gl); - gfs2_log_flush(gl->gl_sbd, gl); - gfs2_meta_sync(gl); - gfs2_page_wait(gl); - clear_bit(GLF_DIRTY, &gl->gl_flags); - } else if (meta) { - gfs2_log_flush(gl->gl_sbd, gl); - gfs2_meta_sync(gl); - } else if (data) { - gfs2_page_writeback(gl); - gfs2_page_wait(gl); + gfs2_log_flush(gl->gl_sbd, gl); + if (ip) + filemap_fdatawrite(ip->i_inode.i_mapping); + gfs2_meta_sync(gl); + if (ip) { + struct address_space *mapping = ip->i_inode.i_mapping; + int error = filemap_fdatawait(mapping); + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else if (error) + set_bit(AS_EIO, &mapping->flags); } - if (flags & DIO_RELEASE) - gfs2_ail_empty_gl(gl); + clear_bit(GLF_DIRTY, &gl->gl_flags); + gfs2_ail_empty_gl(gl); } } @@ -301,15 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl, int flags) static void inode_go_inval(struct gfs2_glock *gl, int flags) { + struct gfs2_inode *ip = gl->gl_object; int meta = (flags & DIO_METADATA); - int data = (flags & DIO_DATA); if (meta) { gfs2_meta_inval(gl); - gl->gl_vn++; + if (ip) + set_bit(GIF_INVALID, &ip->i_flags); + } + + if (ip && S_ISREG(ip->i_inode.i_mode)) { + truncate_inode_pages(ip->i_inode.i_mapping, 0); + gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages); + clear_bit(GIF_PAGED, &ip->i_flags); } - if (data) - gfs2_page_inval(gl); } /** @@ -351,11 +287,10 @@ static int inode_go_lock(struct gfs2_holder *gh) if (!ip) return 0; - if (ip->i_vn != gl->gl_vn) { + if (test_bit(GIF_INVALID, &ip->i_flags)) { error = gfs2_inode_refresh(ip); if (error) return error; - gfs2_inode_attr_in(ip); } if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && @@ -379,11 +314,8 @@ static void inode_go_unlock(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_inode *ip = gl->gl_object; - if (ip == NULL) - return; - if (test_bit(GLF_DIRTY, &gl->gl_flags)) - gfs2_inode_attr_in(ip); - gfs2_meta_cache_flush(ip); + if (ip) + gfs2_meta_cache_flush(ip); } /** @@ -491,13 +423,13 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_log_header head; + struct gfs2_log_header_host head; int error; if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode)); - j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); if (error) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 118dc693d11..734421edae8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -14,8 +14,6 @@ #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 -#define DIO_DATA 0x00000040 -#define DIO_RELEASE 0x00000080 #define DIO_ALL 0x00000100 struct gfs2_log_operations; @@ -41,7 +39,7 @@ struct gfs2_log_operations { void (*lo_before_commit) (struct gfs2_sbd *sdp); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); void (*lo_before_scan) (struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass); + struct gfs2_log_header_host *head, int pass); int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass); @@ -67,8 +65,8 @@ struct gfs2_rgrpd { struct list_head rd_list_mru; struct list_head rd_recent; /* Recently used rgrps */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ - struct gfs2_rindex rd_ri; - struct gfs2_rgrp rd_rg; + struct gfs2_rindex_host rd_ri; + struct gfs2_rgrp_host rd_rg; u64 rd_rg_vn; struct gfs2_bitmap *rd_bits; unsigned int rd_bh_count; @@ -103,18 +101,17 @@ struct gfs2_bufdata { }; struct gfs2_glock_operations { - void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state, - int flags); - void (*go_xmote_bh) (struct gfs2_glock * gl); - void (*go_drop_th) (struct gfs2_glock * gl); - void (*go_drop_bh) (struct gfs2_glock * gl); - void (*go_sync) (struct gfs2_glock * gl, int flags); - void (*go_inval) (struct gfs2_glock * gl, int flags); - int (*go_demote_ok) (struct gfs2_glock * gl); - int (*go_lock) (struct gfs2_holder * gh); - void (*go_unlock) (struct gfs2_holder * gh); - void (*go_callback) (struct gfs2_glock * gl, unsigned int state); - void (*go_greedy) (struct gfs2_glock * gl); + void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); + void (*go_xmote_bh) (struct gfs2_glock *gl); + void (*go_drop_th) (struct gfs2_glock *gl); + void (*go_drop_bh) (struct gfs2_glock *gl); + void (*go_sync) (struct gfs2_glock *gl); + void (*go_inval) (struct gfs2_glock *gl, int flags); + int (*go_demote_ok) (struct gfs2_glock *gl); + int (*go_lock) (struct gfs2_holder *gh); + void (*go_unlock) (struct gfs2_holder *gh); + void (*go_callback) (struct gfs2_glock *gl, unsigned int state); + void (*go_greedy) (struct gfs2_glock *gl); const int go_type; }; @@ -217,6 +214,7 @@ struct gfs2_alloc { }; enum { + GIF_INVALID = 0, GIF_QD_LOCKED = 1, GIF_PAGED = 2, GIF_SW_PAGED = 3, @@ -224,12 +222,11 @@ enum { struct gfs2_inode { struct inode i_inode; - struct gfs2_inum i_num; + struct gfs2_inum_host i_num; unsigned long i_flags; /* GIF_... */ - u64 i_vn; - struct gfs2_dinode i_di; /* To be replaced by ref to block */ + struct gfs2_dinode_host i_di; /* To be replaced by ref to block */ struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; @@ -450,7 +447,7 @@ struct gfs2_sbd { struct super_block *sd_vfs_meta; struct kobject sd_kobj; unsigned long sd_flags; /* SDF_... */ - struct gfs2_sb sd_sb; + struct gfs2_sb_host sd_sb; /* Constants computed on mount */ @@ -503,8 +500,8 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct mutex sd_statfs_mutex; - struct gfs2_statfs_change sd_statfs_master; - struct gfs2_statfs_change sd_statfs_local; + struct gfs2_statfs_change_host sd_statfs_master; + struct gfs2_statfs_change_host sd_statfs_local; unsigned long sd_statfs_sync_time; /* Resource group stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d470e5286ec..d122074c45e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -38,83 +38,12 @@ #include "trans.h" #include "util.h" -/** - * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode - * @ip: The GFS2 inode (with embedded disk inode data) - * @inode: The Linux VFS inode - * - */ - -void gfs2_inode_attr_in(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_dinode *di = &ip->i_di; - - inode->i_ino = ip->i_num.no_addr; - - switch (di->di_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = MKDEV(di->di_major, di->di_minor); - break; - default: - inode->i_rdev = 0; - break; - }; - - inode->i_mode = di->di_mode; - inode->i_nlink = di->di_nlink; - inode->i_uid = di->di_uid; - inode->i_gid = di->di_gid; - i_size_write(inode, di->di_size); - inode->i_atime.tv_sec = di->di_atime; - inode->i_mtime.tv_sec = di->di_mtime; - inode->i_ctime.tv_sec = di->di_ctime; - inode->i_atime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; - inode->i_blocks = di->di_blocks << - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); - - if (di->di_flags & GFS2_DIF_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - - if (di->di_flags & GFS2_DIF_APPENDONLY) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; -} - -/** - * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode - * @ip: The GFS2 inode - * - * Only copy out the attributes that we want the VFS layer - * to be able to modify. - */ - -void gfs2_inode_attr_out(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_dinode *di = &ip->i_di; - gfs2_assert_withdraw(GFS2_SB(inode), - (di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT)); - di->di_mode = inode->i_mode; - di->di_uid = inode->i_uid; - di->di_gid = inode->i_gid; - di->di_atime = inode->i_atime.tv_sec; - di->di_mtime = inode->i_mtime.tv_sec; - di->di_ctime = inode->i_ctime.tv_sec; -} - static int iget_test(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum *inum = opaque; + struct gfs2_inum_host *inum = opaque; - if (ip && ip->i_num.no_addr == inum->no_addr) + if (ip->i_num.no_addr == inum->no_addr) return 1; return 0; @@ -123,19 +52,20 @@ static int iget_test(struct inode *inode, void *opaque) static int iget_set(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum *inum = opaque; + struct gfs2_inum_host *inum = opaque; ip->i_num = *inum; + inode->i_ino = inum->no_addr; return 0; } -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum) +struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum) { return ilookup5(sb, (unsigned long)inum->no_formal_ino, iget_test, inum); } -static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) +static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum) { return iget5_locked(sb, (unsigned long)inum->no_formal_ino, iget_test, iget_set, inum); @@ -150,7 +80,7 @@ static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) * Returns: A VFS inode, or an error */ -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type) +struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type) { struct inode *inode = gfs2_iget(sb, inum); struct gfs2_inode *ip = GFS2_I(inode); @@ -188,7 +118,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, if (unlikely(error)) goto fail_put; - ip->i_vn = ip->i_gl->gl_vn - 1; + set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail_iopen; @@ -208,6 +138,63 @@ fail: return ERR_PTR(error); } +static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) +{ + struct gfs2_dinode_host *di = &ip->i_di; + const struct gfs2_dinode *str = buf; + + if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(ip); + return -EIO; + } + if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) + return -ESTALE; + + ip->i_inode.i_mode = be32_to_cpu(str->di_mode); + ip->i_inode.i_rdev = 0; + switch (ip->i_inode.i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); + break; + }; + + ip->i_inode.i_uid = be32_to_cpu(str->di_uid); + ip->i_inode.i_gid = be32_to_cpu(str->di_gid); + /* + * We will need to review setting the nlink count here in the + * light of the forthcoming ro bind mount work. This is a reminder + * to do that. + */ + ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); + di->di_size = be64_to_cpu(str->di_size); + i_size_write(&ip->i_inode, di->di_size); + di->di_blocks = be64_to_cpu(str->di_blocks); + gfs2_set_inode_blocks(&ip->i_inode); + ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); + ip->i_inode.i_atime.tv_nsec = 0; + ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + ip->i_inode.i_mtime.tv_nsec = 0; + ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + ip->i_inode.i_ctime.tv_nsec = 0; + + di->di_goal_meta = be64_to_cpu(str->di_goal_meta); + di->di_goal_data = be64_to_cpu(str->di_goal_data); + di->di_generation = be64_to_cpu(str->di_generation); + + di->di_flags = be32_to_cpu(str->di_flags); + gfs2_set_inode_flags(&ip->i_inode); + di->di_height = be16_to_cpu(str->di_height); + + di->di_depth = be16_to_cpu(str->di_depth); + di->di_entries = be32_to_cpu(str->di_entries); + + di->di_eattr = be64_to_cpu(str->di_eattr); + return 0; +} + /** * gfs2_inode_refresh - Refresh the incore copy of the dinode * @ip: The GFS2 inode @@ -229,21 +216,11 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) return -EIO; } - gfs2_dinode_in(&ip->i_di, dibh->b_data); - + error = gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); + clear_bit(GIF_INVALID, &ip->i_flags); - if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); - return -EIO; - } - if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino) - return -ESTALE; - - ip->i_vn = ip->i_gl->gl_vn; - - return 0; + return error; } int gfs2_dinode_dealloc(struct gfs2_inode *ip) @@ -255,7 +232,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (ip->i_di.di_blocks != 1) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } @@ -318,14 +295,14 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) u32 nlink; int error; - BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink); - nlink = ip->i_di.di_nlink + diff; + BUG_ON(diff != 1 && diff != -1); + nlink = ip->i_inode.i_nlink + diff; /* If we are reducing the nlink count, but the new value ends up being bigger than the old one, we must have underflowed. */ - if (diff < 0 && nlink > ip->i_di.di_nlink) { + if (diff < 0 && nlink > ip->i_inode.i_nlink) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } @@ -333,16 +310,19 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) if (error) return error; - ip->i_di.di_nlink = nlink; - ip->i_di.di_ctime = get_seconds(); - ip->i_inode.i_nlink = nlink; + if (diff > 0) + inc_nlink(&ip->i_inode); + else + drop_nlink(&ip->i_inode); + + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); mark_inode_dirty(&ip->i_inode); - if (ip->i_di.di_nlink == 0) { + if (ip->i_inode.i_nlink == 0) { struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh, rg_gh; @@ -357,7 +337,6 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) if (error) goto out_norgrp; - clear_nlink(&ip->i_inode); gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ gfs2_glock_dq_uninit(&rg_gh); out_norgrp: @@ -394,7 +373,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_holder d_gh; - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error = 0; struct inode *inode = NULL; @@ -436,7 +415,7 @@ static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) { struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); struct buffer_head *bh; - struct gfs2_inum_range ir; + struct gfs2_inum_range_host ir; int error; error = gfs2_trans_begin(sdp, RES_DINODE, 0); @@ -479,7 +458,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); struct gfs2_holder gh; struct buffer_head *bh; - struct gfs2_inum_range ir; + struct gfs2_inum_range_host ir; int error; error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); @@ -500,21 +479,22 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) if (!ir.ir_length) { struct buffer_head *m_bh; u64 x, y; + __be64 z; error = gfs2_meta_inode_buffer(m_ip, &m_bh); if (error) goto out_brelse; - x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); - x = y = be64_to_cpu(x); + z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); + x = y = be64_to_cpu(z); ir.ir_start = x; ir.ir_length = GFS2_INUM_QUANTUM; x += GFS2_INUM_QUANTUM; if (x < y) gfs2_consist_inode(m_ip); - x = cpu_to_be64(x); + z = cpu_to_be64(x); gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); - *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x; + *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; brelse(m_bh); } @@ -567,7 +547,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, return error; /* Don't create entries in an unlinked directory */ - if (!dip->i_di.di_nlink) + if (!dip->i_inode.i_nlink) return -EPERM; error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); @@ -583,7 +563,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, if (dip->i_di.di_entries == (u32)-1) return -EFBIG; - if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1) + if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) return -EMLINK; return 0; @@ -593,24 +573,24 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, unsigned int *uid, unsigned int *gid) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && - (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) { + (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { if (S_ISDIR(*mode)) *mode |= S_ISUID; - else if (dip->i_di.di_uid != current->fsuid) + else if (dip->i_inode.i_uid != current->fsuid) *mode &= ~07111; - *uid = dip->i_di.di_uid; + *uid = dip->i_inode.i_uid; } else *uid = current->fsuid; - if (dip->i_di.di_mode & S_ISGID) { + if (dip->i_inode.i_mode & S_ISGID) { if (S_ISDIR(*mode)) *mode |= S_ISGID; - *gid = dip->i_di.di_gid; + *gid = dip->i_inode.i_gid; } else *gid = current->fsgid; } -static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum, +static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); @@ -650,9 +630,9 @@ out: */ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - const struct gfs2_inum *inum, unsigned int mode, + const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, - const u64 *generation) + const u64 *generation, dev_t dev) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; @@ -669,14 +649,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_mode = cpu_to_be32(mode); di->di_uid = cpu_to_be32(uid); di->di_gid = cpu_to_be32(gid); - di->di_nlink = cpu_to_be32(0); - di->di_size = cpu_to_be64(0); + di->di_nlink = 0; + di->di_size = 0; di->di_blocks = cpu_to_be64(1); di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds()); - di->di_major = di->di_minor = cpu_to_be32(0); + di->di_major = cpu_to_be32(MAJOR(dev)); + di->di_minor = cpu_to_be32(MINOR(dev)); di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); di->di_generation = cpu_to_be64(*generation); - di->di_flags = cpu_to_be32(0); + di->di_flags = 0; if (S_ISREG(mode)) { if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || @@ -693,22 +674,22 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, } di->__pad1 = 0; - di->di_payload_format = cpu_to_be32(0); - di->di_height = cpu_to_be32(0); + di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); + di->di_height = 0; di->__pad2 = 0; di->__pad3 = 0; - di->di_depth = cpu_to_be16(0); - di->di_entries = cpu_to_be32(0); + di->di_depth = 0; + di->di_entries = 0; memset(&di->__pad4, 0, sizeof(di->__pad4)); - di->di_eattr = cpu_to_be64(0); + di->di_eattr = 0; memset(&di->di_reserved, 0, sizeof(di->di_reserved)); brelse(dibh); } static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - unsigned int mode, const struct gfs2_inum *inum, - const u64 *generation) + unsigned int mode, const struct gfs2_inum_host *inum, + const u64 *generation, dev_t dev) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); unsigned int uid, gid; @@ -729,7 +710,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation); + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); gfs2_quota_change(dip, +1, uid, gid); gfs2_trans_end(sdp); @@ -759,8 +740,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (alloc_required < 0) goto fail; if (alloc_required) { - error = gfs2_quota_check(dip, dip->i_di.di_uid, - dip->i_di.di_gid); + error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); if (error) goto fail_quota_locks; @@ -782,16 +762,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode)); + error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode)); if (error) goto fail_end_trans; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - ip->i_di.di_nlink = 1; + ip->i_inode.i_nlink = 1; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); return 0; @@ -860,13 +840,13 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) */ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode) + unsigned int mode, dev_t dev) { struct inode *inode; struct gfs2_inode *dip = ghs->gh_gl->gl_object; struct inode *dir = &dip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_inum inum; + struct gfs2_inum_host inum; int error; u64 generation; @@ -890,35 +870,12 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock; - if (inum.no_addr < dip->i_num.no_addr) { - gfs2_glock_dq(ghs); - - error = gfs2_glock_nq_num(sdp, inum.no_addr, - &gfs2_inode_glops, LM_ST_EXCLUSIVE, - GL_SKIP, ghs + 1); - if (error) { - return ERR_PTR(error); - } - - gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); - error = gfs2_glock_nq(ghs); - if (error) { - gfs2_glock_dq_uninit(ghs + 1); - return ERR_PTR(error); - } - - error = create_ok(dip, name, mode); - if (error) - goto fail_gunlock2; - } else { - error = gfs2_glock_nq_num(sdp, inum.no_addr, - &gfs2_inode_glops, LM_ST_EXCLUSIVE, - GL_SKIP, ghs + 1); - if (error) - goto fail_gunlock; - } + error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock; - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation); + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); if (error) goto fail_gunlock2; @@ -975,7 +932,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, if (ip->i_di.di_entries != 2) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } @@ -997,7 +954,12 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_change_nlink(ip, -2); + /* It looks odd, but it really should be done twice */ + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); if (error) return error; @@ -1018,16 +980,16 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error; if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) return -EPERM; - if ((dip->i_di.di_mode & S_ISVTX) && - dip->i_di.di_uid != current->fsuid && - ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER)) + if ((dip->i_inode.i_mode & S_ISVTX) && + dip->i_inode.i_uid != current->fsuid && + ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER)) return -EPERM; if (IS_APPEND(&dip->i_inode)) @@ -1044,7 +1006,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (!gfs2_inum_equal(&inum, &ip->i_num)) return -ENOENT; - if (IF2DT(ip->i_di.di_mode) != type) { + if (IF2DT(ip->i_inode.i_mode) != type) { gfs2_consist_inode(dip); return -EIO; } @@ -1194,7 +1156,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) return 0; curtime = get_seconds(); - if (curtime - ip->i_di.di_atime >= quantum) { + if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, gh); @@ -1206,7 +1168,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) trying to get exclusive lock. */ curtime = get_seconds(); - if (curtime - ip->i_di.di_atime >= quantum) { + if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { struct buffer_head *dibh; struct gfs2_dinode *di; @@ -1220,11 +1182,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) if (error) goto fail_end_trans; - ip->i_di.di_atime = curtime; + ip->i_inode.i_atime.tv_sec = curtime; gfs2_trans_add_bh(ip->i_gl, dibh, 1); di = (struct gfs2_dinode *)dibh->b_data; - di->di_atime = cpu_to_be64(ip->i_di.di_atime); + di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); brelse(dibh); gfs2_trans_end(sdp); @@ -1249,92 +1211,6 @@ fail: return error; } -/** - * glock_compare_atime - Compare two struct gfs2_glock structures for sort - * @arg_a: the first structure - * @arg_b: the second structure - * - * Returns: 1 if A > B - * -1 if A < B - * 0 if A == B - */ - -static int glock_compare_atime(const void *arg_a, const void *arg_b) -{ - const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a; - const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b; - const struct lm_lockname *a = &gh_a->gh_gl->gl_name; - const struct lm_lockname *b = &gh_b->gh_gl->gl_name; - - if (a->ln_number > b->ln_number) - return 1; - if (a->ln_number < b->ln_number) - return -1; - if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) - return 1; - if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME)) - return 1; - - return 0; -} - -/** - * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an - * atime update - * @num_gh: the number of structures - * @ghs: an array of struct gfs2_holder structures - * - * Returns: 0 on success (all glocks acquired), - * errno on failure (no glocks acquired) - */ - -int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs) -{ - struct gfs2_holder **p; - unsigned int x; - int error = 0; - - if (!num_gh) - return 0; - - if (num_gh == 1) { - ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); - if (ghs->gh_flags & GL_ATIME) - error = gfs2_glock_nq_atime(ghs); - else - error = gfs2_glock_nq(ghs); - return error; - } - - p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL); - if (!p) - return -ENOMEM; - - for (x = 0; x < num_gh; x++) - p[x] = &ghs[x]; - - sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL); - - for (x = 0; x < num_gh; x++) { - p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); - - if (p[x]->gh_flags & GL_ATIME) - error = gfs2_glock_nq_atime(p[x]); - else - error = gfs2_glock_nq(p[x]); - - if (error) { - while (x--) - gfs2_glock_dq(p[x]); - break; - } - } - - kfree(p); - return error; -} - - static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { @@ -1345,10 +1221,8 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) if (!error) { error = inode_setattr(&ip->i_inode, attr); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_inode_attr_out(ip); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } return error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index f5d86176057..b57f448b15b 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -22,13 +22,19 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip) static inline int gfs2_is_dir(struct gfs2_inode *ip) { - return S_ISDIR(ip->i_di.di_mode); + return S_ISDIR(ip->i_inode.i_mode); +} + +static inline void gfs2_set_inode_blocks(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + inode->i_blocks = ip->i_di.di_blocks << + (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); } void gfs2_inode_attr_in(struct gfs2_inode *ip); -void gfs2_inode_attr_out(struct gfs2_inode *ip); -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type); -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum); +struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); +struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); int gfs2_inode_refresh(struct gfs2_inode *ip); @@ -37,19 +43,15 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root, struct nameidata *nd); struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode); + unsigned int mode, dev_t dev); int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip); int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip); int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); - int gfs2_glock_nq_atime(struct gfs2_holder *gh); -int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs); - int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); - struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); #endif /* __INODE_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0cace3da9db..291415ddfe5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -15,6 +15,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/lm_interface.h> +#include <linux/delay.h> #include "gfs2.h" #include "incore.h" @@ -142,7 +143,7 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl return list_empty(&ai->ai_ail1_list); } -void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) +static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) { struct list_head *head = &sdp->sd_ail1_list; u64 sync_gen; @@ -261,6 +262,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve * + * Note that we never give out the last 6 blocks of the journal. Thats + * due to the fact that there is are a small number of header blocks + * associated with each log flush. The exact number can't be known until + * flush time, so we ensure that we have just enough free blocks at all + * times to avoid running out during a log flush. + * * Returns: errno */ @@ -274,7 +281,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) mutex_lock(&sdp->sd_log_reserve_mutex); gfs2_log_lock(sdp); - while(sdp->sd_log_blks_free <= blks) { + while(sdp->sd_log_blks_free <= (blks + 6)) { gfs2_log_unlock(sdp); gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL); @@ -319,7 +326,8 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) bh_map.b_size = 1 << inode->i_blkbits; error = gfs2_block_map(inode, lbn, 0, &bh_map); if (error || !bh_map.b_blocknr) - printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn); + printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, + (unsigned long long)bh_map.b_blocknr, lbn); gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); return bh_map.b_blocknr; @@ -643,12 +651,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) up_read(&sdp->sd_log_flush_lock); gfs2_log_lock(sdp); - if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) { - gfs2_log_unlock(sdp); - gfs2_log_flush(sdp, NULL); - } else { - gfs2_log_unlock(sdp); - } + if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) + wake_up_process(sdp->sd_logd_process); + gfs2_log_unlock(sdp); } /** @@ -686,3 +691,21 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) up_write(&sdp->sd_log_flush_lock); } + +/** + * gfs2_meta_syncfs - sync all the buffers in a filesystem + * @sdp: the filesystem + * + */ + +void gfs2_meta_syncfs(struct gfs2_sbd *sdp) +{ + gfs2_log_flush(sdp, NULL); + for (;;) { + gfs2_ail1_start(sdp, DIO_ALL); + if (gfs2_ail1_empty(sdp, DIO_ALL)) + break; + msleep(10); + } +} + diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 7f5737d5561..8e7aa0f2910 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -48,7 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); -void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags); int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); @@ -61,5 +60,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); void gfs2_log_shutdown(struct gfs2_sbd *sdp); +void gfs2_meta_syncfs(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index ab6d1115f95..4d7f94d8c7b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -182,7 +182,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) } static void buf_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass) + struct gfs2_log_header_host *head, int pass) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); @@ -328,7 +328,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass) + struct gfs2_log_header_host *head, int pass) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); @@ -509,7 +509,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) { LIST_HEAD(started); struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; - struct buffer_head *bh = NULL; + struct buffer_head *bh = NULL,*bh1 = NULL; unsigned int offset = sizeof(struct gfs2_log_descriptor); struct gfs2_log_descriptor *ld; unsigned int limit; @@ -537,8 +537,13 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) { + /* store off the buffer head in a local ptr since + * gfs2_bufdata might change when we drop the log lock + */ + bh1 = bd1->bd_bh; + /* An ordered write buffer */ - if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) { + if (bh1 && !buffer_pinned(bh1)) { list_move(&bd1->bd_le.le_list, &started); if (bd1 == bd2) { bd2 = NULL; @@ -547,20 +552,21 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) bd_le.le_list); } total_dbuf--; - if (bd1->bd_bh) { - get_bh(bd1->bd_bh); - if (buffer_dirty(bd1->bd_bh)) { + if (bh1) { + if (buffer_dirty(bh1)) { + get_bh(bh1); + gfs2_log_unlock(sdp); - wait_on_buffer(bd1->bd_bh); - ll_rw_block(WRITE, 1, - &bd1->bd_bh); + + ll_rw_block(SWRITE, 1, &bh1); + brelse(bh1); + gfs2_log_lock(sdp); } - brelse(bd1->bd_bh); continue; } continue; - } else if (bd1->bd_bh) { /* A journaled buffer */ + } else if (bh1) { /* A journaled buffer */ int magic; gfs2_log_unlock(sdp); if (!bh) { @@ -582,16 +588,16 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) ld->ld_data2 = cpu_to_be32(0); memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); } - magic = gfs2_check_magic(bd1->bd_bh); - *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); + magic = gfs2_check_magic(bh1); + *ptr++ = cpu_to_be64(bh1->b_blocknr); *ptr++ = cpu_to_be64((__u64)magic); - clear_buffer_escaped(bd1->bd_bh); + clear_buffer_escaped(bh1); if (unlikely(magic != 0)) - set_buffer_escaped(bd1->bd_bh); + set_buffer_escaped(bh1); gfs2_log_lock(sdp); if (n++ > num) break; - } else if (!bd1->bd_bh) { + } else if (!bh1) { total_dbuf--; sdp->sd_log_num_databuf--; list_del_init(&bd1->bd_le.le_list); diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 5839c05ae6b..965bc65c7c6 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -60,7 +60,7 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) } static inline void lops_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, + struct gfs2_log_header_host *head, unsigned int pass) { int x; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 9889c1eacec..7c1a9e22a52 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -25,7 +25,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_inode *ip = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == @@ -37,7 +37,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long } } -static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_glock *gl = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3912d6a4b1e..0e34d991897 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -127,17 +127,17 @@ void gfs2_meta_sync(struct gfs2_glock *gl) /** * getbuf - Get a buffer with a given address space - * @sdp: the filesystem - * @aspace: the address space + * @gl: the glock * @blkno: the block number (filesystem scope) * @create: 1 if the buffer should be created * * Returns: the buffer */ -static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace, - u64 blkno, int create) +static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create) { + struct address_space *mapping = gl->gl_aspace->i_mapping; + struct gfs2_sbd *sdp = gl->gl_sbd; struct page *page; struct buffer_head *bh; unsigned int shift; @@ -150,13 +150,13 @@ static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace, if (create) { for (;;) { - page = grab_cache_page(aspace->i_mapping, index); + page = grab_cache_page(mapping, index); if (page) break; yield(); } } else { - page = find_lock_page(aspace->i_mapping, index); + page = find_lock_page(mapping, index); if (!page) return NULL; } @@ -202,7 +202,7 @@ static void meta_prep_new(struct buffer_head *bh) struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) { struct buffer_head *bh; - bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + bh = getbuf(gl, blkno, CREATE); meta_prep_new(bh); return bh; } @@ -220,7 +220,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + *bhp = getbuf(gl, blkno, CREATE); if (!buffer_uptodate(*bhp)) ll_rw_block(READ_META, 1, bhp); if (flags & DIO_WAIT) { @@ -379,11 +379,10 @@ void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct inode *aspace = ip->i_gl->gl_aspace; struct buffer_head *bh; while (blen) { - bh = getbuf(sdp, aspace, bstart, NO_CREATE); + bh = getbuf(ip->i_gl, bstart, NO_CREATE); if (bh) { struct gfs2_bufdata *bd = bh->b_private; @@ -472,6 +471,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; int in_cache = 0; + BUG_ON(!gl); + BUG_ON(!sdp); + spin_lock(&ip->i_spin); if (*bh_slot && (*bh_slot)->b_blocknr == num) { bh = *bh_slot; @@ -481,7 +483,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, spin_unlock(&ip->i_spin); if (!bh) - bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE); + bh = getbuf(gl, num, CREATE); if (!bh) return -ENOBUFS; @@ -532,7 +534,6 @@ err: struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) { struct gfs2_sbd *sdp = gl->gl_sbd; - struct inode *aspace = gl->gl_aspace; struct buffer_head *first_bh, *bh; u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift; @@ -544,7 +545,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (extlen > max_ra) extlen = max_ra; - first_bh = getbuf(sdp, aspace, dblock, CREATE); + first_bh = getbuf(gl, dblock, CREATE); if (buffer_uptodate(first_bh)) goto out; @@ -555,7 +556,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) extlen--; while (extlen) { - bh = getbuf(sdp, aspace, dblock, CREATE); + bh = getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) ll_rw_block(READA, 1, &bh); @@ -571,20 +572,3 @@ out: return first_bh; } -/** - * gfs2_meta_syncfs - sync all the buffers in a filesystem - * @sdp: the filesystem - * - */ - -void gfs2_meta_syncfs(struct gfs2_sbd *sdp) -{ - gfs2_log_flush(sdp, NULL); - for (;;) { - gfs2_ail1_start(sdp, DIO_ALL); - if (gfs2_ail1_empty(sdp, DIO_ALL)) - break; - msleep(10); - } -} - diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 3ec939e20df..e037425bc04 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -67,7 +67,6 @@ static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, } struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); -void gfs2_meta_syncfs(struct gfs2_sbd *sdp); #define buffer_busy(bh) \ ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 1025960b0e6..f2495f1e21a 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -15,6 +15,8 @@ #include "gfs2.h" #include <linux/gfs2_ondisk.h> +#include <linux/lm_interface.h> +#include "incore.h" #define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \ struct->member); @@ -32,7 +34,7 @@ * first arg: the cpu-order structure */ -void gfs2_inum_in(struct gfs2_inum *no, const void *buf) +void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf) { const struct gfs2_inum *str = buf; @@ -40,7 +42,7 @@ void gfs2_inum_in(struct gfs2_inum *no, const void *buf) no->no_addr = be64_to_cpu(str->no_addr); } -void gfs2_inum_out(const struct gfs2_inum *no, void *buf) +void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf) { struct gfs2_inum *str = buf; @@ -48,13 +50,13 @@ void gfs2_inum_out(const struct gfs2_inum *no, void *buf) str->no_addr = cpu_to_be64(no->no_addr); } -static void gfs2_inum_print(const struct gfs2_inum *no) +static void gfs2_inum_print(const struct gfs2_inum_host *no) { printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino); printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); } -static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf) +static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) { const struct gfs2_meta_header *str = buf; @@ -63,23 +65,7 @@ static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf) mh->mh_format = be32_to_cpu(str->mh_format); } -static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf) -{ - struct gfs2_meta_header *str = buf; - - str->mh_magic = cpu_to_be32(mh->mh_magic); - str->mh_type = cpu_to_be32(mh->mh_type); - str->mh_format = cpu_to_be32(mh->mh_format); -} - -static void gfs2_meta_header_print(const struct gfs2_meta_header *mh) -{ - pv(mh, mh_magic, "0x%.8X"); - pv(mh, mh_type, "%u"); - pv(mh, mh_format, "%u"); -} - -void gfs2_sb_in(struct gfs2_sb *sb, const void *buf) +void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) { const struct gfs2_sb *str = buf; @@ -97,7 +83,7 @@ void gfs2_sb_in(struct gfs2_sb *sb, const void *buf) memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); } -void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf) +void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf) { const struct gfs2_rindex *str = buf; @@ -109,7 +95,7 @@ void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf) } -void gfs2_rindex_print(const struct gfs2_rindex *ri) +void gfs2_rindex_print(const struct gfs2_rindex_host *ri) { printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr); pv(ri, ri_length, "%u"); @@ -120,22 +106,20 @@ void gfs2_rindex_print(const struct gfs2_rindex *ri) pv(ri, ri_bitbytes, "%u"); } -void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf) +void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) { const struct gfs2_rgrp *str = buf; - gfs2_meta_header_in(&rg->rg_header, buf); rg->rg_flags = be32_to_cpu(str->rg_flags); rg->rg_free = be32_to_cpu(str->rg_free); rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); } -void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf) +void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) { struct gfs2_rgrp *str = buf; - gfs2_meta_header_out(&rg->rg_header, buf); str->rg_flags = cpu_to_be32(rg->rg_flags); str->rg_free = cpu_to_be32(rg->rg_free); str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); @@ -144,7 +128,7 @@ void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } -void gfs2_quota_in(struct gfs2_quota *qu, const void *buf) +void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) { const struct gfs2_quota *str = buf; @@ -153,96 +137,56 @@ void gfs2_quota_in(struct gfs2_quota *qu, const void *buf) qu->qu_value = be64_to_cpu(str->qu_value); } -void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf) -{ - const struct gfs2_dinode *str = buf; - - gfs2_meta_header_in(&di->di_header, buf); - gfs2_inum_in(&di->di_num, &str->di_num); - - di->di_mode = be32_to_cpu(str->di_mode); - di->di_uid = be32_to_cpu(str->di_uid); - di->di_gid = be32_to_cpu(str->di_gid); - di->di_nlink = be32_to_cpu(str->di_nlink); - di->di_size = be64_to_cpu(str->di_size); - di->di_blocks = be64_to_cpu(str->di_blocks); - di->di_atime = be64_to_cpu(str->di_atime); - di->di_mtime = be64_to_cpu(str->di_mtime); - di->di_ctime = be64_to_cpu(str->di_ctime); - di->di_major = be32_to_cpu(str->di_major); - di->di_minor = be32_to_cpu(str->di_minor); - - di->di_goal_meta = be64_to_cpu(str->di_goal_meta); - di->di_goal_data = be64_to_cpu(str->di_goal_data); - di->di_generation = be64_to_cpu(str->di_generation); - - di->di_flags = be32_to_cpu(str->di_flags); - di->di_payload_format = be32_to_cpu(str->di_payload_format); - di->di_height = be16_to_cpu(str->di_height); - - di->di_depth = be16_to_cpu(str->di_depth); - di->di_entries = be32_to_cpu(str->di_entries); - - di->di_eattr = be64_to_cpu(str->di_eattr); - -} - -void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf) +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { + const struct gfs2_dinode_host *di = &ip->i_di; struct gfs2_dinode *str = buf; - gfs2_meta_header_out(&di->di_header, buf); - gfs2_inum_out(&di->di_num, (char *)&str->di_num); + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); + str->di_header.__pad0 = 0; + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_header.__pad1 = 0; - str->di_mode = cpu_to_be32(di->di_mode); - str->di_uid = cpu_to_be32(di->di_uid); - str->di_gid = cpu_to_be32(di->di_gid); - str->di_nlink = cpu_to_be32(di->di_nlink); + gfs2_inum_out(&ip->i_num, &str->di_num); + + str->di_mode = cpu_to_be32(ip->i_inode.i_mode); + str->di_uid = cpu_to_be32(ip->i_inode.i_uid); + str->di_gid = cpu_to_be32(ip->i_inode.i_gid); + str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); str->di_size = cpu_to_be64(di->di_size); str->di_blocks = cpu_to_be64(di->di_blocks); - str->di_atime = cpu_to_be64(di->di_atime); - str->di_mtime = cpu_to_be64(di->di_mtime); - str->di_ctime = cpu_to_be64(di->di_ctime); - str->di_major = cpu_to_be32(di->di_major); - str->di_minor = cpu_to_be32(di->di_minor); + str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); str->di_goal_meta = cpu_to_be64(di->di_goal_meta); str->di_goal_data = cpu_to_be64(di->di_goal_data); str->di_generation = cpu_to_be64(di->di_generation); str->di_flags = cpu_to_be32(di->di_flags); - str->di_payload_format = cpu_to_be32(di->di_payload_format); str->di_height = cpu_to_be16(di->di_height); - + str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(di->di_depth); str->di_entries = cpu_to_be32(di->di_entries); str->di_eattr = cpu_to_be64(di->di_eattr); - } -void gfs2_dinode_print(const struct gfs2_dinode *di) +void gfs2_dinode_print(const struct gfs2_inode *ip) { - gfs2_meta_header_print(&di->di_header); - gfs2_inum_print(&di->di_num); + const struct gfs2_dinode_host *di = &ip->i_di; + + gfs2_inum_print(&ip->i_num); - pv(di, di_mode, "0%o"); - pv(di, di_uid, "%u"); - pv(di, di_gid, "%u"); - pv(di, di_nlink, "%u"); printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks); - printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime); - printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime); - printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime); - pv(di, di_major, "%u"); - pv(di, di_minor, "%u"); - printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta); printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data); pv(di, di_flags, "0x%.8X"); - pv(di, di_payload_format, "%u"); pv(di, di_height, "%u"); pv(di, di_depth, "%u"); @@ -251,7 +195,7 @@ void gfs2_dinode_print(const struct gfs2_dinode *di) printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr); } -void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf) +void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) { const struct gfs2_log_header *str = buf; @@ -263,7 +207,7 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf) lh->lh_hash = be32_to_cpu(str->lh_hash); } -void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf) +void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) { const struct gfs2_inum_range *str = buf; @@ -271,7 +215,7 @@ void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf) ir->ir_length = be64_to_cpu(str->ir_length); } -void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf) +void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) { struct gfs2_inum_range *str = buf; @@ -279,7 +223,7 @@ void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf) str->ir_length = cpu_to_be64(ir->ir_length); } -void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf) +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; @@ -288,7 +232,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf) sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); } -void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf) +void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) { struct gfs2_statfs_change *str = buf; @@ -297,7 +241,7 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf) str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); } -void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf) +void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) { const struct gfs2_quota_change *str = buf; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 015640b3f12..d8d69a72a10 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -156,19 +156,6 @@ out_ignore: return 0; } -static int zero_readpage(struct page *page) -{ - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - - SetPageUptodate(page); - - return 0; -} - /** * stuffed_readpage - Fill in a Linux page with stuffed file data * @ip: the inode @@ -183,9 +170,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) void *kaddr; int error; - /* Only the first page of a stuffed file might contain data */ - if (unlikely(page->index)) - return zero_readpage(page); + BUG_ON(page->index); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -230,9 +215,9 @@ static int gfs2_readpage(struct file *file, struct page *page) /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ goto skip_lock; } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); do_unlock = 1; - error = gfs2_glock_nq_m_atime(1, &gh); + error = gfs2_glock_nq_atime(&gh); if (unlikely(error)) goto out_unlock; } @@ -254,6 +239,8 @@ skip_lock: out: return error; out_unlock: + if (error == GLR_TRYFAILED) + error = AOP_TRUNCATED_PAGE; unlock_page(page); if (do_unlock) gfs2_holder_uninit(&gh); @@ -293,9 +280,9 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, goto skip_lock; } gfs2_holder_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh); + LM_FLAG_TRY_1CB|GL_ATIME, &gh); do_unlock = 1; - ret = gfs2_glock_nq_m_atime(1, &gh); + ret = gfs2_glock_nq_atime(&gh); if (ret == GLR_TRYFAILED) goto out_noerror; if (unlikely(ret)) @@ -366,10 +353,13 @@ static int gfs2_prepare_write(struct file *file, struct page *page, unsigned int write_len = to - from; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh); - error = gfs2_glock_nq_m_atime(1, &ip->i_gh); - if (error) + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); + error = gfs2_glock_nq_atime(&ip->i_gh); + if (unlikely(error)) { + if (error == GLR_TRYFAILED) + error = AOP_TRUNCATED_PAGE; goto out_uninit; + } gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks); @@ -386,7 +376,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page, if (error) goto out_alloc_put; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_qunlock; @@ -482,8 +472,10 @@ static int gfs2_commit_write(struct file *file, struct page *page, SetPageUptodate(page); - if (inode->i_size < file_size) + if (inode->i_size < file_size) { i_size_write(inode, file_size); + mark_inode_dirty(inode); + } } else { if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) @@ -498,11 +490,6 @@ static int gfs2_commit_write(struct file *file, struct page *page, di->di_size = cpu_to_be64(inode->i_size); } - di->di_mode = cpu_to_be32(inode->i_mode); - di->di_atime = cpu_to_be64(inode->i_atime.tv_sec); - di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); - di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); - brelse(dibh); gfs2_trans_end(sdp); if (al->al_requested) { @@ -624,7 +611,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, * on this path. All we need change is atime. */ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); - rv = gfs2_glock_nq_m_atime(1, &gh); + rv = gfs2_glock_nq_atime(&gh); if (rv) goto out; @@ -737,6 +724,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) if (!atomic_read(&aspace->i_writecount)) return 0; + if (!(gfp_mask & __GFP_WAIT)) + return 0; + if (time_after_eq(jiffies, t)) { stuck_releasepage(bh); /* should we withdraw here? */ diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index 00041b1b802..d355899585d 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode = dentry->d_inode; struct gfs2_holder d_gh; struct gfs2_inode *ip; - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error; @@ -76,7 +76,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) if (!gfs2_inum_equal(&ip->i_num, &inum)) goto invalid_gunlock; - if (IF2DT(ip->i_di.di_mode) != type) { + if (IF2DT(ip->i_inode.i_mode) != type) { gfs2_consist_inode(dip); goto fail_gunlock; } diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 86127d93bd3..b4e7b877531 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -27,15 +27,16 @@ #include "util.h" static struct dentry *gfs2_decode_fh(struct super_block *sb, - __u32 *fh, + __u32 *p, int fh_len, int fh_type, int (*acceptable)(void *context, struct dentry *dentry), void *context) { + __be32 *fh = (__force __be32 *)p; struct gfs2_fh_obj fh_obj; - struct gfs2_inum *this, parent; + struct gfs2_inum_host *this, parent; if (fh_type != fh_len) return NULL; @@ -65,9 +66,10 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb, acceptable, context); } -static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, +static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, int connectable) { + __be32 *fh = (__force __be32 *)p; struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); @@ -76,14 +78,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, (connectable && *len < GFS2_LARGE_FH_SIZE)) return 255; - fh[0] = ip->i_num.no_formal_ino >> 32; - fh[0] = cpu_to_be32(fh[0]); - fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF; - fh[1] = cpu_to_be32(fh[1]); - fh[2] = ip->i_num.no_addr >> 32; - fh[2] = cpu_to_be32(fh[2]); - fh[3] = ip->i_num.no_addr & 0xFFFFFFFF; - fh[3] = cpu_to_be32(fh[3]); + fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); + fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); + fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32); + fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; if (!connectable || inode == sb->s_root->d_inode) @@ -95,14 +93,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, igrab(inode); spin_unlock(&dentry->d_lock); - fh[4] = ip->i_num.no_formal_ino >> 32; - fh[4] = cpu_to_be32(fh[4]); - fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF; - fh[5] = cpu_to_be32(fh[5]); - fh[6] = ip->i_num.no_addr >> 32; - fh[6] = cpu_to_be32(fh[6]); - fh[7] = ip->i_num.no_addr & 0xFFFFFFFF; - fh[7] = cpu_to_be32(fh[7]); + fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); + fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); + fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32); + fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); fh[8] = cpu_to_be32(inode->i_mode); fh[9] = 0; /* pad to double word */ @@ -114,12 +108,12 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, } struct get_name_filldir { - struct gfs2_inum inum; + struct gfs2_inum_host inum; char *name; }; static int get_name_filldir(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum *inum, + u64 offset, struct gfs2_inum_host *inum, unsigned int type) { struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; @@ -202,7 +196,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; - struct gfs2_inum *inum = &fh_obj->this; + struct gfs2_inum_host *inum = &fh_obj->this; struct gfs2_holder i_gh, ri_gh, rgd_gh; struct gfs2_rgrpd *rgd; struct inode *inode; diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h index 09aca5046fb..f925a955b3b 100644 --- a/fs/gfs2/ops_export.h +++ b/fs/gfs2/ops_export.h @@ -15,7 +15,7 @@ extern struct export_operations gfs2_export_ops; struct gfs2_fh_obj { - struct gfs2_inum this; + struct gfs2_inum_host this; __u32 imode; }; diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 3064f133bf3..b3f1e0349ae 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -22,6 +22,7 @@ #include <linux/ext2_fs.h> #include <linux/crc32.h> #include <linux/lm_interface.h> +#include <linux/writeback.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -71,7 +72,7 @@ static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, size = count; kaddr = kmap(page); - memcpy(desc->arg.buf, kaddr + offset, size); + memcpy(desc->arg.data, kaddr + offset, size); kunmap(page); desc->count = count - size; @@ -86,7 +87,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, struct inode *inode = &ip->i_inode; read_descriptor_t desc; desc.written = 0; - desc.arg.buf = buf; + desc.arg.data = buf; desc.count = size; desc.error = 0; do_generic_mapping_read(inode->i_mapping, ra_state, @@ -139,7 +140,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) */ static int filldir_func(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum *inum, + u64 offset, struct gfs2_inum_host *inum, unsigned int type) { struct filldir_reg *fdr = (struct filldir_reg *)opaque; @@ -253,7 +254,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) u32 fsflags; gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); - error = gfs2_glock_nq_m_atime(1, &gh); + error = gfs2_glock_nq_atime(&gh); if (error) return error; @@ -266,6 +267,24 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) return error; } +void gfs2_set_inode_flags(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_dinode_host *di = &ip->i_di; + unsigned int flags = inode->i_flags; + + flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + if (di->di_flags & GFS2_DIF_IMMUTABLE) + flags |= S_IMMUTABLE; + if (di->di_flags & GFS2_DIF_APPENDONLY) + flags |= S_APPEND; + if (di->di_flags & GFS2_DIF_NOATIME) + flags |= S_NOATIME; + if (di->di_flags & GFS2_DIF_SYNC) + flags |= S_SYNC; + inode->i_flags = flags; +} + /* Flags that can be set by user space */ #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ GFS2_DIF_DIRECTIO| \ @@ -336,8 +355,9 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out_trans_end; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_flags = new_flags; - gfs2_dinode_out(&ip->i_di, bh->b_data); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); + gfs2_set_inode_flags(inode); out_trans_end: gfs2_trans_end(sdp); out: @@ -425,7 +445,7 @@ static int gfs2_open(struct inode *inode, struct file *file) gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; - if (S_ISREG(ip->i_di.di_mode)) { + if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) @@ -484,16 +504,40 @@ static int gfs2_close(struct inode *inode, struct file *file) * @file: the file that points to the dentry (we ignore this) * @dentry: the dentry that points to the inode to sync * + * The VFS will flush "normal" data for us. We only need to worry + * about metadata here. For journaled data, we just do a log flush + * as we can't avoid it. Otherwise we can just bale out if datasync + * is set. For stuffed inodes we must flush the log in order to + * ensure that all data is on disk. + * + * The call to write_inode_now() is there to write back metadata and + * the inode itself. It does also try and write the data, but thats + * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() + * for us. + * * Returns: errno */ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct inode *inode = dentry->d_inode; + int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); + int ret = 0; - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); + if (gfs2_is_jdata(GFS2_I(inode))) { + gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); + return 0; + } - return 0; + if (sync_state != 0) { + if (!datasync) + ret = write_inode_now(inode, 0); + + if (gfs2_is_stuffed(GFS2_I(inode))) + gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); + } + + return ret; } /** @@ -515,7 +559,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; if (sdp->sd_args.ar_localflocks) { @@ -617,7 +661,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; if (sdp->sd_args.ar_localflocks) diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h index ce319f89ec8..7e5d8ec9c84 100644 --- a/fs/gfs2/ops_file.h +++ b/fs/gfs2/ops_file.h @@ -17,7 +17,7 @@ extern struct file gfs2_internal_file_sentinel; extern int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, char *buf, loff_t *pos, unsigned size); - +extern void gfs2_set_inode_flags(struct inode *inode); extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 882873a6bd6..d14e139d267 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -237,7 +237,7 @@ fail: } static struct inode *gfs2_lookup_root(struct super_block *sb, - struct gfs2_inum *inum) + struct gfs2_inum_host *inum) { return gfs2_inode_lookup(sb, inum, DT_DIR); } @@ -246,7 +246,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder sb_gh; - struct gfs2_inum *inum; + struct gfs2_inum_host *inum; struct inode *inode; int error = 0; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index ef6e5ed70e9..636dda4c7d3 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -59,7 +59,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, gfs2_holder_init(dip->i_gl, 0, 0, ghs); for (;;) { - inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); if (!IS_ERR(inode)) { gfs2_trans_end(sdp); if (dip->i_alloc.al_rgd) @@ -144,7 +144,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, int alloc_required; int error; - if (S_ISDIR(ip->i_di.di_mode)) + if (S_ISDIR(inode->i_mode)) return -EPERM; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); @@ -169,7 +169,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, } error = -EINVAL; - if (!dip->i_di.di_nlink) + if (!dip->i_inode.i_nlink) goto out_gunlock; error = -EFBIG; if (dip->i_di.di_entries == (u32)-1) @@ -178,10 +178,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_gunlock; error = -EINVAL; - if (!ip->i_di.di_nlink) + if (!ip->i_inode.i_nlink) goto out_gunlock; error = -EMLINK; - if (ip->i_di.di_nlink == (u32)-1) + if (ip->i_inode.i_nlink == (u32)-1) goto out_gunlock; alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); @@ -196,8 +196,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_alloc; - error = gfs2_quota_check(dip, dip->i_di.di_uid, - dip->i_di.di_gid); + error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -220,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, } error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num, - IF2DT(ip->i_di.di_mode)); + IF2DT(inode->i_mode)); if (error) goto out_end_trans; @@ -326,7 +325,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -339,7 +338,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); brelse(dibh); @@ -379,7 +378,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -387,10 +386,9 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) ip = ghs[1].gh_gl->gl_object; - ip->i_di.di_nlink = 2; + ip->i_inode.i_nlink = 2; ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); ip->i_di.di_flags |= GFS2_DIF_JDATA; - ip->i_di.di_payload_format = GFS2_FORMAT_DE; ip->i_di.di_entries = 2; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -414,7 +412,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_inum_out(&dip->i_num, &dent->de_inum); dent->de_type = cpu_to_be16(DT_DIR); - gfs2_dinode_out(&ip->i_di, di); + gfs2_dinode_out(ip, di); brelse(dibh); } @@ -467,7 +465,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) if (ip->i_di.di_entries < 2) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); error = -EIO; goto out_gunlock; } @@ -504,47 +502,19 @@ out: static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - struct gfs2_inode *dip = GFS2_I(dir), *ip; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_holder ghs[2]; struct inode *inode; - struct buffer_head *dibh; - u32 major = 0, minor = 0; - int error; - - switch (mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - major = MAJOR(dev); - minor = MINOR(dev); - break; - case S_IFIFO: - case S_IFSOCK: - break; - default: - return -EOPNOTSUPP; - }; gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, mode); + inode = gfs2_createi(ghs, &dentry->d_name, mode, dev); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } - ip = ghs[1].gh_gl->gl_object; - - ip->i_di.di_major = major; - ip->i_di.di_minor = minor; - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - } - gfs2_trans_end(sdp); if (dip->i_alloc.al_rgd) gfs2_inplace_release(dip); @@ -592,11 +562,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Make sure we aren't trying to move a dirctory into it's subdir */ - if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) { + if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { dir_rename = 1; - error = gfs2_glock_nq_init(sdp->sd_rename_gl, - LM_ST_EXCLUSIVE, 0, + error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); if (error) goto out; @@ -637,10 +606,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_gunlock; - if (S_ISDIR(nip->i_di.di_mode)) { + if (S_ISDIR(nip->i_inode.i_mode)) { if (nip->i_di.di_entries < 2) { if (gfs2_consist_inode(nip)) - gfs2_dinode_print(&nip->i_di); + gfs2_dinode_print(nip); error = -EIO; goto out_gunlock; } @@ -666,7 +635,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, }; if (odip != ndip) { - if (!ndip->i_di.di_nlink) { + if (!ndip->i_inode.i_nlink) { error = -EINVAL; goto out_gunlock; } @@ -674,8 +643,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = -EFBIG; goto out_gunlock; } - if (S_ISDIR(ip->i_di.di_mode) && - ndip->i_di.di_nlink == (u32)-1) { + if (S_ISDIR(ip->i_inode.i_mode) && + ndip->i_inode.i_nlink == (u32)-1) { error = -EMLINK; goto out_gunlock; } @@ -702,8 +671,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_alloc; - error = gfs2_quota_check(ndip, ndip->i_di.di_uid, - ndip->i_di.di_gid); + error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -729,7 +697,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Remove the target file, if it exists */ if (nip) { - if (S_ISDIR(nip->i_di.di_mode)) + if (S_ISDIR(nip->i_inode.i_mode)) error = gfs2_rmdiri(ndip, &ndentry->d_name, nip); else { error = gfs2_dir_del(ndip, &ndentry->d_name); @@ -760,9 +728,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -771,7 +739,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_end_trans; error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num, - IF2DT(ip->i_di.di_mode)); + IF2DT(ip->i_inode.i_mode)); if (error) goto out_end_trans; @@ -867,6 +835,10 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) * @mask: * @nd: passed from Linux VFS, ignored by us * + * This may be called from the VFS directly, or from within GFS2 with the + * inode locked, so we look to see if the glock is already locked and only + * lock the glock if its not already been done. + * * Returns: errno */ @@ -875,15 +847,18 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; int error; + int unlock = 0; - if (ip->i_vn == ip->i_gl->gl_vn) - return generic_permission(inode, mask, gfs2_check_acl); + if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; + } - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (!error) { - error = generic_permission(inode, mask, gfs2_check_acl_locked); + error = generic_permission(inode, mask, gfs2_check_acl); + if (unlock) gfs2_glock_dq_uninit(&i_gh); - } return error; } @@ -914,8 +889,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) u32 ouid, ogid, nuid, ngid; int error; - ouid = ip->i_di.di_uid; - ogid = ip->i_di.di_gid; + ouid = inode->i_uid; + ogid = inode->i_gid; nuid = attr->ia_uid; ngid = attr->ia_gid; @@ -946,10 +921,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) error = inode_setattr(inode, attr); gfs2_assert_warn(sdp, !error); - gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { @@ -1018,6 +992,12 @@ out: * @dentry: The dentry to stat * @stat: The inode's stats * + * This may be called from the VFS directly, or from within GFS2 with the + * inode locked, so we look to see if the glock is already locked and only + * lock the glock if its not already been done. Note that its the NFS + * readdirplus operation which causes this to be called (from filldir) + * with the glock already held. + * * Returns: errno */ @@ -1028,14 +1008,20 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; + int unlock = 0; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (!error) { - generic_fillattr(inode, stat); - gfs2_glock_dq_uninit(&gh); + if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; } - return error; + generic_fillattr(inode, stat); + if (unlock); + gfs2_glock_dq_uninit(&gh); + + return 0; } static int gfs2_setxattr(struct dentry *dentry, const char *name, diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index b47d9598c04..7685b46f934 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb) static int gfs2_sync_fs(struct super_block *sb, int wait) { sb->s_dirt = 0; - gfs2_log_flush(sb->s_fs_info, NULL); + if (wait) + gfs2_log_flush(sb->s_fs_info, NULL); return 0; } @@ -215,7 +216,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_statfs_change sc; + struct gfs2_statfs_change_host sc; int error; if (gfs2_tune_get(sdp, gt_statfs_slow)) @@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode) */ if (inode->i_private) { struct gfs2_inode *ip = GFS2_I(inode); - gfs2_glock_inode_squish(inode); - gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED); ip->i_gl->gl_object = NULL; gfs2_glock_schedule_for_reclaim(ip->i_gl); gfs2_glock_put(ip->i_gl); @@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode) if (!inode->i_private) goto out; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; @@ -407,7 +406,7 @@ static void gfs2_delete_inode(struct inode *inode) if (error) goto out_uninit; - if (S_ISDIR(ip->i_di.di_mode) && + if (S_ISDIR(inode->i_mode) && (ip->i_di.di_flags & GFS2_DIF_EXHASH)) { error = gfs2_dir_exhash_dealloc(ip); if (error) diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 5453d2947ab..45a5f11fc39 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -76,7 +76,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3deae7416c..d0db881b55d 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -452,19 +452,19 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); if (error) goto out; al->al_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); if (error) goto out; al->al_qd_num++; qd++; - if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) { + if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); if (error) goto out; @@ -472,7 +472,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) qd++; } - if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) { + if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); if (error) goto out; @@ -539,8 +539,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) qc->qc_id = cpu_to_be32(qd->qd_id); } - x = qc->qc_change; - x = be64_to_cpu(x) + change; + x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); spin_lock(&sdp->sd_quota_spin); @@ -743,7 +742,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota q; + struct gfs2_quota_host q; char buf[sizeof(struct gfs2_quota)]; struct file_ra_state ra_state; int error; @@ -1103,7 +1102,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_change qc; + struct gfs2_quota_change_host qc; struct gfs2_quota_data *qd; gfs2_quota_change_in(&qc, bh->b_data + diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 62cd223819b..d0c806b85c8 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -132,10 +132,11 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp) */ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, - struct gfs2_log_header *head) + struct gfs2_log_header_host *head) { struct buffer_head *bh; - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; + const u32 nothing = 0; u32 hash; int error; @@ -143,11 +144,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, if (error) return error; - memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header)); - lh.lh_hash = 0; - hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header)); + hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) - + sizeof(u32)); + hash = crc32_le(hash, (unsigned char const *)¬hing, sizeof(nothing)); + hash ^= (u32)~0; gfs2_log_header_in(&lh, bh->b_data); - brelse(bh); if (lh.lh_header.mh_magic != GFS2_MAGIC || @@ -174,7 +175,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, */ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, - struct gfs2_log_header *head) + struct gfs2_log_header_host *head) { unsigned int orig_blk = *blk; int error; @@ -205,10 +206,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, * Returns: errno */ -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { unsigned int blk = head->lh_blkno; - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; int error; for (;;) { @@ -245,9 +246,9 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head) * Returns: errno */ -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { - struct gfs2_log_header lh_1, lh_m; + struct gfs2_log_header_host lh_1, lh_m; u32 blk_1, blk_2, blk_m; int error; @@ -320,7 +321,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, length = be32_to_cpu(ld->ld_length); if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; error = get_log_header(jd, start, &lh); if (!error) { gfs2_replay_incr_blk(sdp, &start); @@ -363,7 +364,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, * Returns: errno */ -static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); @@ -425,7 +426,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_log_header head; + struct gfs2_log_header_host head; struct gfs2_holder j_gh, ji_gh, t_gh; unsigned long t; int ro = 0; diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 961feedf4d8..f7235e61c72 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -26,7 +26,7 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); void gfs2_revoke_clean(struct gfs2_sbd *sdp); int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header *head); + struct gfs2_log_header_host *head); int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); void gfs2_check_journals(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b261385c006..ff0846528d5 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -253,7 +253,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } -static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block) +static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block) { u64 first = ri->ri_data0; u64 last = first + ri->ri_data; @@ -1217,7 +1217,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) al->al_alloced++; gfs2_statfs_change(sdp, 0, -1, 0); - gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone--; @@ -1261,7 +1261,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) al->al_alloced++; gfs2_statfs_change(sdp, 0, -1, 0); - gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_trans_add_unrevoke(sdp, block); spin_lock(&sdp->sd_rindex_spin); @@ -1337,8 +1337,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); gfs2_statfs_change(sdp, 0, +blen, 0); - gfs2_quota_change(ip, -(s64)blen, - ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } /** @@ -1366,7 +1365,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); gfs2_statfs_change(sdp, 0, +blen, 0); - gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, bstart, blen); } @@ -1411,7 +1410,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_num.no_addr); - gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6a78b1b32e2..43a24f2e590 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -97,7 +97,7 @@ void gfs2_tune_init(struct gfs2_tune *gt) * changed. */ -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent) +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) { unsigned int x; @@ -180,6 +180,24 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error) return 0; } +/** + * gfs2_read_super - Read the gfs2 super block from disk + * @sb: The VFS super block + * @sector: The location of the super block + * + * This uses the bio functions to read the super block from disk + * because we want to be 100% sure that we never read cached data. + * A super block is read twice only during each GFS2 mount and is + * never written to by the filesystem. The first time its read no + * locks are held, and the only details which are looked at are those + * relating to the locking protocol. Once locking is up and working, + * the sb is read again under the lock to establish the location of + * the master directory (contains pointers to journals etc) and the + * root directory. + * + * Returns: A page containing the sb or NULL + */ + struct page *gfs2_read_super(struct super_block *sb, sector_t sector) { struct page *page; @@ -199,7 +217,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector) return NULL; } - bio->bi_sector = sector; + bio->bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -508,7 +526,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; struct gfs2_holder t_gh; - struct gfs2_log_header head; + struct gfs2_log_header_host head; int error; error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, @@ -517,7 +535,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) return error; gfs2_meta_cache_flush(ip); - j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); if (error) @@ -587,9 +605,9 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int gfs2_statfs_init(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct buffer_head *m_bh, *l_bh; struct gfs2_holder gh; int error; @@ -634,7 +652,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes) { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct buffer_head *l_bh; int error; @@ -660,8 +678,8 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_holder gh; struct buffer_head *m_bh, *l_bh; int error; @@ -727,10 +745,10 @@ out: * Returns: errno */ -int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; spin_lock(&sdp->sd_statfs_spin); @@ -760,7 +778,7 @@ int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) */ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, - struct gfs2_statfs_change *sc) + struct gfs2_statfs_change_host *sc) { gfs2_rgrp_verify(rgd); sc->sc_total += rgd->rd_ri.ri_data; @@ -782,7 +800,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, * Returns: errno */ -int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { struct gfs2_holder ri_gh; struct gfs2_rgrpd *rgd_next; @@ -792,7 +810,7 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) int done; int error = 0, err; - memset(sc, 0, sizeof(struct gfs2_statfs_change)); + memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); if (!gha) return -ENOMEM; @@ -873,7 +891,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; int error; error = gfs2_jindex_hold(sdp, &ji_gh); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 5bb443ae0f5..e590b2df11d 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -14,7 +14,7 @@ void gfs2_tune_init(struct gfs2_tune *gt); -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent); +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); struct page *gfs2_read_super(struct super_block *sb, sector_t sector); @@ -45,8 +45,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp); void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes); int gfs2_statfs_sync(struct gfs2_sbd *sdp); -int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); -int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc); +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc); int gfs2_freeze_fs(struct gfs2_sbd *sdp); void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 0e0ec988f73..983eaf1e06b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -426,9 +426,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ } \ TUNE_ATTR_2(name, name##_store) -TUNE_ATTR(ilimit, 0); -TUNE_ATTR(ilimit_tries, 0); -TUNE_ATTR(ilimit_min, 0); TUNE_ATTR(demote_secs, 0); TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); @@ -447,7 +444,6 @@ TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(quota_cache_secs, 1); TUNE_ATTR(max_atomic_write, 1); TUNE_ATTR(stall_secs, 1); -TUNE_ATTR(entries_per_readdir, 1); TUNE_ATTR(greedy_default, 1); TUNE_ATTR(greedy_quantum, 1); TUNE_ATTR(greedy_max, 1); @@ -459,9 +455,6 @@ TUNE_ATTR_DAEMON(quotad_secs, quotad_process); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); static struct attribute *tune_attrs[] = { - &tune_attr_ilimit.attr, - &tune_attr_ilimit_tries.attr, - &tune_attr_ilimit_min.attr, &tune_attr_demote_secs.attr, &tune_attr_incore_log_blocks.attr, &tune_attr_log_flush_secs.attr, @@ -478,7 +471,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quota_cache_secs.attr, &tune_attr_max_atomic_write.attr, &tune_attr_stall_secs.attr, - &tune_attr_entries_per_readdir.attr, &tune_attr_greedy_default.attr, &tune_attr_greedy_quantum.attr, &tune_attr_greedy_max.attr, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 196c604faad..e5707a9f78c 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -23,9 +23,9 @@ #include "lm.h" #include "util.h" -kmem_cache_t *gfs2_glock_cachep __read_mostly; -kmem_cache_t *gfs2_inode_cachep __read_mostly; -kmem_cache_t *gfs2_bufdata_cachep __read_mostly; +struct kmem_cache *gfs2_glock_cachep __read_mostly; +struct kmem_cache *gfs2_inode_cachep __read_mostly; +struct kmem_cache *gfs2_bufdata_cachep __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 76a50899fe9..28938a46cf4 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -83,8 +83,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, char *file, unsigned int line) { struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; - u32 magic = mh->mh_magic; - magic = be32_to_cpu(magic); + u32 magic = be32_to_cpu(mh->mh_magic); if (unlikely(magic != GFS2_MAGIC)) return gfs2_meta_check_ii(sdp, bh, "magic number", function, file, line); @@ -107,9 +106,8 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, char *file, unsigned int line) { struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; - u32 magic = mh->mh_magic; + u32 magic = be32_to_cpu(mh->mh_magic); u16 t = be32_to_cpu(mh->mh_type); - magic = be32_to_cpu(magic); if (unlikely(magic != GFS2_MAGIC)) return gfs2_meta_check_ii(sdp, bh, "magic number", function, file, line); @@ -146,9 +144,9 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); -extern kmem_cache_t *gfs2_glock_cachep; -extern kmem_cache_t *gfs2_inode_cachep; -extern kmem_cache_t *gfs2_bufdata_cachep; +extern struct kmem_cache *gfs2_glock_cachep; +extern struct kmem_cache *gfs2_inode_cachep; +extern struct kmem_cache *gfs2_bufdata_cachep; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 85b17b3fa4a..a3698796600 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -24,7 +24,7 @@ #include "hfs_fs.h" #include "btree.h" -static kmem_cache_t *hfs_inode_cachep; +static struct kmem_cache *hfs_inode_cachep; MODULE_LICENSE("GPL"); @@ -145,7 +145,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb) { struct hfs_inode_info *i; - i = kmem_cache_alloc(hfs_inode_cachep, SLAB_KERNEL); + i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL); return i ? &i->vfs_inode : NULL; } @@ -430,7 +430,7 @@ static struct file_system_type hfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfs_init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct hfs_inode_info *i = p; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 194eede52fa..0f513c6bf84 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -434,13 +434,13 @@ MODULE_AUTHOR("Brad Boyer"); MODULE_DESCRIPTION("Extended Macintosh Filesystem"); MODULE_LICENSE("GPL"); -static kmem_cache_t *hfsplus_inode_cachep; +static struct kmem_cache *hfsplus_inode_cachep; static struct inode *hfsplus_alloc_inode(struct super_block *sb) { struct hfsplus_inode_info *i; - i = kmem_cache_alloc(hfsplus_inode_cachep, SLAB_KERNEL); + i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL); return i ? &i->vfs_inode : NULL; } @@ -467,7 +467,7 @@ static struct file_system_type hfsplus_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfsplus_init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct hfsplus_inode_info *i = p; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index ecc9180645a..594f9c428fc 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -84,7 +84,8 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } if (!fno->dirflag) { e = 1; - hpfs_error(inode->i_sb, "not a directory, fnode %08x",inode->i_ino); + hpfs_error(inode->i_sb, "not a directory, fnode %08lx", + (unsigned long)inode->i_ino); } if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) { e = 1; @@ -144,8 +145,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } if (de->first || de->last) { if (hpfs_sb(inode->i_sb)->sb_chk) { - if (de->first && !de->last && (de->namelen != 2 || de ->name[0] != 1 || de->name[1] != 1)) hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08x", old_pos); - if (de->last && (de->namelen != 1 || de ->name[0] != 255)) hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08x", old_pos); + if (de->first && !de->last && (de->namelen != 2 + || de ->name[0] != 1 || de->name[1] != 1)) + hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", old_pos); + if (de->last && (de->namelen != 1 || de ->name[0] != 255)) + hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", old_pos); } hpfs_brelse4(&qbh); goto again; diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 229ff2fb180..fe83c2b7d2d 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -533,10 +533,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) struct buffer_head *bh; struct dnode *d1; struct quad_buffer_head qbh1; - if (hpfs_sb(i->i_sb)->sb_chk) if (up != i->i_ino) { - hpfs_error(i->i_sb, "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08x", dno, up, i->i_ino); + if (hpfs_sb(i->i_sb)->sb_chk) + if (up != i->i_ino) { + hpfs_error(i->i_sb, + "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx", + dno, up, (unsigned long)i->i_ino); return; - } + } if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { d1->up = up; d1->root_dnode = 1; @@ -851,7 +854,9 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, /* Going to the next dirent */ if ((d = de_next_de(de)) < dnode_end_de(dnode)) { if (!(++*posp & 077)) { - hpfs_error(inode->i_sb, "map_pos_dirent: pos crossed dnode boundary; pos = %08x", *posp); + hpfs_error(inode->i_sb, + "map_pos_dirent: pos crossed dnode boundary; pos = %08llx", + (unsigned long long)*posp); goto bail; } /* We're going down the tree */ diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index 66339dc030e..547a8384571 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -243,8 +243,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, char *key, char *data fnode->ea_offs = 0xc4; } if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) { - hpfs_error(s, "fnode %08x: ea_offs == %03x, ea_size_s == %03x", - inode->i_ino, fnode->ea_offs, fnode->ea_size_s); + hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", + (unsigned long)inode->i_ino, + fnode->ea_offs, fnode->ea_size_s); return; } if ((fnode->ea_size_s || !fnode->ea_size_l) && diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 32ab51e42b9..1c07aa82d32 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -317,7 +317,8 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) /* super.c */ -void hpfs_error(struct super_block *, char *, ...); +void hpfs_error(struct super_block *, const char *, ...) + __attribute__((format (printf, 2, 3))); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_count_one_bitmap(struct super_block *, secno); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 7faef8544f3..85d3e1d9ac0 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -251,7 +251,10 @@ void hpfs_write_inode_nolock(struct inode *i) de->file_size = 0; hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); - } else hpfs_error(i->i_sb, "directory %08x doesn't have '.' entry", i->i_ino); + } else + hpfs_error(i->i_sb, + "directory %08lx doesn't have '.' entry", + (unsigned long)i->i_ino); } mark_buffer_dirty(bh); brelse(bh); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 0fecdac22e4..c4724589b2e 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -126,32 +126,40 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea struct extended_attribute *ea; struct extended_attribute *ea_end; if (fnode->magic != FNODE_MAGIC) { - hpfs_error(s, "bad magic on fnode %08x", ino); + hpfs_error(s, "bad magic on fnode %08lx", + (unsigned long)ino); goto bail; } if (!fnode->dirflag) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != (fnode->btree.internal ? 12 : 8)) { - hpfs_error(s, "bad number of nodes in fnode %08x", ino); + hpfs_error(s, + "bad number of nodes in fnode %08lx", + (unsigned long)ino); goto bail; } if (fnode->btree.first_free != 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { - hpfs_error(s, "bad first_free pointer in fnode %08x", ino); + hpfs_error(s, + "bad first_free pointer in fnode %08lx", + (unsigned long)ino); goto bail; } } if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 || (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) { - hpfs_error(s, "bad EA info in fnode %08x: ea_offs == %04x ea_size_s == %04x", - ino, fnode->ea_offs, fnode->ea_size_s); + hpfs_error(s, + "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", + (unsigned long)ino, + fnode->ea_offs, fnode->ea_size_s); goto bail; } ea = fnode_ea(fnode); ea_end = fnode_end_ea(fnode); while (ea != ea_end) { if (ea > ea_end) { - hpfs_error(s, "bad EA in fnode %08x", ino); + hpfs_error(s, "bad EA in fnode %08lx", + (unsigned long)ino); goto bail; } ea = next_ea(ea); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 450b5e0b478..d4abc1a1d56 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -46,21 +46,17 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ +static char err_buf[1024]; -#define ERR_BUF_SIZE 1024 - -void hpfs_error(struct super_block *s, char *m,...) +void hpfs_error(struct super_block *s, const char *fmt, ...) { - char *buf; - va_list l; - va_start(l, m); - if (!(buf = kmalloc(ERR_BUF_SIZE, GFP_KERNEL))) - printk("HPFS: No memory for error message '%s'\n",m); - else if (vsprintf(buf, m, l) >= ERR_BUF_SIZE) - printk("HPFS: Grrrr... Kernel memory corrupted ... going on, but it'll crash very soon :-(\n"); - printk("HPFS: filesystem error: "); - if (buf) printk("%s", buf); - else printk("%s\n",m); + va_list args; + + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + + printk("HPFS: filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { printk("; crashing the system because you wanted it\n"); @@ -76,7 +72,6 @@ void hpfs_error(struct super_block *s, char *m,...) } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n"); else printk("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n"); } else printk("\n"); - kfree(buf); hpfs_sb(s)->sb_was_error = 1; } @@ -160,12 +155,12 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t * hpfs_inode_cachep; +static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, SLAB_NOFS); + ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; @@ -177,7 +172,7 @@ static void hpfs_destroy_inode(struct inode *inode) kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7f4756963d0..0706f5aac6a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -513,7 +513,7 @@ static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) } -static kmem_cache_t *hugetlbfs_inode_cachep; +static struct kmem_cache *hugetlbfs_inode_cachep; static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) { @@ -522,7 +522,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) return NULL; - p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); if (unlikely(!p)) { hugetlbfs_inc_free_inodes(sbinfo); return NULL; @@ -545,7 +545,7 @@ static const struct address_space_operations hugetlbfs_aops = { }; -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; diff --git a/fs/inode.c b/fs/inode.c index 26cdb115ce6..9ecccab7326 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -97,7 +97,7 @@ static DEFINE_MUTEX(iprune_mutex); */ struct inodes_stat_t inodes_stat; -static kmem_cache_t * inode_cachep __read_mostly; +static struct kmem_cache * inode_cachep __read_mostly; static struct inode *alloc_inode(struct super_block *sb) { @@ -109,7 +109,7 @@ static struct inode *alloc_inode(struct super_block *sb) if (sb->s_op->alloc_inode) inode = sb->s_op->alloc_inode(sb); else - inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL); + inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); if (inode) { struct address_space * const mapping = &inode->i_data; @@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode) EXPORT_SYMBOL(inode_init_once); -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct inode * inode = (struct inode *) foo; @@ -1242,9 +1242,6 @@ EXPORT_SYMBOL(inode_needs_sync); */ #ifdef CONFIG_QUOTA -/* Function back in dquot.c */ -int remove_inode_dquot_ref(struct inode *, int, struct list_head *); - void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 017cb0f134d..e1956e6f116 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -34,8 +34,8 @@ #include <asm/ioctls.h> -static kmem_cache_t *watch_cachep __read_mostly; -static kmem_cache_t *event_cachep __read_mostly; +static struct kmem_cache *watch_cachep __read_mostly; +static struct kmem_cache *event_cachep __read_mostly; static struct vfsmount *inotify_mnt __read_mostly; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index c34b862cdbf..ea55b6c469e 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -57,12 +57,12 @@ static void isofs_put_super(struct super_block *sb) static void isofs_read_inode(struct inode *); static int isofs_statfs (struct dentry *, struct kstatfs *); -static kmem_cache_t *isofs_inode_cachep; +static struct kmem_cache *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { struct iso_inode_info *ei; - ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct iso_inode_info *ei = foo; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b85c686b60d..10fff944393 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -31,7 +31,7 @@ #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/pagemap.h> #include <linux/kthread.h> #include <linux/poison.h> @@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) #define JBD_MAX_SLABS 5 #define JBD_SLAB_INDEX(size) (size >> 11) -static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; static const char *jbd_slab_names[JBD_MAX_SLABS] = { "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" }; @@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr, size_t size) /* * Journal_head storage management */ -static kmem_cache_t *journal_head_cache; +static struct kmem_cache *journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd_handle_cache; +struct kmem_cache *jbd_handle_cache; static int __init journal_init_handle_cache(void) { diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c532429d8d9..d204ab394f3 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -70,8 +70,8 @@ #include <linux/init.h> #endif -static kmem_cache_t *revoke_record_cache; -static kmem_cache_t *revoke_table_cache; +static struct kmem_cache *revoke_record_cache; +static struct kmem_cache *revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 4f82bcd63e4..d38e0d575e4 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -27,6 +27,8 @@ #include <linux/mm.h> #include <linux/highmem.h> +static void __journal_temp_unlink_buffer(struct journal_head *jh); + /* * get_transaction: obtain a new transaction_t object. * @@ -1499,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * * Called under j_list_lock. The journal may not be locked. */ -void __journal_temp_unlink_buffer(struct journal_head *jh) +static void __journal_temp_unlink_buffer(struct journal_head *jh) { struct journal_head **list = NULL; transaction_t *transaction; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 70b2ae1ef28..6bd8005e3d3 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -248,8 +248,12 @@ write_out_data: bufs = 0; goto write_out_data; } - } - else { + } else if (!locked && buffer_locked(bh)) { + __jbd2_journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); + put_bh(bh); + } else { BUFFER_TRACE(bh, "writeout complete: unfile"); __jbd2_journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c60f378b0f7..44fc32bfd7f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -31,7 +31,7 @@ #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/pagemap.h> #include <linux/kthread.h> #include <linux/poison.h> @@ -1641,7 +1641,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) #define JBD_MAX_SLABS 5 #define JBD_SLAB_INDEX(size) (size >> 11) -static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; static const char *jbd_slab_names[JBD_MAX_SLABS] = { "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" }; @@ -1704,7 +1704,7 @@ void jbd2_slab_free(void *ptr, size_t size) /* * Journal_head storage management */ -static kmem_cache_t *jbd2_journal_head_cache; +static struct kmem_cache *jbd2_journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -2007,7 +2007,7 @@ static void __exit jbd2_remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd2_handle_cache; +struct kmem_cache *jbd2_handle_cache; static int __init journal_init_handle_cache(void) { diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 380d19917f3..f506646ad0f 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -70,8 +70,8 @@ #include <linux/init.h> #endif -static kmem_cache_t *jbd2_revoke_record_cache; -static kmem_cache_t *jbd2_revoke_table_cache; +static struct kmem_cache *jbd2_revoke_record_cache; +static struct kmem_cache *jbd2_revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c051a94c8a9..3a8700153cb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -27,6 +27,8 @@ #include <linux/mm.h> #include <linux/highmem.h> +static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); + /* * jbd2_get_transaction: obtain a new transaction_t object. * diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 3f7899ea4cb..9f15bce9202 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -61,8 +61,8 @@ static const struct file_operations jffs_dir_operations; static struct inode_operations jffs_dir_inode_operations; static const struct address_space_operations jffs_address_operations; -kmem_cache_t *node_cache = NULL; -kmem_cache_t *fm_cache = NULL; +struct kmem_cache *node_cache = NULL; +struct kmem_cache *fm_cache = NULL; /* Called by the VFS at mount time to initialize the whole file system. */ static int jffs_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 4a543e11497..d0e783f199e 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -66,6 +66,7 @@ #include <linux/smp_lock.h> #include <linux/time.h> #include <linux/ctype.h> +#include <linux/freezer.h> #include "intrep.h" #include "jffs_fm.h" @@ -591,7 +592,7 @@ jffs_add_virtual_root(struct jffs_control *c) D2(printk("jffs_add_virtual_root(): " "Creating a virtual root directory.\n")); - if (!(root = kmalloc(sizeof(struct jffs_file), GFP_KERNEL))) { + if (!(root = kzalloc(sizeof(struct jffs_file), GFP_KERNEL))) { return -ENOMEM; } no_jffs_file++; @@ -603,7 +604,6 @@ jffs_add_virtual_root(struct jffs_control *c) DJM(no_jffs_node++); memset(node, 0, sizeof(struct jffs_node)); node->ino = JFFS_MIN_INO; - memset(root, 0, sizeof(struct jffs_file)); root->ino = JFFS_MIN_INO; root->mode = S_IFDIR | S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c index 29b68d939bd..077258b2103 100644 --- a/fs/jffs/jffs_fm.c +++ b/fs/jffs/jffs_fm.c @@ -29,8 +29,8 @@ static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset); static struct jffs_fm *jffs_alloc_fm(void); static void jffs_free_fm(struct jffs_fm *n); -extern kmem_cache_t *fm_cache; -extern kmem_cache_t *node_cache; +extern struct kmem_cache *fm_cache; +extern struct kmem_cache *node_cache; #if CONFIG_JFFS_FS_VERBOSE > 0 void diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index ff2a872e80e..6eb3daebd56 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,6 +16,7 @@ #include <linux/mtd/mtd.h> #include <linux/completion.h> #include <linux/sched.h> +#include <linux/freezer.h> #include "nodelist.h" diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 33f29100501..83f9881ec4c 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -19,16 +19,16 @@ /* These are initialised to NULL in the kernel startup code. If you're porting to other operating systems, beware */ -static kmem_cache_t *full_dnode_slab; -static kmem_cache_t *raw_dirent_slab; -static kmem_cache_t *raw_inode_slab; -static kmem_cache_t *tmp_dnode_info_slab; -static kmem_cache_t *raw_node_ref_slab; -static kmem_cache_t *node_frag_slab; -static kmem_cache_t *inode_cache_slab; +static struct kmem_cache *full_dnode_slab; +static struct kmem_cache *raw_dirent_slab; +static struct kmem_cache *raw_inode_slab; +static struct kmem_cache *tmp_dnode_info_slab; +static struct kmem_cache *raw_node_ref_slab; +static struct kmem_cache *node_frag_slab; +static struct kmem_cache *inode_cache_slab; #ifdef CONFIG_JFFS2_FS_XATTR -static kmem_cache_t *xattr_datum_cache; -static kmem_cache_t *xattr_ref_cache; +static struct kmem_cache *xattr_datum_cache; +static struct kmem_cache *xattr_ref_cache; #endif int __init jffs2_create_slab_caches(void) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index bc4b8106a49..7deb7825402 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -28,12 +28,12 @@ static void jffs2_put_super(struct super_block *); -static kmem_cache_t *jffs2_inode_cachep; +static struct kmem_cache *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { struct jffs2_inode_info *ei; - ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, SLAB_KERNEL); + ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode) kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } -static void jffs2_i_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index b89c9aba046..5065baa530b 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -67,7 +67,7 @@ #include <linux/kthread.h> #include <linux/buffer_head.h> /* for sync_blockdev() */ #include <linux/bio.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/delay.h> #include <linux/mutex.h> #include "jfs_incore.h" diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 0cccd1c39d7..b1a1c729601 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -74,7 +74,7 @@ static inline void lock_metapage(struct metapage *mp) } #define METAPOOL_MIN_PAGES 32 -static kmem_cache_t *metapage_cache; +static struct kmem_cache *metapage_cache; static mempool_t *metapage_mempool; #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) @@ -180,7 +180,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 81f6f04af19..d558e51b0df 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -46,7 +46,7 @@ #include <linux/vmalloc.h> #include <linux/smp_lock.h> #include <linux/completion.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kthread.h> diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 9c1c6e0e633..846ac8f3451 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -44,7 +44,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); -static kmem_cache_t * jfs_inode_cachep; +static struct kmem_cache * jfs_inode_cachep; static struct super_operations jfs_super_operations; static struct export_operations jfs_export_operations; @@ -93,7 +93,7 @@ void jfs_error(struct super_block *sb, const char * function, ...) va_list args; va_start(args, function); - vsprintf(error_buf, function, args); + vsnprintf(error_buf, sizeof(error_buf), function, args); va_end(args); printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); @@ -748,7 +748,7 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 41c983a0529..497c3cd59d5 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -13,6 +13,7 @@ #include <linux/nfs_fs.h> #include <linux/utsname.h> #include <linux/smp_lock.h> +#include <linux/freezer.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> diff --git a/fs/lockd/host.c b/fs/lockd/host.c index fb24a973034..3d4610c2a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -36,34 +36,14 @@ static DEFINE_MUTEX(nlm_host_mutex); static void nlm_gc_hosts(void); static struct nsm_handle * __nsm_find(const struct sockaddr_in *, const char *, int, int); - -/* - * Find an NLM server handle in the cache. If there is none, create it. - */ -struct nlm_host * -nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, - const char *hostname, int hostname_len) -{ - return nlm_lookup_host(0, sin, proto, version, - hostname, hostname_len); -} - -/* - * Find an NLM client handle in the cache. If there is none, create it. - */ -struct nlm_host * -nlmsvc_lookup_host(struct svc_rqst *rqstp, - const char *hostname, int hostname_len) -{ - return nlm_lookup_host(1, &rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers, - hostname, hostname_len); -} +static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, + const char *hostname, + int hostname_len); /* * Common host lookup routine for server & client */ -struct nlm_host * +static struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *sin, int proto, int version, const char *hostname, @@ -195,6 +175,29 @@ nlm_destroy_host(struct nlm_host *host) } /* + * Find an NLM server handle in the cache. If there is none, create it. + */ +struct nlm_host * +nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, + const char *hostname, int hostname_len) +{ + return nlm_lookup_host(0, sin, proto, version, + hostname, hostname_len); +} + +/* + * Find an NLM client handle in the cache. If there is none, create it. + */ +struct nlm_host * +nlmsvc_lookup_host(struct svc_rqst *rqstp, + const char *hostname, int hostname_len) +{ + return nlm_lookup_host(1, &rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers, + hostname, hostname_len); +} + +/* * Create the NLM RPC client for an NLM peer */ struct rpc_clnt * @@ -495,7 +498,7 @@ out: return nsm; } -struct nsm_handle * +static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) { return __nsm_find(sin, hostname, hostname_len, 1); diff --git a/fs/locks.c b/fs/locks.c index e0b6a80649a..1cb0c57fedb 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -142,12 +142,12 @@ int lease_break_time = 45; static LIST_HEAD(file_lock_list); static LIST_HEAD(blocked_list); -static kmem_cache_t *filelock_cache __read_mostly; +static struct kmem_cache *filelock_cache __read_mostly; /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) { - return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); + return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } static void locks_release_private(struct file_lock *fl) @@ -199,7 +199,7 @@ EXPORT_SYMBOL(locks_init_lock); * Initialises the fields of the file lock which are invariant for * free file_locks. */ -static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags) { struct file_lock *lock = (struct file_lock *) foo; diff --git a/fs/mbcache.c b/fs/mbcache.c index 0ff71256e65..deeb9dc062d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -85,7 +85,7 @@ struct mb_cache { #ifndef MB_CACHE_INDEXES_COUNT int c_indexes_count; #endif - kmem_cache_t *c_entry_cache; + struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; struct list_head *c_indexes_hash[0]; }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 1e36bae4d0e..629e09b38c5 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -51,12 +51,12 @@ static void minix_put_super(struct super_block *sb) return; } -static kmem_cache_t * minix_inode_cachep; +static struct kmem_cache * minix_inode_cachep; static struct inode *minix_alloc_inode(struct super_block *sb) { struct minix_inode_info *ei; - ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, SLAB_KERNEL); + ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -67,7 +67,7 @@ static void minix_destroy_inode(struct inode *inode) kmem_cache_free(minix_inode_cachep, minix_i(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/namei.c b/fs/namei.c index 28d49b301d5..db1bca26d88 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -249,9 +249,11 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) /* * MAY_EXEC on regular files requires special handling: We override - * filesystem execute permissions if the mode bits aren't set. + * filesystem execute permissions if the mode bits aren't set or + * the fs is mounted with the "noexec" flag. */ - if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) + if ((mask & MAY_EXEC) && S_ISREG(mode) && (!(mode & S_IXUGO) || + (nd && nd->mnt && (nd->mnt->mnt_flags & MNT_NOEXEC)))) return -EACCES; /* Ordinary permission routines do not understand MAY_APPEND. */ @@ -1996,8 +1998,7 @@ asmlinkage long sys_mkdir(const char __user *pathname, int mode) void dentry_unhash(struct dentry *dentry) { dget(dentry); - if (atomic_read(&dentry->d_count)) - shrink_dcache_parent(dentry); + shrink_dcache_parent(dentry); spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) == 2) diff --git a/fs/namespace.c b/fs/namespace.c index 55442a6cf22..b00ac84ebbd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -36,7 +36,7 @@ static int event; static struct list_head *mount_hashtable __read_mostly; static int hash_mask __read_mostly, hash_bits __read_mostly; -static kmem_cache_t *mnt_cache __read_mostly; +static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; /* /sys/fs */ diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 72dad552aa0..fae53243bb9 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -40,12 +40,12 @@ static void ncp_delete_inode(struct inode *); static void ncp_put_super(struct super_block *); static int ncp_statfs(struct dentry *, struct kstatfs *); -static kmem_cache_t * ncp_inode_cachep; +static struct kmem_cache * ncp_inode_cachep; static struct inode *ncp_alloc_inode(struct super_block *sb) { struct ncp_inode_info *ei; - ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, SLAB_KERNEL); + ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -56,7 +56,7 @@ static void ncp_destroy_inode(struct inode *inode) kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 784bbb54e6c..f9d678f4ae0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -58,7 +58,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static kmem_cache_t *nfs_direct_cachep; +static struct kmem_cache *nfs_direct_cachep; /* * This represents a set of asynchronous requests that we're waiting on @@ -143,7 +143,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); if (!dreq) return NULL; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7c32187f953..36680d1061b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *); static void nfs_zap_acl_cache(struct inode *); -static kmem_cache_t * nfs_inode_cachep; +static struct kmem_cache * nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) @@ -1080,7 +1080,7 @@ void nfs4_clear_inode(struct inode *inode) struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; - nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); + nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); if (!nfsi) return NULL; nfsi->flags = 0UL; @@ -1111,7 +1111,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) #endif } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index bc9fab68b29..ca4b1d4ff42 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -21,13 +21,13 @@ #define NFS_PARANOIA 1 -static kmem_cache_t *nfs_page_cachep; +static struct kmem_cache *nfs_page_cachep; static inline struct nfs_page * nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_list); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05cca660997..a9c26521a9e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -39,7 +39,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; -static kmem_cache_t *nfs_rdata_cachep; +static struct kmem_cache *nfs_rdata_cachep; static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) @@ -47,7 +47,7 @@ static mempool_t *nfs_rdata_mempool; struct nfs_read_data *nfs_readdata_alloc(size_t len) { unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7f3844d2bf3..594eb16879e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; -static kmem_cache_t *nfs_wdata_cachep; +static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; @@ -93,7 +93,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); struct nfs_write_data *nfs_commit_alloc(void) { - struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); @@ -118,7 +118,7 @@ void nfs_commit_free(struct nfs_write_data *wdata) struct nfs_write_data *nfs_writedata_alloc(size_t len) { unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index b4baca3053c..277df40f098 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -24,10 +24,6 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR -#ifdef NFSD_OPTIMIZE_SPACE -# define inline -#endif - /* * Mapping of S_IF* types to NFS file types @@ -42,14 +38,14 @@ static u32 nfs3_ftypes[] = { /* * XDR functions for basic NFS types */ -static inline __be32 * +static __be32 * encode_time3(__be32 *p, struct timespec *time) { *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); return p; } -static inline __be32 * +static __be32 * decode_time3(__be32 *p, struct timespec *time) { time->tv_sec = ntohl(*p++); @@ -57,7 +53,7 @@ decode_time3(__be32 *p, struct timespec *time) return p; } -static inline __be32 * +static __be32 * decode_fh(__be32 *p, struct svc_fh *fhp) { unsigned int size; @@ -77,7 +73,7 @@ __be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp) return decode_fh(p, fhp); } -static inline __be32 * +static __be32 * encode_fh(__be32 *p, struct svc_fh *fhp) { unsigned int size = fhp->fh_handle.fh_size; @@ -91,7 +87,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp) * Decode a file name and make sure that the path contains * no slashes or null bytes. */ -static inline __be32 * +static __be32 * decode_filename(__be32 *p, char **namp, int *lenp) { char *name; @@ -107,7 +103,7 @@ decode_filename(__be32 *p, char **namp, int *lenp) return p; } -static inline __be32 * +static __be32 * decode_sattr3(__be32 *p, struct iattr *iap) { u32 tmp; @@ -153,7 +149,7 @@ decode_sattr3(__be32 *p, struct iattr *iap) return p; } -static inline __be32 * +static __be32 * encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { @@ -186,7 +182,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, return p; } -static inline __be32 * +static __be32 * encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct inode *inode = fhp->fh_dentry->d_inode; @@ -776,7 +772,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p, return xdr_ressize_check(rqstp, p); } -static inline __be32 * +static __be32 * encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, ino_t ino) { @@ -790,7 +786,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, return p; } -static inline __be32 * +static __be32 * encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, struct svc_fh *fhp) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e431e93ab50..640c92b2a9f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -84,10 +84,10 @@ static void nfs4_set_recdir(char *recdir); */ static DEFINE_MUTEX(client_mutex); -static kmem_cache_t *stateowner_slab = NULL; -static kmem_cache_t *file_slab = NULL; -static kmem_cache_t *stateid_slab = NULL; -static kmem_cache_t *deleg_slab = NULL; +static struct kmem_cache *stateowner_slab = NULL; +static struct kmem_cache *file_slab = NULL; +static struct kmem_cache *stateid_slab = NULL; +static struct kmem_cache *deleg_slab = NULL; void nfs4_lock_state(void) @@ -1003,7 +1003,7 @@ alloc_init_file(struct inode *ino) } static void -nfsd4_free_slab(kmem_cache_t **slab) +nfsd4_free_slab(struct kmem_cache **slab) { if (*slab == NULL) return; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 56ebb1443e0..f5243f94399 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -18,11 +18,6 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR - -#ifdef NFSD_OPTIMIZE_SPACE -# define inline -#endif - /* * Mapping of S_IF* types to NFS file types */ @@ -55,7 +50,7 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) return decode_fh(p, fhp); } -static inline __be32 * +static __be32 * encode_fh(__be32 *p, struct svc_fh *fhp) { memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); @@ -66,7 +61,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp) * Decode a file name and make sure that the path contains * no slashes or null bytes. */ -static inline __be32 * +static __be32 * decode_filename(__be32 *p, char **namp, int *lenp) { char *name; @@ -82,7 +77,7 @@ decode_filename(__be32 *p, char **namp, int *lenp) return p; } -static inline __be32 * +static __be32 * decode_pathname(__be32 *p, char **namp, int *lenp) { char *name; @@ -98,7 +93,7 @@ decode_pathname(__be32 *p, char **namp, int *lenp) return p; } -static inline __be32 * +static __be32 * decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c index 046fde8170e..65e640c61c8 100644 --- a/fs/nls/nls_cp936.c +++ b/fs/nls/nls_cp936.c @@ -4421,6 +4421,73 @@ static wchar_t *page_charset2uni[256] = { c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, }; +static unsigned char u2c_00[512] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ + 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */ + 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ + 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ + 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ + 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */ + 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ + 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */ + 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */ + 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ +}; + static unsigned char u2c_01[512] = { 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ @@ -10825,7 +10892,7 @@ static unsigned char u2c_FF[512] = { }; static unsigned char *page_uni2charset[256] = { - NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, + u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -10936,11 +11003,34 @@ static int uni2char(const wchar_t uni, unsigned char *uni2charset; unsigned char cl = uni&0xFF; unsigned char ch = (uni>>8)&0xFF; - int n; + unsigned char out0,out1; if (boundlen <= 0) return -ENAMETOOLONG; + if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */ + out[0] = 0x80; + return 1; + } + + if (ch == 0) { /* handle the U00 plane*/ + /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/ + out0 = u2c_00[cl*2]; + out1 = u2c_00[cl*2+1]; + if (out0 == 0x00 && out1 == 0x00) { + if (cl<0x80) { + out[0] = cl; + return 1; + } + return -EINVAL; + } else { + if (boundlen <= 1) + return -ENAMETOOLONG; + out[0] = out0; + out[1] = out1; + return 2; + } + } uni2charset = page_uni2charset[ch]; if (uni2charset) { @@ -10950,15 +11040,10 @@ static int uni2char(const wchar_t uni, out[1] = uni2charset[cl*2+1]; if (out[0] == 0x00 && out[1] == 0x00) return -EINVAL; - n = 2; - } else if (ch==0 && cl) { - out[0] = cl; - n = 1; + return 2; } else return -EINVAL; - - return n; } static int char2uni(const unsigned char *rawstring, int boundlen, @@ -10972,7 +11057,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen, return -ENAMETOOLONG; if (boundlen == 1) { - *uni = rawstring[0]; + if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */ + *uni = 0x20ac; + } else { + *uni = rawstring[0]; + } return 1; } @@ -10986,7 +11075,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen, return -EINVAL; n = 2; } else{ - *uni = ch; + if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */ + *uni = 0x20ac; + } else { + *uni = ch; + } n = 1; } return n; diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 9f08e851cfb..c577d8e1bd9 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1272,7 +1272,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec) { ntfs_attr_search_ctx *ctx; - ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS); + ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS); if (ctx) ntfs_attr_init_search_ctx(ctx, ni, mrec); return ctx; diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index e32cde48636..2194eff4974 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -38,7 +38,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) { ntfs_index_context *ictx; - ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); + ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS); if (ictx) *ictx = (ntfs_index_context){ .idx_ni = idx_ni }; return ictx; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 2d3de9c8981..247989891b4 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -324,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return VFS_I(ni); @@ -349,7 +349,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return ni; diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 6a495f7369f..005ca4b0f13 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -266,7 +266,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, /* We do not trust outside sources. */ if (likely(ins)) { - ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); + ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); if (likely(ucs)) { for (i = o = 0; i < ins_len; i += wc_len) { wc_len = nls->char2uni(ins + i, ins_len - i, diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 16b8d1ba706..941acf14e61 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -66,7 +66,7 @@ static struct file_operations dlmfs_file_operations; static struct inode_operations dlmfs_dir_inode_operations; static struct inode_operations dlmfs_root_inode_operations; static struct inode_operations dlmfs_file_inode_operations; -static kmem_cache_t *dlmfs_inode_cache; +static struct kmem_cache *dlmfs_inode_cache; struct workqueue_struct *user_dlm_worker; @@ -257,7 +257,7 @@ static ssize_t dlmfs_file_write(struct file *filp, } static void dlmfs_init_once(void *foo, - kmem_cache_t *cachep, + struct kmem_cache *cachep, unsigned long flags) { struct dlmfs_inode_private *ip = @@ -276,7 +276,7 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb) { struct dlmfs_inode_private *ip; - ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS); + ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); if (!ip) return NULL; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f784177b624..856012b4fa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dlm_dump_all_mles); #endif /* 0 */ -static kmem_cache_t *dlm_mle_cache = NULL; +static struct kmem_cache *dlm_mle_cache = NULL; static void dlm_mle_release(struct kref *kref); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index fcd4475d1f8..80ac69f11d9 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -61,7 +61,7 @@ struct ocfs2_em_insert_context { struct ocfs2_extent_map_entry *right_ent; }; -static kmem_cache_t *ocfs2_em_ent_cachep = NULL; +static struct kmem_cache *ocfs2_em_ent_cachep = NULL; static struct ocfs2_extent_map_entry * diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 46a378fbc40..1a7dd2945b3 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -106,7 +106,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) #define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL) #define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL) -extern kmem_cache_t *ocfs2_inode_cache; +extern struct kmem_cache *ocfs2_inode_cache; extern const struct address_space_operations ocfs2_aops; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d9b4214a12d..4bf39540e65 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -68,7 +68,7 @@ #include "buffer_head_io.h" -static kmem_cache_t *ocfs2_inode_cachep = NULL; +static struct kmem_cache *ocfs2_inode_cachep = NULL; /* OCFS2 needs to schedule several differnt types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these @@ -303,7 +303,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) { struct ocfs2_inode_info *oi; - oi = kmem_cache_alloc(ocfs2_inode_cachep, SLAB_NOFS); + oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS); if (!oi) return NULL; @@ -914,7 +914,7 @@ bail: } static void ocfs2_inode_init_once(void *data, - kmem_cache_t *cachep, + struct kmem_cache *cachep, unsigned long flags) { struct ocfs2_inode_info *oi = data; @@ -1674,7 +1674,7 @@ void __ocfs2_error(struct super_block *sb, va_list args; va_start(args, fmt); - vsprintf(error_buf, fmt, args); + vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); /* Not using mlog here because we want to show the actual @@ -1695,7 +1695,7 @@ void __ocfs2_abort(struct super_block* sb, va_list args; va_start(args, fmt); - vsprintf(error_buf, fmt, args); + vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 9707ed7a320..39814b900fc 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -69,7 +69,7 @@ struct ocfs2_meta_cache_item { sector_t c_block; }; -static kmem_cache_t *ocfs2_uptodate_cachep = NULL; +static struct kmem_cache *ocfs2_uptodate_cachep = NULL; void ocfs2_metadata_cache_init(struct inode *inode) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 592a6402e85..26f44e0074e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -330,13 +330,13 @@ out: return 0; } -static kmem_cache_t *op_inode_cachep; +static struct kmem_cache *op_inode_cachep; static struct inode *openprom_alloc_inode(struct super_block *sb) { struct op_inode_info *oi; - oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL); + oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL); if (!oi) return NULL; @@ -415,7 +415,7 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; -static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags) +static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned long flags) { struct op_inode_info *oi = (struct op_inode_info *) data; diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index 3068528890a..9917a8c360f 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c @@ -43,6 +43,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) if (warn_no_part) printk("Dev %s: unable to read RDB block %d\n", bdevname(bdev, b), blk); + res = -1; goto rdb_done; } if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK)) @@ -79,6 +80,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) if (warn_no_part) printk("Dev %s: unable to read partition block %d\n", bdevname(bdev, b), blk); + res = -1; goto rdb_done; } pb = (struct PartitionBlock *)data; diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 192a6adfdef..1f3572d5b75 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c @@ -88,7 +88,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) if (!xrs) { printk (" block %ld read failed\n", partsect); put_dev_sector(sect); - return 0; + return -1; } /* ++roman: sanity check: bit 0 of flg field must be set */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6fb4b6150d7..1901137f4ec 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -153,7 +153,7 @@ static struct parsed_partitions * check_partition(struct gendisk *hd, struct block_device *bdev) { struct parsed_partitions *state; - int i, res; + int i, res, err; state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL); if (!state) @@ -165,19 +165,30 @@ check_partition(struct gendisk *hd, struct block_device *bdev) sprintf(state->name, "p"); state->limit = hd->minors; - i = res = 0; + i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); res = check_part[i++](state, bdev); + if (res < 0) { + /* We have hit an I/O error which we don't report now. + * But record it, and let the others do their job. + */ + err = res; + res = 0; + } + } if (res > 0) return state; + if (!err) + /* The partition is unrecognized. So report I/O errors if there were any */ + res = err; if (!res) printk(" unknown partition table\n"); else if (warn_no_part) printk(" unable to read partition table\n"); kfree(state); - return NULL; + return ERR_PTR(res); } /* @@ -494,6 +505,8 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) disk->fops->revalidate_disk(disk); if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; + if (IS_ERR(state)) /* I/O error reading the partition table */ + return PTR_ERR(state); for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index d352a7381fe..9f7ad4244f6 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -43,7 +43,7 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { int ibm_partition(struct parsed_partitions *state, struct block_device *bdev) { - int blocksize, offset, size; + int blocksize, offset, size,res; loff_t i_size; dasd_information_t *info; struct hd_geometry *geo; @@ -56,15 +56,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) unsigned char *data; Sector sect; + res = 0; blocksize = bdev_hardsect_size(bdev); if (blocksize <= 0) - return 0; + goto out_exit; i_size = i_size_read(bdev->bd_inode); if (i_size == 0) - return 0; + goto out_exit; if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL) - goto out_noinfo; + goto out_exit; if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) goto out_nogeo; if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) @@ -72,7 +73,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) - goto out_noioctl; + goto out_freeall; /* * Get volume label, extract name and type. @@ -92,6 +93,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) EBCASC(type, 4); EBCASC(name, 6); + res = 1; + /* * Three different types: CMS1, VOL1 and LNX1/unlabeled */ @@ -156,6 +159,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) counter++; blk++; } + if (!data) + /* Are we not supposed to report this ? */ + goto out_readerr; } else { /* * Old style LNX1 or unlabeled disk @@ -171,18 +177,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) } printk("\n"); - kfree(label); - kfree(geo); - kfree(info); - return 1; + goto out_freeall; + out_readerr: -out_noioctl: + res = -1; +out_freeall: kfree(label); out_nolab: kfree(geo); out_nogeo: kfree(info); -out_noinfo: - return 0; +out_exit: + return res; } diff --git a/fs/pipe.c b/fs/pipe.c index b1626f269a3..ae36b89b1a3 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -830,7 +830,14 @@ void free_pipe_info(struct inode *inode) static struct vfsmount *pipe_mnt __read_mostly; static int pipefs_delete_dentry(struct dentry *dentry) { - return 1; + /* + * At creation time, we pretended this dentry was hashed + * (by clearing DCACHE_UNHASHED bit in d_flags) + * At delete time, we restore the truth : not hashed. + * (so that dput() can proceed correctly) + */ + dentry->d_flags |= DCACHE_UNHASHED; + return 0; } static struct dentry_operations pipefs_dentry_operations = { @@ -891,17 +898,22 @@ struct file *create_write_pipe(void) if (!inode) goto err_file; - sprintf(name, "[%lu]", inode->i_ino); + this.len = sprintf(name, "[%lu]", inode->i_ino); this.name = name; - this.len = strlen(name); - this.hash = inode->i_ino; /* will go */ + this.hash = 0; err = -ENOMEM; dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); if (!dentry) goto err_inode; dentry->d_op = &pipefs_dentry_operations; - d_add(dentry, inode); + /* + * We dont want to publish this dentry into global dentry hash table. + * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED + * This permits a working /proc/$pid/fd/XXX on pipes + */ + dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(dentry, inode); f->f_vfsmnt = mntget(pipe_mnt); f->f_dentry = dentry; f->f_mapping = inode->i_mapping; diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 7431d7ba2d0..f6c77627257 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,8 +8,9 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - kmsg.o proc_tty.o proc_misc.o + proc_tty.o proc_misc.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o +proc-$(CONFIG_PRINTK) += kmsg.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 795319c54f7..b859fc749c0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -683,8 +683,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF], *end; int oom_adjust; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; @@ -699,6 +697,10 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, task = get_proc_task(file->f_dentry->d_inode); if (!task) return -ESRCH; + if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { + put_task_struct(task); + return -EACCES; + } task->oomkilladj = oom_adjust; put_task_struct(task); if (end - buffer == 0) @@ -1883,8 +1885,9 @@ out: return; } -struct dentry *proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, struct task_struct *task, void *ptr) +static struct dentry *proc_pid_instantiate(struct inode *dir, + struct dentry * dentry, + struct task_struct *task, void *ptr) { struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 49dfb2ab783..e26945ba685 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -81,14 +81,14 @@ static void proc_read_inode(struct inode * inode) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; } -static kmem_cache_t * proc_inode_cachep; +static struct kmem_cache * proc_inode_cachep; static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; struct inode *inode; - ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); + ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; @@ -105,7 +105,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 1294eda4aca..1be73082edd 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -22,6 +22,7 @@ #include <asm/uaccess.h> #include <asm/io.h> +#define CORE_STR "CORE" static int open_kcore(struct inode * inode, struct file * filp) { @@ -82,10 +83,11 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) } *elf_buflen = sizeof(struct elfhdr) + (*nphdr + 2)*sizeof(struct elf_phdr) + - 3 * (sizeof(struct elf_note) + 4) + - sizeof(struct elf_prstatus) + - sizeof(struct elf_prpsinfo) + - sizeof(struct task_struct); + 3 * ((sizeof(struct elf_note)) + + roundup(sizeof(CORE_STR), 4)) + + roundup(sizeof(struct elf_prstatus), 4) + + roundup(sizeof(struct elf_prpsinfo), 4) + + roundup(sizeof(struct task_struct), 4); *elf_buflen = PAGE_ALIGN(*elf_buflen); return size + *elf_buflen; } @@ -210,7 +212,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) nhdr->p_offset = offset; /* set up the process status */ - notes[0].name = "CORE"; + notes[0].name = CORE_STR; notes[0].type = NT_PRSTATUS; notes[0].datasz = sizeof(struct elf_prstatus); notes[0].data = &prstatus; @@ -221,7 +223,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) bufp = storenote(¬es[0], bufp); /* set up the process info */ - notes[1].name = "CORE"; + notes[1].name = CORE_STR; notes[1].type = NT_PRPSINFO; notes[1].datasz = sizeof(struct elf_prpsinfo); notes[1].data = &prpsinfo; @@ -238,7 +240,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) bufp = storenote(¬es[1], bufp); /* set up the task structure */ - notes[2].name = "CORE"; + notes[2].name = CORE_STR; notes[2].type = NT_TASKSTRUCT; notes[2].datasz = sizeof(struct task_struct); notes[2].data = current; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 93c43b676e5..51815cece6f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -696,9 +696,11 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ +#ifdef CONFIG_PRINTK entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); if (entry) entry->proc_fops = &proc_kmsg_operations; +#endif create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 5a41db2a218..c047dc654d5 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -515,12 +515,12 @@ static void qnx4_read_inode(struct inode *inode) brelse(bh); } -static kmem_cache_t *qnx4_inode_cachep; +static struct kmem_cache *qnx4_inode_cachep; static struct inode *qnx4_alloc_inode(struct super_block *sb) { struct qnx4_inode_info *ei; - ei = kmem_cache_alloc(qnx4_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -531,7 +531,7 @@ static void qnx4_destroy_inode(struct inode *inode) kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ac14318c81b..373d862c3f8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -317,12 +317,11 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl /* area filled with zeroes, to supply as list of zero blocknumbers We allocate it outside of loop just in case loop would spin for several iterations. */ - char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. + char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. if (!zeros) { res = -ENOMEM; goto error_exit_free_blocks; } - memset(zeros, 0, to_paste * UNFM_P_SIZE); do { to_paste = min_t(__u64, hole_size, @@ -407,6 +406,8 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl we restart it. This will also free the path. */ if (journal_transaction_should_end (th, th->t_blocks_allocated)) { + inode->i_size = cpu_key_k_offset(&key) + + (to_paste << inode->i_blkbits); res = restart_transaction(th, inode, &path); @@ -1045,6 +1046,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); memset(kaddr, 0, from); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(prepared_pages[0]); } if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */ char *kaddr = @@ -1052,6 +1054,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode KM_USER0); memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(prepared_pages[num_pages - 1]); } /* Since all blocks are new - use already calculated value */ @@ -1185,6 +1188,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode memset(kaddr + block_start, 0, from - block_start); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(prepared_pages[0]); set_buffer_uptodate(bh); } } @@ -1222,6 +1226,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode KM_USER0); memset(kaddr + to, 0, block_end - to); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(prepared_pages[num_pages - 1]); set_buffer_uptodate(bh); } } @@ -1307,56 +1312,8 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t count = MAX_NON_LFS - (unsigned long)*ppos; } - if (file->f_flags & O_DIRECT) { // Direct IO needs treatment - ssize_t result, after_file_end = 0; - if ((*ppos + count >= inode->i_size) - || (file->f_flags & O_APPEND)) { - /* If we are appending a file, we need to put this savelink in here. - If we will crash while doing direct io, finish_unfinished will - cut the garbage from the file end. */ - reiserfs_write_lock(inode->i_sb); - err = - journal_begin(&th, inode->i_sb, - JOURNAL_PER_BALANCE_CNT); - if (err) { - reiserfs_write_unlock(inode->i_sb); - return err; - } - reiserfs_update_inode_transaction(inode); - add_save_link(&th, inode, 1 /* Truncate */ ); - after_file_end = 1; - err = - journal_end(&th, inode->i_sb, - JOURNAL_PER_BALANCE_CNT); - reiserfs_write_unlock(inode->i_sb); - if (err) - return err; - } - result = do_sync_write(file, buf, count, ppos); - - if (after_file_end) { /* Now update i_size and remove the savelink */ - struct reiserfs_transaction_handle th; - reiserfs_write_lock(inode->i_sb); - err = journal_begin(&th, inode->i_sb, 1); - if (err) { - reiserfs_write_unlock(inode->i_sb); - return err; - } - reiserfs_update_inode_transaction(inode); - mark_inode_dirty(inode); - err = journal_end(&th, inode->i_sb, 1); - if (err) { - reiserfs_write_unlock(inode->i_sb); - return err; - } - err = remove_save_link(inode, 1 /* truncate */ ); - reiserfs_write_unlock(inode->i_sb); - if (err) - return err; - } - - return result; - } + if (file->f_flags & O_DIRECT) + return do_sync_write(file, buf, count, ppos); if (unlikely((ssize_t) count < 0)) return -EINVAL; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9c69bcacad2..254239e6f9e 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -216,11 +216,12 @@ static int file_capable(struct inode *inode, long block) BUG_ON(!th->t_trans_id); BUG_ON(!th->t_refcount); + pathrelse(path); + /* we cannot restart while nested */ if (th->t_refcount > 1) { return 0; } - pathrelse(path); reiserfs_update_sd(th, inode); err = journal_end(th, s, len); if (!err) { @@ -928,15 +929,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, if (blocks_needed == 1) { un = &unf_single; } else { - un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. + un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. if (!un) { un = &unf_single; blocks_needed = 1; max_to_insert = 0; - } else - memset(un, 0, - UNFM_P_SIZE * min(blocks_needed, - max_to_insert)); + } } if (blocks_needed <= max_to_insert) { /* we are going to add target block to the file. Use allocated diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 17249994110..7fb5fb036f9 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -490,13 +490,13 @@ static void reiserfs_put_super(struct super_block *s) return; } -static kmem_cache_t *reiserfs_inode_cachep; +static struct kmem_cache *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; ei = (struct reiserfs_inode_info *) - kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); + kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -507,7 +507,7 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; @@ -1549,13 +1549,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) struct reiserfs_sb_info *sbi; int errval = -EINVAL; - sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); if (!sbi) { errval = -ENOMEM; goto error; } s->s_fs_info = sbi; - memset(sbi, 0, sizeof(struct reiserfs_sb_info)); /* Set default values for options: non-aggressive tails, RO on errors */ REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index ddcd9e1ef28..c5af088d4a4 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -550,12 +550,12 @@ romfs_read_inode(struct inode *i) } } -static kmem_cache_t * romfs_inode_cachep; +static struct kmem_cache * romfs_inode_cachep; static struct inode *romfs_alloc_inode(struct super_block *sb) { struct romfs_inode_info *ei; - ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL); + ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -566,7 +566,7 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; diff --git a/fs/seq_file.c b/fs/seq_file.c index 555b9ac04c2..10690aa401c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -26,7 +26,7 @@ * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. */ -int seq_open(struct file *file, struct seq_operations *op) +int seq_open(struct file *file, const struct seq_operations *op) { struct seq_file *p = file->private_data; @@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open); int single_release(struct inode *inode, struct file *file) { - struct seq_operations *op = ((struct seq_file *)file->private_data)->op; + const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; int res = seq_release(inode, file); kfree(op); return res; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 2c122ee83ad..4af4cd729a5 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -50,12 +50,12 @@ static void smb_put_super(struct super_block *); static int smb_statfs(struct dentry *, struct kstatfs *); static int smb_show_options(struct seq_file *, struct vfsmount *); -static kmem_cache_t *smb_inode_cachep; +static struct kmem_cache *smb_inode_cachep; static struct inode *smb_alloc_inode(struct super_block *sb) { struct smb_inode_info *ei; - ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, SLAB_KERNEL); + ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -66,7 +66,7 @@ static void smb_destroy_inode(struct inode *inode) kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR; diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 0fb74697abc..a4bcae8a9af 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -25,7 +25,7 @@ #define ROUND_UP(x) (((x)+3) & ~3) /* cache for request structures */ -static kmem_cache_t *req_cachep; +static struct kmem_cache *req_cachep; static int smb_request_send_req(struct smb_request *req); @@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server, struct smb_request *req; unsigned char *buf = NULL; - req = kmem_cache_alloc(req_cachep, SLAB_KERNEL); + req = kmem_cache_alloc(req_cachep, GFP_KERNEL); VERBOSE("allocating request: %p\n", req); if (!req) goto out; diff --git a/fs/stat.c b/fs/stat.c index bca07eb2003..a0ebfc7f8a6 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -51,13 +51,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return inode->i_op->getattr(mnt, dentry, stat); generic_fillattr(inode, stat); - if (!stat->blksize) { - struct super_block *s = inode->i_sb; - unsigned blocks; - blocks = (stat->size+s->s_blocksize-1) >> s->s_blocksize_bits; - stat->blocks = (s->s_blocksize / 512) * blocks; - stat->blksize = s->s_blocksize; - } return 0; } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 20551a1b8a5..e503f858fba 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -16,7 +16,7 @@ struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; -kmem_cache_t *sysfs_dir_cachep; +struct kmem_cache *sysfs_dir_cachep; static struct super_operations sysfs_ops = { .statfs = simple_statfs, diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 6f3d6bd5288..bd7cec295da 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -1,6 +1,6 @@ extern struct vfsmount * sysfs_mount; -extern kmem_cache_t *sysfs_dir_cachep; +extern struct kmem_cache *sysfs_dir_cachep; extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); diff --git a/fs/sysv/CHANGES b/fs/sysv/CHANGES deleted file mode 100644 index 66ea6e92be6..00000000000 --- a/fs/sysv/CHANGES +++ /dev/null @@ -1,60 +0,0 @@ -Mon, 15 Dec 1997 Krzysztof G. Baranowski <kgb@manjak.knm.org.pl> - * namei.c: struct sysv_dir_inode_operations updated to use dentries. - -Fri, 23 Jan 1998 Krzysztof G. Baranowski <kgb@manjak.knm.org.pl> - * inode.c: corrected 1 track offset setting (in sb->sv_block_base). - Originally it was overridden (by setting to zero) - in detected_[xenix,sysv4,sysv2,coherent]. Thanks - to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl> - for identifying the problem. - -Tue, 27 Jan 1998 Krzysztof G. Baranowski <kgb@manjak.knm.org.pl> - * inode.c: added 2048-byte block support to SystemV FS. - Merged detected_bs[512,1024,2048]() into one function: - void detected_bs (u_char type, struct super_block *sb). - Thanks to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl> - for the patch. - -Wed, 4 Feb 1998 Krzysztof G. Baranowski <kgb@manjak.knm.org.pl> - * namei.c: removed static subdir(); is_subdir() from dcache.c - is used instead. Cosmetic changes. - -Thu, 3 Dec 1998 Al Viro (viro@parcelfarce.linux.theplanet.co.uk) - * namei.c (sysv_rmdir): - Bugectomy: old check for victim being busy - (inode->i_count) wasn't replaced (with checking - dentry->d_count) and escaped Linus in the last round - of changes. Shot and buried. - -Wed, 9 Dec 1998 AV - * namei.c (do_sysv_rename): - Fixed incorrect check for other owners + race. - Removed checks that went to VFS. - * namei.c (sysv_unlink): - Removed checks that went to VFS. - -Thu, 10 Dec 1998 AV - * namei.c (do_mknod): - Removed dead code - mknod is never asked to - create a symlink or directory. Incidentially, - it wouldn't do it right if it would be called. - -Sat, 26 Dec 1998 KGB - * inode.c (detect_sysv4): - Added detection of expanded s_type field (0x10, - 0x20 and 0x30). Forced read-only access in this case. - -Sun, 21 Mar 1999 AV - * namei.c (sysv_link): - Fixed i_count usage that resulted in dcache corruption. - * inode.c: - Filled ->delete_inode() method with sysv_delete_inode(). - sysv_put_inode() is gone, as it tried to do ->delete_ - _inode()'s job. - * ialloc.c: (sysv_free_inode): - Fixed race. - -Sun, 30 Apr 1999 AV - * namei.c (sysv_mknod): - Removed dead code (S_IFREG case is now passed to - ->create() by VFS). diff --git a/fs/sysv/ChangeLog b/fs/sysv/ChangeLog deleted file mode 100644 index f403f8b91b8..00000000000 --- a/fs/sysv/ChangeLog +++ /dev/null @@ -1,106 +0,0 @@ -Thu Feb 14 2002 Andrew Morton <akpm@zip.com.au> - - * dir_commit_chunk(): call writeout_one_page() as well as - waitfor_one_page() for IS_SYNC directories, so that we - actually do sync the directory. (forward-port from 2.4). - -Thu Feb 7 2002 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - - * super.c: switched to ->get_sb() - * ChangeLog: fixed dates ;-) - -2002-01-24 David S. Miller <davem@redhat.com> - - * inode.c: Include linux/init.h - -Mon Jan 21 2002 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - * ialloc.c (sysv_new_inode): zero SYSV_I(inode)->i_data out. - * i_vnode renamed to vfs_inode. Sorry, but let's keep that - consistent. - -Sat Jan 19 2002 Christoph Hellwig <hch@infradead.org> - - * include/linux/sysv_fs.h (SYSV_I): Get fs-private inode data using - list_entry() instead of inode->u. - * include/linux/sysv_fs_i.h: Add 'struct inode i_vnode' field to - sysv_inode_info structure. - * inode.c: Include <linux/slab.h>, implement alloc_inode/destroy_inode - sop methods, add infrastructure for per-fs inode slab cache. - * super.c (init_sysv_fs): Initialize inode cache, recover properly - in the case of failed register_filesystem for V7. - (exit_sysv_fs): Destroy inode cache. - -Sat Jan 19 2002 Christoph Hellwig <hch@infradead.org> - - * include/linux/sysv_fs.h: Include <linux/sysv_fs_i.h>, declare SYSV_I(). - * dir.c (sysv_find_entry): Use SYSV_I() instead of ->u.sysv_i to - access fs-private inode data. - * ialloc.c (sysv_new_inode): Likewise. - * inode.c (sysv_read_inode): Likewise. - (sysv_update_inode): Likewise. - * itree.c (get_branch): Likewise. - (sysv_truncate): Likewise. - * symlink.c (sysv_readlink): Likewise. - (sysv_follow_link): Likewise. - -Fri Jan 4 2002 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - - * ialloc.c (sysv_free_inode): Use sb->s_id instead of bdevname(). - * inode.c (sysv_read_inode): Likewise. - (sysv_update_inode): Likewise. - (sysv_sync_inode): Likewise. - * super.c (detect_sysv): Likewise. - (complete_read_super): Likewise. - (sysv_read_super): Likewise. - (v7_read_super): Likewise. - -Sun Dec 30 2001 Manfred Spraul <manfred@colorfullife.com> - - * dir.c (dir_commit_chunk): Do not set dir->i_version. - (sysv_readdir): Likewise. - -Thu Dec 27 2001 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - - * itree.c (get_block): Use map_bh() to fill out bh_result. - -Tue Dec 25 2001 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - - * super.c (sysv_read_super): Use sb_set_blocksize() to set blocksize. - (v7_read_super): Likewise. - -Tue Nov 27 2001 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk> - - * itree.c (get_block): Change type for iblock argument to sector_t. - * super.c (sysv_read_super): Set s_blocksize early. - (v7_read_super): Likewise. - * balloc.c (sysv_new_block): Use sb_bread(). instead of bread(). - (sysv_count_free_blocks): Likewise. - * ialloc.c (sysv_raw_inode): Likewise. - * itree.c (get_branch): Likewise. - (free_branches): Likewise. - * super.c (sysv_read_super): Likewise. - (v7_read_super): Likewise. - -Sat Dec 15 2001 Christoph Hellwig <hch@infradead.org> - - * inode.c (sysv_read_inode): Mark inode as bad in case of failure. - * super.c (complete_read_super): Check for bad root inode. - -Wed Nov 21 2001 Andrew Morton <andrewm@uow.edu.au> - - * file.c (sysv_sync_file): Call fsync_inode_data_buffers. - -Fri Oct 26 2001 Christoph Hellwig <hch@infradead.org> - - * dir.c, ialloc.c, namei.c, include/linux/sysv_fs_i.h: - Implement per-Inode lookup offset cache. - Modelled after Ted's ext2 patch. - -Fri Oct 26 2001 Christoph Hellwig <hch@infradead.org> - - * inode.c, super.c, include/linux/sysv_fs.h, - include/linux/sysv_fs_sb.h: - Remove symlink faking. Noone really wants to use these as - linux filesystems and native OSes don't support it anyway. - - diff --git a/fs/sysv/INTRO b/fs/sysv/INTRO deleted file mode 100644 index de4e4d17cac..00000000000 --- a/fs/sysv/INTRO +++ /dev/null @@ -1,182 +0,0 @@ -This is the implementation of the SystemV/Coherent filesystem for Linux. -It grew out of separate filesystem implementations - - Xenix FS Doug Evans <dje@cygnus.com> June 1992 - SystemV FS Paul B. Monday <pmonday@eecs.wsu.edu> March-June 1993 - Coherent FS B. Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> June 1993 - -and was merged together in July 1993. - -These filesystems are rather similar. Here is a comparison with Minix FS: - -* Linux fdisk reports on partitions - - Minix FS 0x81 Linux/Minix - - Xenix FS ?? - - SystemV FS ?? - - Coherent FS 0x08 AIX bootable - -* Size of a block or zone (data allocation unit on disk) - - Minix FS 1024 - - Xenix FS 1024 (also 512 ??) - - SystemV FS 1024 (also 512 and 2048) - - Coherent FS 512 - -* General layout: all have one boot block, one super block and - separate areas for inodes and for directories/data. - On SystemV Release 2 FS (e.g. Microport) the first track is reserved and - all the block numbers (including the super block) are offset by one track. - -* Byte ordering of "short" (16 bit entities) on disk: - - Minix FS little endian 0 1 - - Xenix FS little endian 0 1 - - SystemV FS little endian 0 1 - - Coherent FS little endian 0 1 - Of course, this affects only the file system, not the data of files on it! - -* Byte ordering of "long" (32 bit entities) on disk: - - Minix FS little endian 0 1 2 3 - - Xenix FS little endian 0 1 2 3 - - SystemV FS little endian 0 1 2 3 - - Coherent FS PDP-11 2 3 0 1 - Of course, this affects only the file system, not the data of files on it! - -* Inode on disk: "short", 0 means non-existent, the root dir ino is: - - Minix FS 1 - - Xenix FS, SystemV FS, Coherent FS 2 - -* Maximum number of hard links to a file: - - Minix FS 250 - - Xenix FS ?? - - SystemV FS ?? - - Coherent FS >=10000 - -* Free inode management: - - Minix FS a bitmap - - Xenix FS, SystemV FS, Coherent FS - There is a cache of a certain number of free inodes in the super-block. - When it is exhausted, new free inodes are found using a linear search. - -* Free block management: - - Minix FS a bitmap - - Xenix FS, SystemV FS, Coherent FS - Free blocks are organized in a "free list". Maybe a misleading term, - since it is not true that every free block contains a pointer to - the next free block. Rather, the free blocks are organized in chunks - of limited size, and every now and then a free block contains pointers - to the free blocks pertaining to the next chunk; the first of these - contains pointers and so on. The list terminates with a "block number" - 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS. - -* Super-block location: - - Minix FS block 1 = bytes 1024..2047 - - Xenix FS block 1 = bytes 1024..2047 - - SystemV FS bytes 512..1023 - - Coherent FS block 1 = bytes 512..1023 - -* Super-block layout: - - Minix FS - unsigned short s_ninodes; - unsigned short s_nzones; - unsigned short s_imap_blocks; - unsigned short s_zmap_blocks; - unsigned short s_firstdatazone; - unsigned short s_log_zone_size; - unsigned long s_max_size; - unsigned short s_magic; - - Xenix FS, SystemV FS, Coherent FS - unsigned short s_firstdatazone; - unsigned long s_nzones; - unsigned short s_fzone_count; - unsigned long s_fzones[NICFREE]; - unsigned short s_finode_count; - unsigned short s_finodes[NICINOD]; - char s_flock; - char s_ilock; - char s_modified; - char s_rdonly; - unsigned long s_time; - short s_dinfo[4]; -- SystemV FS only - unsigned long s_free_zones; - unsigned short s_free_inodes; - short s_dinfo[4]; -- Xenix FS only - unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only - char s_fname[6]; - char s_fpack[6]; - then they differ considerably: - Xenix FS - char s_clean; - char s_fill[371]; - long s_magic; - long s_type; - SystemV FS - long s_fill[12 or 14]; - long s_state; - long s_magic; - long s_type; - Coherent FS - unsigned long s_unique; - Note that Coherent FS has no magic. - -* Inode layout: - - Minix FS - unsigned short i_mode; - unsigned short i_uid; - unsigned long i_size; - unsigned long i_time; - unsigned char i_gid; - unsigned char i_nlinks; - unsigned short i_zone[7+1+1]; - - Xenix FS, SystemV FS, Coherent FS - unsigned short i_mode; - unsigned short i_nlink; - unsigned short i_uid; - unsigned short i_gid; - unsigned long i_size; - unsigned char i_zone[3*(10+1+1+1)]; - unsigned long i_atime; - unsigned long i_mtime; - unsigned long i_ctime; - -* Regular file data blocks are organized as - - Minix FS - 7 direct blocks - 1 indirect block (pointers to blocks) - 1 double-indirect block (pointer to pointers to blocks) - - Xenix FS, SystemV FS, Coherent FS - 10 direct blocks - 1 indirect block (pointers to blocks) - 1 double-indirect block (pointer to pointers to blocks) - 1 triple-indirect block (pointer to pointers to pointers to blocks) - -* Inode size, inodes per block - - Minix FS 32 32 - - Xenix FS 64 16 - - SystemV FS 64 16 - - Coherent FS 64 8 - -* Directory entry on disk - - Minix FS - unsigned short inode; - char name[14/30]; - - Xenix FS, SystemV FS, Coherent FS - unsigned short inode; - char name[14]; - -* Dir entry size, dir entries per block - - Minix FS 16/32 64/32 - - Xenix FS 16 64 - - SystemV FS 16 64 - - Coherent FS 16 32 - -* How to implement symbolic links such that the host fsck doesn't scream: - - Minix FS normal - - Xenix FS kludge: as regular files with chmod 1000 - - SystemV FS ?? - - Coherent FS kludge: as regular files with chmod 1000 - - -Notation: We often speak of a "block" but mean a zone (the allocation unit) -and not the disk driver's notion of "block". - - -Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index d63c5e48b05..ead9864567e 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -301,13 +301,13 @@ static void sysv_delete_inode(struct inode *inode) unlock_kernel(); } -static kmem_cache_t *sysv_inode_cachep; +static struct kmem_cache *sysv_inode_cachep; static struct inode *sysv_alloc_inode(struct super_block *sb) { struct sysv_inode_info *si; - si = kmem_cache_alloc(sysv_inode_cachep, SLAB_KERNEL); + si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL); if (!si) return NULL; return &si->vfs_inode; @@ -318,7 +318,7 @@ static void sysv_destroy_inode(struct inode *inode) kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } -static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/udf/super.c b/fs/udf/super.c index 1aea6a4f9a4..1dbc2955f02 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -107,12 +107,12 @@ static struct file_system_type udf_fstype = { .fs_flags = FS_REQUIRES_DEV, }; -static kmem_cache_t * udf_inode_cachep; +static struct kmem_cache * udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { struct udf_inode_info *ei; - ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); + ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL); if (!ei) return NULL; @@ -130,7 +130,7 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct udf_inode_info *ei = (struct udf_inode_info *) foo; @@ -1709,7 +1709,7 @@ void udf_error(struct super_block *sb, const char *function, sb->s_dirt = 1; } va_start(args, fmt); - vsprintf(error_buf, fmt, args); + vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); printk (KERN_CRIT "UDF-fs error (device %s): %s: %s\n", sb->s_id, function, error_buf); @@ -1721,7 +1721,7 @@ void udf_warning(struct super_block *sb, const char *function, va_list args; va_start (args, fmt); - vsprintf(error_buf, fmt, args); + vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n", sb->s_id, function, error_buf); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ec79e3091d1..8a8e9382ec0 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -224,7 +224,7 @@ void ufs_error (struct super_block * sb, const char * function, sb->s_flags |= MS_RDONLY; } va_start (args, fmt); - vsprintf (error_buf, fmt, args); + vsnprintf (error_buf, sizeof(error_buf), fmt, args); va_end (args); switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) { case UFS_MOUNT_ONERROR_PANIC: @@ -255,7 +255,7 @@ void ufs_panic (struct super_block * sb, const char * function, sb->s_dirt = 1; } va_start (args, fmt); - vsprintf (error_buf, fmt, args); + vsnprintf (error_buf, sizeof(error_buf), fmt, args); va_end (args); sb->s_flags |= MS_RDONLY; printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n", @@ -268,7 +268,7 @@ void ufs_warning (struct super_block * sb, const char * function, va_list args; va_start (args, fmt); - vsprintf (error_buf, fmt, args); + vsnprintf (error_buf, sizeof(error_buf), fmt, args); va_end (args); printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n", sb->s_id, function, error_buf); @@ -1204,12 +1204,12 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t * ufs_inode_cachep; +static struct kmem_cache * ufs_inode_cachep; static struct inode *ufs_alloc_inode(struct super_block *sb) { struct ufs_inode_info *ei; - ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, SLAB_KERNEL); + ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->vfs_inode.i_version = 1; @@ -1221,7 +1221,7 @@ static void ufs_destroy_inode(struct inode *inode) kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 28fce6c239b..7dd12bb1d62 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -299,7 +299,7 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi, #define ubh_get_addr16(ubh,begin) \ (((__fs16*)((ubh)->bh[(begin) >> (uspi->s_fshift-1)]->b_data)) + \ - ((begin) & (uspi->fsize>>1) - 1))) + ((begin) & ((uspi->fsize>>1) - 1))) #define ubh_get_addr32(ubh,begin) \ (((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index eef4a0ba11e..4fb01ffdfd1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -32,6 +32,7 @@ #include <linux/kthread.h> #include <linux/migrate.h> #include <linux/backing-dev.h> +#include <linux/freezer.h> STATIC kmem_zone_t *xfs_buf_zone; STATIC kmem_shaker_t xfs_buf_shake; @@ -1826,11 +1827,11 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out_free_trace_buf; - xfslogd_workqueue = create_workqueue("xfslogd"); + xfslogd_workqueue = create_freezeable_workqueue("xfslogd"); if (!xfslogd_workqueue) goto out_free_buf_zone; - xfsdatad_workqueue = create_workqueue("xfsdatad"); + xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad"); if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index de05abbbe7f..b93265b7c79 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -56,6 +56,7 @@ #include <linux/mempool.h> #include <linux/writeback.h> #include <linux/kthread.h> +#include <linux/freezer.h> STATIC struct quotactl_ops xfs_quotactl_operations; STATIC struct super_operations xfs_super_operations; diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 47faf27913a..7f1e92930b6 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -64,7 +64,7 @@ /* Host-dependent types and defines */ #define ACPI_MACHINE_WIDTH BITS_PER_LONG -#define acpi_cache_t kmem_cache_t +#define acpi_cache_t struct kmem_cache #define acpi_spinlock spinlock_t * #define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol); #define strtoul simple_strtoul diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index b9ff4d8cb33..57e09f5e342 100644 --- a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -51,7 +51,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(dev) (1) +#define dma_is_consistent(d, h) (1) int dma_set_mask(struct device *dev, u64 mask); @@ -60,7 +60,7 @@ int dma_set_mask(struct device *dev, u64 mask); #define dma_sync_single_range(dev, addr, off, size, dir) do { } while (0) #define dma_sync_sg_for_cpu(dev, sg, nents, dir) do { } while (0) #define dma_sync_sg_for_device(dev, sg, nents, dir) do { } while (0) -#define dma_cache_sync(va, size, dir) do { } while (0) +#define dma_cache_sync(dev, va, size, dir) do { } while (0) #define dma_get_cache_alignment() L1_CACHE_BYTES diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index 2cabbd465c0..84313d14e78 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -387,188 +387,6 @@ #define NR_SYSCALLS 447 -#if defined(__GNUC__) - -#define _syscall_return(type) \ - return (_sc_err ? errno = _sc_ret, _sc_ret = -1L : 0), (type) _sc_ret - -#define _syscall_clobbers \ - "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \ - "$22", "$23", "$24", "$25", "$27", "$28" \ - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - __asm__("callsys # %0 %1 %2" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - __asm__("callsys # %0 %1 %2 %3" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - __asm__("callsys # %0 %1 %2 %3 %4" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - register long _sc_20 __asm__("$20"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - _sc_20 = (long) (arg5); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6 %7" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19), "r"(_sc_20) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, type6 arg6)\ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - register long _sc_20 __asm__("$20"); \ - register long _sc_21 __asm__("$21"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - _sc_20 = (long) (arg5); \ - _sc_21 = (long) (arg6); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6 %7 %8" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19), "r"(_sc_20), "r"(_sc_21) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#endif /* __GNUC__ */ - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index 666617711c8..9bc46b486af 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -48,7 +48,7 @@ static inline int dma_get_cache_alignment(void) return 32; } -static inline int dma_is_consistent(dma_addr_t handle) +static inline int dma_is_consistent(struct device *dev, dma_addr_t handle) { return !!arch_is_coherent(); } diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h index aa4b5782f0c..e5407392afc 100644 --- a/include/asm-arm/setup.h +++ b/include/asm-arm/setup.h @@ -14,55 +14,57 @@ #ifndef __ASMARM_SETUP_H #define __ASMARM_SETUP_H +#include <asm/types.h> + #define COMMAND_LINE_SIZE 1024 /* The list ends with an ATAG_NONE node. */ #define ATAG_NONE 0x00000000 struct tag_header { - u32 size; - u32 tag; + __u32 size; + __u32 tag; }; /* The list must start with an ATAG_CORE node */ #define ATAG_CORE 0x54410001 struct tag_core { - u32 flags; /* bit 0 = read-only */ - u32 pagesize; - u32 rootdev; + __u32 flags; /* bit 0 = read-only */ + __u32 pagesize; + __u32 rootdev; }; /* it is allowed to have multiple ATAG_MEM nodes */ #define ATAG_MEM 0x54410002 struct tag_mem32 { - u32 size; - u32 start; /* physical start address */ + __u32 size; + __u32 start; /* physical start address */ }; /* VGA text type displays */ #define ATAG_VIDEOTEXT 0x54410003 struct tag_videotext { - u8 x; - u8 y; - u16 video_page; - u8 video_mode; - u8 video_cols; - u16 video_ega_bx; - u8 video_lines; - u8 video_isvga; - u16 video_points; + __u8 x; + __u8 y; + __u16 video_page; + __u8 video_mode; + __u8 video_cols; + __u16 video_ega_bx; + __u8 video_lines; + __u8 video_isvga; + __u16 video_points; }; /* describes how the ramdisk will be used in kernel */ #define ATAG_RAMDISK 0x54410004 struct tag_ramdisk { - u32 flags; /* bit 0 = load, bit 1 = prompt */ - u32 size; /* decompressed ramdisk size in _kilo_ bytes */ - u32 start; /* starting block of floppy-based RAM disk image */ + __u32 flags; /* bit 0 = load, bit 1 = prompt */ + __u32 size; /* decompressed ramdisk size in _kilo_ bytes */ + __u32 start; /* starting block of floppy-based RAM disk image */ }; /* describes where the compressed ramdisk image lives (virtual address) */ @@ -76,23 +78,23 @@ struct tag_ramdisk { #define ATAG_INITRD2 0x54420005 struct tag_initrd { - u32 start; /* physical start address */ - u32 size; /* size of compressed ramdisk image in bytes */ + __u32 start; /* physical start address */ + __u32 size; /* size of compressed ramdisk image in bytes */ }; /* board serial number. "64 bits should be enough for everybody" */ #define ATAG_SERIAL 0x54410006 struct tag_serialnr { - u32 low; - u32 high; + __u32 low; + __u32 high; }; /* board revision */ #define ATAG_REVISION 0x54410007 struct tag_revision { - u32 rev; + __u32 rev; }; /* initial values for vesafb-type framebuffers. see struct screen_info @@ -101,20 +103,20 @@ struct tag_revision { #define ATAG_VIDEOLFB 0x54410008 struct tag_videolfb { - u16 lfb_width; - u16 lfb_height; - u16 lfb_depth; - u16 lfb_linelength; - u32 lfb_base; - u32 lfb_size; - u8 red_size; - u8 red_pos; - u8 green_size; - u8 green_pos; - u8 blue_size; - u8 blue_pos; - u8 rsvd_size; - u8 rsvd_pos; + __u16 lfb_width; + __u16 lfb_height; + __u16 lfb_depth; + __u16 lfb_linelength; + __u32 lfb_base; + __u32 lfb_size; + __u8 red_size; + __u8 red_pos; + __u8 green_size; + __u8 green_pos; + __u8 blue_size; + __u8 blue_pos; + __u8 rsvd_size; + __u8 rsvd_pos; }; /* command line: \0 terminated string */ @@ -128,17 +130,17 @@ struct tag_cmdline { #define ATAG_ACORN 0x41000101 struct tag_acorn { - u32 memc_control_reg; - u32 vram_pages; - u8 sounddefault; - u8 adfsdrives; + __u32 memc_control_reg; + __u32 vram_pages; + __u8 sounddefault; + __u8 adfsdrives; }; /* footbridge memory clock, see arch/arm/mach-footbridge/arch.c */ #define ATAG_MEMCLK 0x41000402 struct tag_memclk { - u32 fmemclk; + __u32 fmemclk; }; struct tag { @@ -167,24 +169,26 @@ struct tag { }; struct tagtable { - u32 tag; + __u32 tag; int (*parse)(const struct tag *); }; -#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) -#define __tagtable(tag, fn) \ -static struct tagtable __tagtable_##fn __tag = { tag, fn } - #define tag_member_present(tag,member) \ ((unsigned long)(&((struct tag *)0L)->member + 1) \ <= (tag)->hdr.size * 4) -#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size)) +#define tag_next(t) ((struct tag *)((__u32 *)(t) + (t)->hdr.size)) #define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2) #define for_each_tag(t,base) \ for (t = base; t->hdr.size; t = tag_next(t)) +#ifdef __KERNEL__ + +#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) +#define __tagtable(tag, fn) \ +static struct tagtable __tagtable_##fn __tag = { tag, fn } + /* * Memory map description */ @@ -217,4 +221,6 @@ struct early_params { static struct early_params __early_##fn __attribute_used__ \ __attribute__((__section__(".early_param.init"))) = { name, fn } +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 14a87eec5a2..d44c629d842 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -377,156 +377,6 @@ #endif #ifdef __KERNEL__ -#include <linux/err.h> -#include <linux/linkage.h> - -#define __sys2(x) #x -#define __sys1(x) __sys2(x) - -#ifndef __syscall -#if defined(__thumb__) || defined(__ARM_EABI__) -#define __SYS_REG(name) register long __sysreg __asm__("r7") = __NR_##name; -#define __SYS_REG_LIST(regs...) "r" (__sysreg) , ##regs -#define __syscall(name) "swi\t0" -#else -#define __SYS_REG(name) -#define __SYS_REG_LIST(regs...) regs -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#endif -#endif - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) { \ - __SYS_REG(name) \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST() \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)\ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __r5 __asm__("r5") = (long)arg6; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4), "r" (__r5) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 diff --git a/include/asm-arm26/pgalloc.h b/include/asm-arm26/pgalloc.h index 6437167b1ff..7725af3ddb4 100644 --- a/include/asm-arm26/pgalloc.h +++ b/include/asm-arm26/pgalloc.h @@ -15,7 +15,7 @@ #include <asm/tlbflush.h> #include <linux/slab.h> -extern kmem_cache_t *pte_cache; +extern struct kmem_cache *pte_cache; static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr){ return kmem_cache_alloc(pte_cache, GFP_KERNEL); diff --git a/include/asm-arm26/setup.h b/include/asm-arm26/setup.h index 6348931be65..1a867b4e8d5 100644 --- a/include/asm-arm26/setup.h +++ b/include/asm-arm26/setup.h @@ -16,6 +16,8 @@ #define COMMAND_LINE_SIZE 1024 +#ifdef __KERNEL__ + /* The list ends with an ATAG_NONE node. */ #define ATAG_NONE 0x00000000 @@ -202,4 +204,6 @@ struct meminfo { extern struct meminfo meminfo; +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h index 25a5eead85b..4c3b919177e 100644 --- a/include/asm-arm26/unistd.h +++ b/include/asm-arm26/unistd.h @@ -311,139 +311,6 @@ #define __ARM_NR_usr26 (__ARM_NR_BASE+3) #ifdef __KERNEL__ -#include <linux/err.h> -#include <linux/linkage.h> - -#define __sys2(x) #x -#define __sys1(x) __sys2(x) - -#ifndef __syscall -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#endif - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)-MAX_ERRNO) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) { \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)\ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3),"r" (__r4) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __r5 __asm__("r5") = (long)arg6; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3), "r" (__r4),"r" (__r5) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 4c40cb41cdf..0580b5d62bb 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -8,7 +8,8 @@ #include <asm/cacheflush.h> #include <asm/io.h> -extern void dma_cache_sync(void *vaddr, size_t size, int direction); +extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + int direction); /* * Return whether the given device DMA address mask can be supported @@ -307,7 +308,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -static inline int dma_is_consistent(dma_addr_t dma_addr) +static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h index 10193da4113..0a5224245e4 100644 --- a/include/asm-avr32/setup.h +++ b/include/asm-avr32/setup.h @@ -13,6 +13,8 @@ #define COMMAND_LINE_SIZE 256 +#ifdef __KERNEL__ + /* Magic number indicating that a tag table is present */ #define ATAG_MAGIC 0xa2a25441 @@ -138,4 +140,6 @@ void chip_enable_sdram(void); #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* __ASM_AVR32_SETUP_H__ */ diff --git a/include/asm-cris/arch-v10/bitops.h b/include/asm-cris/arch-v10/bitops.h index b73f5396e5a..be85f6de25d 100644 --- a/include/asm-cris/arch-v10/bitops.h +++ b/include/asm-cris/arch-v10/bitops.h @@ -10,7 +10,7 @@ * number. They differ in that the first function also inverts all bits * in the input. */ -extern inline unsigned long cris_swapnwbrlz(unsigned long w) +static inline unsigned long cris_swapnwbrlz(unsigned long w) { /* Let's just say we return the result in the same register as the input. Saying we clobber the input but can return the result @@ -26,7 +26,7 @@ extern inline unsigned long cris_swapnwbrlz(unsigned long w) return res; } -extern inline unsigned long cris_swapwbrlz(unsigned long w) +static inline unsigned long cris_swapwbrlz(unsigned long w) { unsigned res; __asm__ ("swapwbr %0 \n\t" @@ -40,7 +40,7 @@ extern inline unsigned long cris_swapwbrlz(unsigned long w) * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. */ -extern inline unsigned long ffz(unsigned long w) +static inline unsigned long ffz(unsigned long w) { return cris_swapnwbrlz(w); } @@ -51,7 +51,7 @@ extern inline unsigned long ffz(unsigned long w) * * Undefined if no bit exists, so code should check against 0 first. */ -extern inline unsigned long __ffs(unsigned long word) +static inline unsigned long __ffs(unsigned long word) { return cris_swapnwbrlz(~word); } @@ -65,7 +65,7 @@ extern inline unsigned long __ffs(unsigned long word) * differs in spirit from the above ffz (man ffs). */ -extern inline unsigned long kernel_ffs(unsigned long w) +static inline unsigned long kernel_ffs(unsigned long w) { return w ? cris_swapwbrlz (w) + 1 : 0; } diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index cbf1a98f012..662cea70152 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -156,10 +156,10 @@ dma_get_cache_alignment(void) return (1 << INTERNODE_CACHE_SHIFT); } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { } diff --git a/include/asm-cris/semaphore-helper.h b/include/asm-cris/semaphore-helper.h index dbd0f30b85b..a8e1e6cb7cd 100644 --- a/include/asm-cris/semaphore-helper.h +++ b/include/asm-cris/semaphore-helper.h @@ -20,12 +20,12 @@ /* * These two _must_ execute atomically wrt each other. */ -extern inline void wake_one_more(struct semaphore * sem) +static inline void wake_one_more(struct semaphore * sem) { atomic_inc(&sem->waking); } -extern inline int waking_non_zero(struct semaphore *sem) +static inline int waking_non_zero(struct semaphore *sem) { unsigned long flags; int ret = 0; @@ -40,7 +40,7 @@ extern inline int waking_non_zero(struct semaphore *sem) return ret; } -extern inline int waking_non_zero_interruptible(struct semaphore *sem, +static inline int waking_non_zero_interruptible(struct semaphore *sem, struct task_struct *tsk) { int ret = 0; @@ -59,7 +59,7 @@ extern inline int waking_non_zero_interruptible(struct semaphore *sem, return ret; } -extern inline int waking_non_zero_trylock(struct semaphore *sem) +static inline int waking_non_zero_trylock(struct semaphore *sem) { int ret = 1; unsigned long flags; diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index e9fc1d47797..bcb2df68496 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -172,10 +172,10 @@ int dma_get_cache_alignment(void) return 1 << L1_CACHE_SHIFT; } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { flush_write_buffers(); diff --git a/include/asm-frv/highmem.h b/include/asm-frv/highmem.h index 0f390f41f81..ff4d6cdeb15 100644 --- a/include/asm-frv/highmem.h +++ b/include/asm-frv/highmem.h @@ -115,7 +115,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) { unsigned long paddr; - inc_preempt_count(); + pagefault_disable(); paddr = page_to_phys(page); switch (type) { @@ -170,8 +170,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) default: BUG(); } - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-frv/param.h b/include/asm-frv/param.h index 168381ebb41..365653b1726 100644 --- a/include/asm-frv/param.h +++ b/include/asm-frv/param.h @@ -18,6 +18,5 @@ #endif #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#define COMMAND_LINE_SIZE 512 #endif /* _ASM_PARAM_H */ diff --git a/include/asm-frv/setup.h b/include/asm-frv/setup.h index 0d293b9a585..afd787ceede 100644 --- a/include/asm-frv/setup.h +++ b/include/asm-frv/setup.h @@ -12,6 +12,10 @@ #ifndef _ASM_SETUP_H #define _ASM_SETUP_H +#define COMMAND_LINE_SIZE 512 + +#ifdef __KERNEL__ + #include <linux/init.h> #ifndef __ASSEMBLY__ @@ -22,4 +26,6 @@ extern unsigned long __initdata num_mappedpages; #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* _ASM_SETUP_H */ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 725e854928c..584c0417ae4 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -320,125 +320,6 @@ #ifdef __KERNEL__ #define NR_syscalls 310 -#include <linux/err.h> - -/* - * process the return value of a syscall, consigning it to one of two possible fates - * - user-visible error numbers are in the range -1 - -4095: see <asm-frv/errno.h> - */ -#undef __syscall_return -#define __syscall_return(type, res) \ -do { \ - unsigned long __sr2 = (res); \ - if (__builtin_expect(__sr2 >= (unsigned long)(-MAX_ERRNO), 0)) { \ - errno = (-__sr2); \ - __sr2 = ~0UL; \ - } \ - return (type) __sr2; \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ - -#undef _syscall0 -#define _syscall0(type,name) \ -type name(void) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8"); \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "=r" (__sc0) \ - : "r" (__scnum)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall1 -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall2 -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall3 -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall4 -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), "r" (__sc3)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall5 -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - register unsigned long __sc4 __asm__ ("gr12") = (unsigned long) arg5; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), \ - "r" (__sc3), "r" (__sc4)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall6 -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5, type6, arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - register unsigned long __sc4 __asm__ ("gr12") = (unsigned long) arg5; \ - register unsigned long __sc5 __asm__ ("gr13") = (unsigned long) arg6; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), \ - "r" (__sc3), "r" (__sc4), "r" (__sc5)); \ - __syscall_return(type, __sc0); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION /* #define __ARCH_WANT_OLD_READDIR */ diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 3c06be38170..fa14f8cd30c 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,4 +1,3 @@ -header-y += atomic.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index a84c3d88a18..a37e95fe58d 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -14,6 +14,7 @@ unifdef-y += posix_types.h unifdef-y += ptrace.h unifdef-y += resource.h unifdef-y += sembuf.h +unifdef-y += setup.h unifdef-y += shmbuf.h unifdef-y += sigcontext.h unifdef-y += siginfo.h diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 42a95d9a064..b7e4a0467cb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,7 +66,7 @@ static inline void atomic_long_sub(long i, atomic_long_t *l) atomic64_sub(i, v); } -#else +#else /* BITS_PER_LONG == 64 */ typedef atomic_t atomic_long_t; @@ -113,5 +113,6 @@ static inline void atomic_long_sub(long i, atomic_long_t *l) atomic_sub(i, v); } -#endif -#endif +#endif /* BITS_PER_LONG == 64 */ + +#endif /* _ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index b541e48cc54..783ab9944d7 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -266,7 +266,7 @@ dma_error(dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) @@ -295,7 +295,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { /* could define this in terms of the dma_cache ... operations, diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index df893c16031..f422df0956a 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e60d6f21fa6..4d4c62d1105 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -11,8 +11,8 @@ #define RODATA \ . = ALIGN(4096); \ - __start_rodata = .; \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ *(__vermagic) /* Kernel version magic */ \ } \ @@ -119,17 +119,16 @@ *(__ksymtab_strings) \ } \ \ + EH_FRAME \ + \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ *(__param) \ VMLINUX_SYMBOL(__stop___param) = .; \ + VMLINUX_SYMBOL(__end_rodata) = .; \ } \ \ - /* Unwind data binary search table */ \ - EH_FRAME_HDR \ - \ - __end_rodata = .; \ . = ALIGN(4096); #define SECURITY_INIT \ @@ -162,15 +161,23 @@ VMLINUX_SYMBOL(__kprobes_text_end) = .; #ifdef CONFIG_STACK_UNWIND - /* Unwind data binary search table */ -#define EH_FRAME_HDR \ +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ *(.eh_frame_hdr) \ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + . = ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ } #else -#define EH_FRAME_HDR +#define EH_FRAME #endif /* DWARF debug sections. diff --git a/include/asm-h8300/delay.h b/include/asm-h8300/delay.h index cbccbbdd640..743beba70f8 100644 --- a/include/asm-h8300/delay.h +++ b/include/asm-h8300/delay.h @@ -9,7 +9,7 @@ * Delay routines, using a pre-computed "loops_per_second" value. */ -extern __inline__ void __delay(unsigned long loops) +static inline void __delay(unsigned long loops) { __asm__ __volatile__ ("1:\n\t" "dec.l #1,%0\n\t" @@ -27,7 +27,7 @@ extern __inline__ void __delay(unsigned long loops) extern unsigned long loops_per_jiffy; -extern __inline__ void udelay(unsigned long usecs) +static inline void udelay(unsigned long usecs) { usecs *= 4295; /* 2**32 / 1000000 */ usecs /= (loops_per_jiffy*HZ); diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index 855721a5dcc..5c165f7bee0 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -9,7 +9,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -extern inline int +static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { // mm->context = virt_to_phys(mm->pgd); @@ -23,7 +23,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str { } -extern inline void activate_mm(struct mm_struct *prev_mm, +static inline void activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) { } diff --git a/include/asm-h8300/pci.h b/include/asm-h8300/pci.h index 5edad5b70fd..0c771b05fdd 100644 --- a/include/asm-h8300/pci.h +++ b/include/asm-h8300/pci.h @@ -10,12 +10,12 @@ #define pcibios_assign_all_busses() 0 #define pcibios_scan_all_fns(a, b) 0 -extern inline void pcibios_set_master(struct pci_dev *dev) +static inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq, int active) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-h8300/tlbflush.h b/include/asm-h8300/tlbflush.h index bbdffbeeede..9a2c5c9fd70 100644 --- a/include/asm-h8300/tlbflush.h +++ b/include/asm-h8300/tlbflush.h @@ -47,12 +47,12 @@ static inline void flush_tlb_range(struct mm_struct *mm, BUG(); } -extern inline void flush_tlb_kernel_page(unsigned long addr) +static inline void flush_tlb_kernel_page(unsigned long addr) { BUG(); } -extern inline void flush_tlb_pgtables(struct mm_struct *mm, +static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) { BUG(); diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h index 747788d629a..7ddd414f8d1 100644 --- a/include/asm-h8300/unistd.h +++ b/include/asm-h8300/unistd.h @@ -295,172 +295,6 @@ #ifdef __KERNEL__ #define NR_syscalls 289 -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - <asm-m68k/errno.h> */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - register long __res __asm__("er0"); \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall1(type, name, atype, a) \ -type name(atype a) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - _a = (long)a; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name(atype a, btype b) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - _a = (long)a; \ - _b = (long)b; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name(atype a, btype b, ctype c) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall4(type, name, atype, a, btype, b, \ - ctype, c, dtype, d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall5(type, name, atype, a, btype, b, \ - ctype, c, dtype, d, etype, e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - register long _e __asm__("er5"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - _e = (long)e; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d), \ - "g" (_e) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall6(type, name, atype, a, btype, b, \ - ctype, c, dtype, d, etype, e, ftype, f) \ -type name(atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - register long _e __asm__("er5"); \ - register long _f __asm__("er6"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - _e = (long)e; \ - _f = (long)f; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d), \ - "g" (_e) \ - "g" (_f) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 147e4ac1ebf..5ae93afc67e 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -7,5 +7,4 @@ header-y += ptrace-abi.h header-y += ucontext.h unifdef-y += mtrr.h -unifdef-y += setup.h unifdef-y += vm86.h diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index b01a7ec409c..b8fa9557c53 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -4,7 +4,7 @@ #ifdef __KERNEL__ #include <asm/types.h> - +#include <linux/stddef.h> #include <linux/types.h> struct alt_instr { @@ -118,4 +118,15 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif +struct paravirt_patch; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +#else +static inline void +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{} +#define __start_parainstructions NULL +#define __stop_parainstructions NULL +#endif + #endif /* _I386_ALTERNATIVE_H */ diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index b9529578fc3..41a44319905 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -37,18 +37,27 @@ extern void generic_apic_probe(void); /* * Basic functions accessing APICs. */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define apic_write native_apic_write +#define apic_write_atomic native_apic_write_atomic +#define apic_read native_apic_read +#endif -static __inline void apic_write(unsigned long reg, unsigned long v) +static __inline fastcall void native_apic_write(unsigned long reg, + unsigned long v) { *((volatile unsigned long *)(APIC_BASE+reg)) = v; } -static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +static __inline fastcall void native_apic_write_atomic(unsigned long reg, + unsigned long v) { xchg((volatile unsigned long *)(APIC_BASE+reg), v); } -static __inline unsigned long apic_read(unsigned long reg) +static __inline fastcall unsigned long native_apic_read(unsigned long reg) { return *((volatile unsigned long *)(APIC_BASE+reg)); } diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index a6c024e2506..c57441bb290 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -187,9 +187,9 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) /* Modern 486+ processor */ __i = i; __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); + LOCK_PREFIX "xaddl %0, %1" + :"+r" (i), "+m" (v->counter) + : : "memory"); return i + __i; #ifdef CONFIG_M386 diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h index 96b228e6e79..8ce79a6fa89 100644 --- a/include/asm-i386/boot.h +++ b/include/asm-i386/boot.h @@ -12,4 +12,8 @@ #define EXTENDED_VGA 0xfffe /* 80x50 mode */ #define ASK_VGA 0xfffd /* ask for it at bootup */ -#endif +/* Physical address where kenrel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +#endif /* _LINUX_BOOT_H */ diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 592ffeeda45..38f1aebbbdb 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -21,6 +21,7 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/msr.h> +#include <asm/paravirt.h> static int __init no_halt(char *s) { @@ -91,6 +92,9 @@ static void __init check_fpu(void) static void __init check_hlt(void) { + if (paravirt_enabled()) + return; + printk(KERN_INFO "Checking 'hlt' instruction... "); if (!boot_cpu_data.hlt_works_ok) { printk("disabled\n"); diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h index b1bc7b1b64b..9d914e1e4aa 100644 --- a/include/asm-i386/cpu.h +++ b/include/asm-i386/cpu.h @@ -13,6 +13,9 @@ struct i386_cpu { extern int arch_register_cpu(int num); #ifdef CONFIG_HOTPLUG_CPU extern void arch_unregister_cpu(int); +extern int enable_cpu_hotplug; +#else +#define enable_cpu_hotplug 0 #endif DECLARE_PER_CPU(int, cpu_state); diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index d314ebb3d59..3f92b94e0d7 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -31,7 +31,7 @@ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ #define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ @@ -73,6 +73,8 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -134,6 +136,10 @@ #define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) #define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #endif /* __ASM_I386_CPUFEATURE_H */ diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h index 3cbbecd7901..5252ee0f6d7 100644 --- a/include/asm-i386/current.h +++ b/include/asm-i386/current.h @@ -1,13 +1,14 @@ #ifndef _I386_CURRENT_H #define _I386_CURRENT_H -#include <linux/thread_info.h> +#include <asm/pda.h> +#include <linux/compiler.h> struct task_struct; -static __always_inline struct task_struct * get_current(void) +static __always_inline struct task_struct *get_current(void) { - return current_thread_info()->task; + return read_pda(pcurrent); } #define current get_current() diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index b1c7650dc7b..32d6678d0bb 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -7,6 +7,7 @@ * Delay routines calling functions in arch/i386/lib/delay.c */ +/* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -15,13 +16,23 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY) +#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul) + +#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul) + +#else /* !PARAVIRT || USE_REAL_TIME_DELAY */ + +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ __udelay(n)) - + +/* 0x5 is 2**32 / 1000000000 (rounded up) */ #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) +#endif void use_tsc_delay(void); diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 5874ef119ff..f398cc45644 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -4,8 +4,6 @@ #include <asm/ldt.h> #include <asm/segment.h> -#define CPU_16BIT_STACK_SIZE 1024 - #ifndef __ASSEMBLY__ #include <linux/preempt.h> @@ -16,8 +14,6 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); - struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); @@ -33,11 +29,6 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; } -/* - * This is the ldt that every process will get unless we need - * something other than this. - */ -extern struct desc_struct default_ldt[]; extern struct desc_struct idt_table[]; extern void set_intr_gate(unsigned int irq, void * addr); @@ -64,8 +55,10 @@ static inline void pack_gate(__u32 *a, __u32 *b, #define DESCTYPE_DPL3 0x60 /* DPL-3 */ #define DESCTYPE_S 0x10 /* !system */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) @@ -88,6 +81,10 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu) #undef C } +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) + static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) { __u32 *lp = (__u32 *)((char *)dt + entry*8); @@ -95,9 +92,25 @@ static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entr *(lp+1) = entry_b; } -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +#define set_ldt native_set_ldt +#endif /* CONFIG_PARAVIRT */ + +static inline fastcall void native_set_ldt(const void *addr, + unsigned int entries) +{ + if (likely(entries == 0)) + __asm__ __volatile__("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + __u32 a, b; + + pack_descriptor(&a, &b, (unsigned long)addr, + entries * sizeof(struct desc_struct) - 1, + DESCTYPE_LDT, 0); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } +} static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) { @@ -115,14 +128,6 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); } -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) -{ - __u32 a, b; - pack_descriptor(&a, &b, (unsigned long)addr, - entries * sizeof(struct desc_struct) - 1, - DESCTYPE_LDT, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); -} #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) @@ -153,35 +158,22 @@ static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entri static inline void clear_LDT(void) { - int cpu = get_cpu(); - - set_ldt_desc(cpu, &default_ldt[0], 5); - load_LDT_desc(); - put_cpu(); + set_ldt(NULL, 0); } /* * load one particular LDT into the current CPU */ -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) +static inline void load_LDT_nolock(mm_context_t *pc) { - void *segments = pc->ldt; - int count = pc->size; - - if (likely(!count)) { - segments = &default_ldt[0]; - count = 5; - } - - set_ldt_desc(cpu, segments, count); - load_LDT_desc(); + set_ldt(pc->ldt, pc->size); } static inline void load_LDT(mm_context_t *pc) { - int cpu = get_cpu(); - load_LDT_nolock(pc, cpu); - put_cpu(); + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); } static inline unsigned long get_desc_base(unsigned long *desc) @@ -193,6 +185,29 @@ static inline unsigned long get_desc_base(unsigned long *desc) return base; } +#else /* __ASSEMBLY__ */ + +/* + * GET_DESC_BASE reads the descriptor base of the specified segment. + * + * Args: + * idx - descriptor index + * gdt - GDT pointer + * base - 32bit register to which the base will be written + * lo_w - lo word of the "base" register + * lo_b - lo byte of the "base" register + * hi_b - hi byte of the low word of the "base" register + * + * Example: + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. + */ +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ + movb idx*8+4(gdt), lo_b; \ + movb idx*8+7(gdt), hi_b; \ + shll $16, base; \ + movw idx*8+2(gdt), lo_w; + #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 81999a3ebe7..183eebeebbd 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -156,10 +156,10 @@ dma_get_cache_alignment(void) return (1 << INTERNODE_CACHE_SHIFT); } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { flush_write_buffers(); diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index f7514fb6e8e..395077aba58 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -38,6 +38,11 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern void find_max_pfn(void); +extern void register_bootmem_low_pages(unsigned long max_low_pfn); +extern void register_memory(void); +extern void limit_regions(unsigned long long size); +extern void print_memory_map(char *who); #endif/*!__ASSEMBLY__*/ diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 3a05436f31c..45d21a0c95b 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -91,7 +91,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; pr_reg[7] = regs->xds; \ pr_reg[8] = regs->xes; \ savesegment(fs,pr_reg[9]); \ - savesegment(gs,pr_reg[10]); \ + pr_reg[10] = regs->xgs; \ pr_reg[11] = regs->orig_eax; \ pr_reg[12] = regs->eip; \ pr_reg[13] = regs->xcs; \ diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index 946d97cfea2..438ef0ec710 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); if (op == FUTEX_OP_SET) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); @@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index 8ffbb0f0745..fd2be593b06 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -122,6 +122,6 @@ struct genapic { APICFUNC(phys_pkg_id) \ } -extern struct genapic *genapic; +extern struct genapic *genapic, apic_default; #endif diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index bc1d6edae1e..434936c732d 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -76,7 +76,9 @@ static inline void __save_init_fpu( struct task_struct *tsk ) #define __unlazy_fpu( tsk ) do { \ if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu( tsk ); \ + save_init_fpu( tsk ); \ + else \ + tsk->fpu_counter = 0; \ } while (0) #define __clear_fpu( tsk ) \ @@ -118,6 +120,7 @@ static inline void save_init_fpu( struct task_struct *tsk ) extern unsigned short get_fpu_cwd( struct task_struct *tsk ); extern unsigned short get_fpu_swd( struct task_struct *tsk ); extern unsigned short get_fpu_mxcsr( struct task_struct *tsk ); +extern asmlinkage void math_state_restore(void); /* * Signal frame handlers... diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 68df0dc3ab8..86ff5e83be2 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -256,11 +256,11 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> #else + #define __SLOW_DOWN_IO "outb %%al,$0x80;" -#endif static inline void slow_down_io(void) { __asm__ __volatile__( @@ -271,6 +271,8 @@ static inline void slow_down_io(void) { : : ); } +#endif + #ifdef CONFIG_X86_NUMAQ extern void *xquad_portio; /* Where the IO area was mapped */ #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 331726b4112..11761cdaae1 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -37,8 +37,13 @@ static __inline__ int irq_canonicalize(int irq) extern int irqbalance_disable(char *str); #endif +extern void quirk_intel_irqbalance(void); + #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); #endif +void init_IRQ(void); +void __init native_init_IRQ(void); + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h index 3dd9c0b7027..a1b3f7f594a 100644 --- a/include/asm-i386/irq_regs.h +++ b/include/asm-i386/irq_regs.h @@ -1 +1,27 @@ -#include <asm-generic/irq_regs.h> +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the PDA. + * + * Jeremy Fitzhardinge <jeremy@goop.org> + */ +#ifndef _ASM_I386_IRQ_REGS_H +#define _ASM_I386_IRQ_REGS_H + +#include <asm/pda.h> + +static inline struct pt_regs *get_irq_regs(void) +{ + return read_pda(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = read_pda(irq_regs); + write_pda(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_I386_IRQ_REGS_H */ diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index e1bdb97c07f..17b18cf4fe9 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -10,6 +10,9 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else #ifndef __ASSEMBLY__ static inline unsigned long __raw_local_save_flags(void) @@ -25,9 +28,6 @@ static inline unsigned long __raw_local_save_flags(void) return flags; } -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - static inline void raw_local_irq_restore(unsigned long flags) { __asm__ __volatile__( @@ -66,18 +66,6 @@ static inline void halt(void) __asm__ __volatile__("hlt": : :"memory"); } -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & (1 << 9)); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - /* * For spinlocks, etc: */ @@ -90,9 +78,33 @@ static inline unsigned long __raw_local_irq_save(void) return flags; } +#else +#define DISABLE_INTERRUPTS(clobbers) cli +#define ENABLE_INTERRUPTS(clobbers) sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + #define raw_local_irq_save(flags) \ do { (flags) = __raw_local_irq_save(); } while (0) +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-i386/mach-default/setup_arch.h b/include/asm-i386/mach-default/setup_arch.h index fb42099e7bd..605e3ccb991 100644 --- a/include/asm-i386/mach-default/setup_arch.h +++ b/include/asm-i386/mach-default/setup_arch.h @@ -2,4 +2,6 @@ /* no action for generic */ +#ifndef ARCH_SETUP #define ARCH_SETUP +#endif diff --git a/include/asm-i386/math_emu.h b/include/asm-i386/math_emu.h index 697673b555c..a4b0aa3320e 100644 --- a/include/asm-i386/math_emu.h +++ b/include/asm-i386/math_emu.h @@ -21,6 +21,7 @@ struct info { long ___eax; long ___ds; long ___es; + long ___fs; long ___orig_eax; long ___eip; long ___cs; diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 62b7bf18409..68ff102d6f5 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -44,7 +44,7 @@ static inline void switch_mm(struct mm_struct *prev, * load the LDT, if the LDT is different: */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP else { @@ -56,14 +56,14 @@ static inline void switch_mm(struct mm_struct *prev, * tlb flush IPI delivery. We must reload %cr3. */ load_cr3(next->pgd); - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } } #endif } -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) +#define deactivate_mm(tsk, mm) \ + asm("movl %0,%%fs": :"r" (0)); #define activate_mm(prev, next) \ switch_mm((prev),(next),NULL) diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 61b07332200..3503ad66945 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -120,13 +120,26 @@ static inline int pfn_valid(int pfn) __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define alloc_bootmem_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) - +#define alloc_bootmem_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_pages_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)) \ +}) +#define alloc_bootmem_low_pages_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ +}) #endif /* CONFIG_NEED_MULTIPLE_NODES */ #endif /* _ASM_MMZONE_H_ */ diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index 424661d25bd..02f8f541cbe 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -20,6 +20,8 @@ struct mod_arch_specific #define MODULE_PROC_FAMILY "586TSC " #elif defined CONFIG_M586MMX #define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " #elif defined CONFIG_M686 #define MODULE_PROC_FAMILY "686 " #elif defined CONFIG_MPENTIUMII @@ -60,18 +62,12 @@ struct mod_arch_specific #error unknown processor family #endif -#ifdef CONFIG_REGPARM -#define MODULE_REGPARM "REGPARM " -#else -#define MODULE_REGPARM "" -#endif - #ifdef CONFIG_4KSTACKS #define MODULE_STACKSIZE "4KSTACKS " #else #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE #endif /* _ASM_I386_MODULE_H */ diff --git a/include/asm-i386/mpspec_def.h b/include/asm-i386/mpspec_def.h index 76feedf85a8..13bafb16e7a 100644 --- a/include/asm-i386/mpspec_def.h +++ b/include/asm-i386/mpspec_def.h @@ -97,7 +97,6 @@ struct mpc_config_bus #define BUSTYPE_TC "TC" #define BUSTYPE_VME "VME" #define BUSTYPE_XPRESS "XPRESS" -#define BUSTYPE_NEC98 "NEC98" struct mpc_config_ioapic { @@ -182,7 +181,6 @@ enum mp_bustype { MP_BUS_EISA, MP_BUS_PCI, MP_BUS_MCA, - MP_BUS_NEC98 }; #endif diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 62b76cd9695..5679d499307 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,10 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else + /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using @@ -77,6 +81,7 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) __asm__ __volatile__("rdpmc" \ : "=a" (low), "=d" (high) \ : "c" (counter)) +#endif /* !CONFIG_PARAVIRT */ /* symbolic names for some interesting MSRs */ /* Intel defined MSRs. */ @@ -141,6 +146,10 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_IA32_MC0_ADDR 0x402 #define MSR_IA32_MC0_MISC 0x403 +#define MSR_IA32_PEBS_ENABLE 0x3f1 +#define MSR_IA32_DS_AREA 0x600 +#define MSR_IA32_PERF_CAPABILITIES 0x345 + /* Pentium IV performance counter MSRs */ #define MSR_P4_BPU_PERFCTR0 0x300 #define MSR_P4_BPU_PERFCTR1 0x301 @@ -284,4 +293,13 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_TMTA_LRTI_READOUT 0x80868018 #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x309 +#define MSR_CORE_PERF_FIXED_CTR1 0x30a +#define MSR_CORE_PERF_FIXED_CTR2 0x30b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 + #endif /* __ASM_MSR_H */ diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 269d315719c..b04333ea6f3 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -5,6 +5,9 @@ #define ASM_NMI_H #include <linux/pm.h> +#include <asm/irq.h> + +#ifdef ARCH_HAS_NMI_WATCHDOG /** * do_nmi_callback @@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + +#endif + #endif /* ASM_NMI_H */ diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index f5bf544c729..fd3f64ace24 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -52,6 +52,7 @@ typedef struct { unsigned long long pgprot; } pgprot_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) #define __pmd(x) ((pmd_t) { (x) } ) #define HPAGE_SHIFT 21 +#include <asm-generic/pgtable-nopud.h> #else typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pgd; } pgd_t; @@ -59,6 +60,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ #define pte_val(x) ((x).pte_low) #define HPAGE_SHIFT 22 +#include <asm-generic/pgtable-nopmd.h> #endif #define PTE_MASK PAGE_MASK @@ -112,18 +114,18 @@ extern int page_is_ram(unsigned long pagenr); #ifdef __ASSEMBLY__ #define __PAGE_OFFSET CONFIG_PAGE_OFFSET -#define __PHYSICAL_START CONFIG_PHYSICAL_START #else #define __PAGE_OFFSET ((unsigned long)CONFIG_PAGE_OFFSET) -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) #endif -#define __KERNEL_START (__PAGE_OFFSET + __PHYSICAL_START) #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) #define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +/* __pa_symbol should be used for C visible symbols. + This seems to be the official gcc blessed way to do such arithmetic. */ +#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x),0)) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #ifdef CONFIG_FLATMEM diff --git a/include/asm-i386/param.h b/include/asm-i386/param.h index 745dc5bd0fb..21b32466fcd 100644 --- a/include/asm-i386/param.h +++ b/include/asm-i386/param.h @@ -18,6 +18,5 @@ #endif #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#define COMMAND_LINE_SIZE 256 #endif diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h new file mode 100644 index 00000000000..9f06265065f --- /dev/null +++ b/include/asm-i386/paravirt.h @@ -0,0 +1,505 @@ +#ifndef __ASM_PARAVIRT_H +#define __ASM_PARAVIRT_H +/* Various instructions on x86 need to be replaced for + * para-virtualization: those hooks are defined here. */ +#include <linux/linkage.h> +#include <linux/stringify.h> +#include <asm/page.h> + +#ifdef CONFIG_PARAVIRT +/* These are the most performance critical ops, so we want to be able to patch + * callers */ +#define PARAVIRT_IRQ_DISABLE 0 +#define PARAVIRT_IRQ_ENABLE 1 +#define PARAVIRT_RESTORE_FLAGS 2 +#define PARAVIRT_SAVE_FLAGS 3 +#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4 +#define PARAVIRT_INTERRUPT_RETURN 5 +#define PARAVIRT_STI_SYSEXIT 6 + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_NONE 0x0 +#define CLBR_EAX 0x1 +#define CLBR_ECX 0x2 +#define CLBR_EDX 0x4 +#define CLBR_ANY 0x7 + +#ifndef __ASSEMBLY__ +struct thread_struct; +struct Xgt_desc_struct; +struct tss_struct; +struct mm_struct; +struct paravirt_ops +{ + unsigned int kernel_rpl; + int paravirt_enabled; + const char *name; + + /* + * Patch may replace one of the defined code sequences with arbitrary + * code, subject to the same register constraints. This generally + * means the code is not free to clobber any registers other than EAX. + * The patch function should return the number of bytes of code + * generated, as we nop pad the rest in generic code. + */ + unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*init_IRQ)(void); + + void (*banner)(void); + + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + void (*time_init)(void); + + /* All the function pointers here are declared as "fastcall" + so that we get a specific register-based calling + convention. This makes it easier to implement inline + assembler replacements. */ + + void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + unsigned long (fastcall *get_debugreg)(int regno); + void (fastcall *set_debugreg)(int regno, unsigned long value); + + void (fastcall *clts)(void); + + unsigned long (fastcall *read_cr0)(void); + void (fastcall *write_cr0)(unsigned long); + + unsigned long (fastcall *read_cr2)(void); + void (fastcall *write_cr2)(unsigned long); + + unsigned long (fastcall *read_cr3)(void); + void (fastcall *write_cr3)(unsigned long); + + unsigned long (fastcall *read_cr4_safe)(void); + unsigned long (fastcall *read_cr4)(void); + void (fastcall *write_cr4)(unsigned long); + + unsigned long (fastcall *save_fl)(void); + void (fastcall *restore_fl)(unsigned long); + void (fastcall *irq_disable)(void); + void (fastcall *irq_enable)(void); + void (fastcall *safe_halt)(void); + void (fastcall *halt)(void); + void (fastcall *wbinvd)(void); + + /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (fastcall *read_msr)(unsigned int msr, int *err); + int (fastcall *write_msr)(unsigned int msr, u64 val); + + u64 (fastcall *read_tsc)(void); + u64 (fastcall *read_pmc)(void); + + void (fastcall *load_tr_desc)(void); + void (fastcall *load_gdt)(const struct Xgt_desc_struct *); + void (fastcall *load_idt)(const struct Xgt_desc_struct *); + void (fastcall *store_gdt)(struct Xgt_desc_struct *); + void (fastcall *store_idt)(struct Xgt_desc_struct *); + void (fastcall *set_ldt)(const void *desc, unsigned entries); + unsigned long (fastcall *store_tr)(void); + void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); + void (fastcall *write_ldt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_gdt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_idt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *load_esp0)(struct tss_struct *tss, + struct thread_struct *thread); + + void (fastcall *set_iopl_mask)(unsigned mask); + + void (fastcall *io_delay)(void); + void (*const_udelay)(unsigned long loops); + +#ifdef CONFIG_X86_LOCAL_APIC + void (fastcall *apic_write)(unsigned long reg, unsigned long v); + void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); + unsigned long (fastcall *apic_read)(unsigned long reg); +#endif + + void (fastcall *flush_tlb_user)(void); + void (fastcall *flush_tlb_kernel)(void); + void (fastcall *flush_tlb_single)(u32 addr); + + void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); +#ifdef CONFIG_X86_PAE + void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); + void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (fastcall *pmd_clear)(pmd_t *pmdp); +#endif + + /* These two are jmp to, not actually called. */ + void (fastcall *irq_enable_sysexit)(void); + void (fastcall *iret)(void); +}; + +/* Mark a paravirt probe function. */ +#define paravirt_probe(fn) \ + static asmlinkage void (*__paravirtprobe_##fn)(void) __attribute_used__ \ + __attribute__((__section__(".paravirtprobe"))) = fn + +extern struct paravirt_ops paravirt_ops; + +#define paravirt_enabled() (paravirt_ops.paravirt_enabled) + +static inline void load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + paravirt_ops.load_esp0(tss, thread); +} + +#define ARCH_SETUP paravirt_ops.arch_setup(); +static inline unsigned long get_wallclock(void) +{ + return paravirt_ops.get_wallclock(); +} + +static inline int set_wallclock(unsigned long nowtime) +{ + return paravirt_ops.set_wallclock(nowtime); +} + +static inline void do_time_init(void) +{ + return paravirt_ops.time_init(); +} + +/* The paravirtualized CPUID instruction. */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + paravirt_ops.cpuid(eax, ebx, ecx, edx); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) +#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) + +#define clts() paravirt_ops.clts() + +#define read_cr0() paravirt_ops.read_cr0() +#define write_cr0(x) paravirt_ops.write_cr0(x) + +#define read_cr2() paravirt_ops.read_cr2() +#define write_cr2(x) paravirt_ops.write_cr2(x) + +#define read_cr3() paravirt_ops.read_cr3() +#define write_cr3(x) paravirt_ops.write_cr3(x) + +#define read_cr4() paravirt_ops.read_cr4() +#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() +#define write_cr4(x) paravirt_ops.write_cr4(x) + +static inline void raw_safe_halt(void) +{ + paravirt_ops.safe_halt(); +} + +static inline void halt(void) +{ + paravirt_ops.safe_halt(); +} +#define wbinvd() paravirt_ops.wbinvd() + +#define get_kernel_rpl() (paravirt_ops.kernel_rpl) + +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ +} while(0) + +#define wrmsr(msr,val1,val2) do { \ + u64 _l = ((u64)(val2) << 32) | (val1); \ + paravirt_ops.write_msr((msr), _l); \ +} while(0) + +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_ops.read_msr((msr),&_err); \ +} while(0) + +#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) +#define wrmsr_safe(msr,a,b) ({ \ + u64 _l = ((u64)(b) << 32) | (a); \ + paravirt_ops.write_msr((msr),_l); \ +}) + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; }) + +#define rdtsc(low,high) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define rdtscl(low) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (int)_l; \ +} while(0) + +#define rdtscll(val) (val = paravirt_ops.read_tsc()) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_ops.read_pmc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define load_TR_desc() (paravirt_ops.load_tr_desc()) +#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) +#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) +#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) +#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) +#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) +#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) +#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) +#define write_ldt_entry(dt, entry, low, high) \ + (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) +#define write_gdt_entry(dt, entry, low, high) \ + (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) +#define write_idt_entry(dt, entry, low, high) \ + (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) +#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) { + paravirt_ops.io_delay(); +#ifdef REALLY_SLOW_IO + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); +#endif +} + +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Basic functions accessing APICs. + */ +static inline void apic_write(unsigned long reg, unsigned long v) +{ + paravirt_ops.apic_write(reg,v); +} + +static inline void apic_write_atomic(unsigned long reg, unsigned long v) +{ + paravirt_ops.apic_write_atomic(reg,v); +} + +static inline unsigned long apic_read(unsigned long reg) +{ + return paravirt_ops.apic_read(reg); +} +#endif + + +#define __flush_tlb() paravirt_ops.flush_tlb_user() +#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() +#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte(ptep, pteval); +} + +static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + paravirt_ops.set_pmd(pmdp, pmdval); +} + +static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update(mm, addr, ptep); +} + +static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update_defer(mm, addr, ptep); +} + +#ifdef CONFIG_X86_PAE +static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_atomic(ptep, pteval); +} + +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + paravirt_ops.set_pte_present(mm, addr, ptep, pte); +} + +static inline void set_pud(pud_t *pudp, pud_t pudval) +{ + paravirt_ops.set_pud(pudp, pudval); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + paravirt_ops.pte_clear(mm, addr, ptep); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + paravirt_ops.pmd_clear(pmdp); +} +#endif + +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch { + u8 *instr; /* original instructions */ + u8 instrtype; /* type of this instruction */ + u8 len; /* length of original instruction */ + u16 clobbers; /* what registers you may clobber */ +}; + +#define paravirt_alt(insn_string, typenum, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + " .long 771b\n" \ + " .byte " __stringify(typenum) "\n" \ + " .byte 772b-771b\n" \ + " .short " __stringify(clobber) "\n" \ + ".popsection" + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long f; + + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1;" + "popl %%edx; popl %%ecx", + PARAVIRT_SAVE_FLAGS, CLBR_NONE) + : "=a"(f): "m"(paravirt_ops.save_fl) + : "memory", "cc"); + return f; +} + +static inline void raw_local_irq_restore(unsigned long f) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1;" + "popl %%edx; popl %%ecx", + PARAVIRT_RESTORE_FLAGS, CLBR_EAX) + : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) + : "memory", "cc"); +} + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%0;" + "popl %%edx; popl %%ecx", + PARAVIRT_IRQ_DISABLE, CLBR_EAX) + : : "m" (paravirt_ops.irq_disable) + : "memory", "eax", "cc"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%0;" + "popl %%edx; popl %%ecx", + PARAVIRT_IRQ_ENABLE, CLBR_EAX) + : : "m" (paravirt_ops.irq_enable) + : "memory", "eax", "cc"); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long f; + + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1; pushl %%eax;" + "call *%2; popl %%eax;" + "popl %%edx; popl %%ecx", + PARAVIRT_SAVE_FLAGS_IRQ_DISABLE, + CLBR_NONE) + : "=a"(f) + : "m" (paravirt_ops.save_fl), + "m" (paravirt_ops.irq_disable) + : "memory", "cc"); + return f; +} + +#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[irq_disable];" \ + "popl %%edx; popl %%ecx", \ + PARAVIRT_IRQ_DISABLE, CLBR_EAX) + +#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[irq_enable];" \ + "popl %%edx; popl %%ecx", \ + PARAVIRT_IRQ_ENABLE, CLBR_EAX) +#define CLI_STI_CLOBBERS , "%eax" +#define CLI_STI_INPUT_ARGS \ + , \ + [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ + [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) + +#else /* __ASSEMBLY__ */ + +#define PARA_PATCH(ptype, clobbers, ops) \ +771:; \ + ops; \ +772:; \ + .pushsection .parainstructions,"a"; \ + .long 771b; \ + .byte ptype; \ + .byte 772b-771b; \ + .short clobbers; \ + .popsection + +#define INTERRUPT_RETURN \ + PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_iret) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ + pushl %ecx; pushl %edx; \ + call *paravirt_ops+PARAVIRT_irq_disable; \ + popl %edx; popl %ecx) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ + popl %edx; popl %ecx) + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) + +#define GET_CR0_INTO_EAX \ + call *paravirt_ops+PARAVIRT_read_cr0 + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ +#endif /* __ASM_PARAVIRT_H */ diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h new file mode 100644 index 00000000000..2ba2736aa10 --- /dev/null +++ b/include/asm-i386/pda.h @@ -0,0 +1,100 @@ +/* + Per-processor Data Areas + Jeremy Fitzhardinge <jeremy@goop.org> 2006 + Based on asm-x86_64/pda.h by Andi Kleen. + */ +#ifndef _I386_PDA_H +#define _I386_PDA_H + +#include <linux/stddef.h> +#include <linux/types.h> + +struct i386_pda +{ + struct i386_pda *_pda; /* pointer to self */ + + int cpu_number; + struct task_struct *pcurrent; /* current process */ + struct pt_regs *irq_regs; +}; + +extern struct i386_pda *_cpu_pda[]; + +#define cpu_pda(i) (_cpu_pda[i]) + +#define pda_offset(field) offsetof(struct i386_pda, field) + +extern void __bad_pda_field(void); + +/* This variable is never instantiated. It is only used as a stand-in + for the real per-cpu PDA memory, so that gcc can understand what + memory operations the inline asms() below are performing. This + eliminates the need to make the asms volatile or have memory + clobbers, so gcc can readily analyse them. */ +extern struct i386_pda _proxy_pda; + +#define pda_to_op(op,field,val) \ + do { \ + typedef typeof(_proxy_pda.field) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } \ + switch (sizeof(_proxy_pda.field)) { \ + case 1: \ + asm(op "b %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + case 2: \ + asm(op "w %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + case 4: \ + asm(op "l %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + default: __bad_pda_field(); \ + } \ + } while (0) + +#define pda_from_op(op,field) \ + ({ \ + typeof(_proxy_pda.field) ret__; \ + switch (sizeof(_proxy_pda.field)) { \ + case 1: \ + asm(op "b %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 2: \ + asm(op "w %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 4: \ + asm(op "l %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + default: __bad_pda_field(); \ + } \ + ret__; }) + +/* Return a pointer to a pda field */ +#define pda_addr(field) \ + ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \ + pda_offset(field))) + +#define read_pda(field) pda_from_op("mov",field) +#define write_pda(field,val) pda_to_op("mov",field,val) +#define add_pda(field,val) pda_to_op("add",field,val) +#define sub_pda(field,val) pda_to_op("sub",field,val) +#define or_pda(field,val) pda_to_op("or",field,val) + +#endif /* _I386_PDA_H */ diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index 5764afa4b6a..510ae1d3486 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -1,6 +1,31 @@ #ifndef __ARCH_I386_PERCPU__ #define __ARCH_I386_PERCPU__ +#ifndef __ASSEMBLY__ #include <asm-generic/percpu.h> +#else + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * cpu - 32bit register containing the current CPU number + * + * The resulting address is stored in the "cpu" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, cpu) \ + movl __per_cpu_offset(,cpu,4), cpu; \ + addl $per_cpu__/**/var, cpu; +#else /* ! SMP */ +#define PER_CPU(var, cpu) \ + movl $per_cpu__/**/var, cpu; +#endif /* SMP */ + +#endif /* !__ASSEMBLY__ */ #endif /* __ARCH_I386_PERCPU__ */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 8d8d3b9ecdb..38c3fcc0676 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -1,8 +1,6 @@ #ifndef _I386_PGTABLE_2LEVEL_H #define _I386_PGTABLE_2LEVEL_H -#include <asm-generic/pgtable-nopmd.h> - #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ @@ -13,17 +11,19 @@ * within a page table are directly modified. Thus, the following * hook is made available. */ +#ifndef CONFIG_PARAVIRT #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) #define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) +#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index c2d701ea35b..7a2318f3830 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -1,8 +1,6 @@ #ifndef _I386_PGTABLE_3LEVEL_H #define _I386_PGTABLE_3LEVEL_H -#include <asm-generic/pgtable-nopud.h> - /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -44,6 +42,7 @@ static inline int pte_exec_kernel(pte_t pte) return pte_x(pte); } +#ifndef CONFIG_PARAVIRT /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is @@ -81,25 +80,6 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte (*(pudptr) = (pudval)) /* - * Pentium-II erratum A13: in PAE mode we explicitly have to flush - * the TLB via cr3 if the top-level pgd is changed... - * We do not let the generic code free and clear pgd entries due to - * this erratum. - */ -static inline void pud_clear (pud_t * pud) { } - -#define pud_page(pud) \ -((struct page *) __va(pud_val(pud) & PAGE_MASK)) - -#define pud_page_vaddr(pud) \ -((unsigned long) __va(pud_val(pud) & PAGE_MASK)) - - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ - pmd_index(address)) - -/* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler * barrier. @@ -118,9 +98,28 @@ static inline void pmd_clear(pmd_t *pmd) smp_wmb(); *(tmp + 1) = 0; } +#endif + +/* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + * We do not let the generic code free and clear pgd entries due to + * this erratum. + */ +static inline void pud_clear (pud_t * pud) { } + +#define pud_page(pud) \ +((struct page *) __va(pud_val(pud) & PAGE_MASK)) + +#define pud_page_vaddr(pud) \ +((unsigned long) __va(pud_val(pud) & PAGE_MASK)) + + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep) { pte_t res; diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 7d398f493dd..e6a4723f0eb 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -15,6 +15,7 @@ #include <asm/processor.h> #include <asm/fixmap.h> #include <linux/threads.h> +#include <asm/paravirt.h> #ifndef _I386_BITOPS_H #include <asm/bitops.h> @@ -34,14 +35,14 @@ struct vm_area_struct; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) extern unsigned long empty_zero_page[1024]; extern pgd_t swapper_pg_dir[1024]; -extern kmem_cache_t *pgd_cache; -extern kmem_cache_t *pmd_cache; +extern struct kmem_cache *pgd_cache; +extern struct kmem_cache *pmd_cache; extern spinlock_t pgd_lock; extern struct page *pgd_list; -void pmd_ctor(void *, kmem_cache_t *, unsigned long); -void pgd_ctor(void *, kmem_cache_t *, unsigned long); -void pgd_dtor(void *, kmem_cache_t *, unsigned long); +void pmd_ctor(void *, struct kmem_cache *, unsigned long); +void pgd_ctor(void *, struct kmem_cache *, unsigned long); +void pgd_dtor(void *, struct kmem_cache *, unsigned long); void pgtable_cache_init(void); void paging_init(void); @@ -246,6 +247,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include <asm/pgtable-2level.h> #endif +#ifndef CONFIG_PARAVIRT /* * Rules for using pte_update - it must be called after any PTE update which * has not been done using the set_pte / clear_pte interfaces. It is used by @@ -261,7 +263,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - +#endif /* * We only update the dirty/accessed state if we set @@ -275,7 +277,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p do { \ if (dirty) { \ (ptep)->pte_low = (entry).pte_low; \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ } while (0) @@ -305,7 +307,7 @@ do { \ __dirty = pte_dirty(*(ptep)); \ if (__dirty) { \ clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low); \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ __dirty; \ @@ -318,12 +320,20 @@ do { \ __young = pte_young(*(ptep)); \ if (__young) { \ clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low); \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ __young; \ }) +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = raw_ptep_get_and_clear(ptep); + pte_update(mm, addr, ptep); + return pte; +} + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index e0ddca94d50..a52d6544042 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -20,6 +20,7 @@ #include <linux/threads.h> #include <asm/percpu.h> #include <linux/cpumask.h> +#include <linux/init.h> /* flag for disabling the tsc */ extern int tsc_disable; @@ -72,6 +73,7 @@ struct cpuinfo_x86 { #endif unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char apicid; + unsigned short x86_clflush_size; #ifdef CONFIG_SMP unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. */ @@ -111,6 +113,8 @@ extern struct cpuinfo_x86 cpu_data[]; extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; +void __init cpu_detect(struct cpuinfo_x86 *c); + extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); @@ -143,8 +147,8 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) +static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ __asm__("cpuid" @@ -155,59 +159,6 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, : "0" (*eax), "2" (*ecx)); } -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return edx; -} - #define load_cr3(pgdir) write_cr3(__pa(pgdir)) /* @@ -473,6 +424,7 @@ struct thread_struct { .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ + .gs = __KERNEL_PDA, \ } /* @@ -489,18 +441,9 @@ struct thread_struct { .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) -{ - tss->esp0 = thread->esp0; - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->ss1 != thread->sysenter_cs)) { - tss->ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } -} - #define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ + __asm__("movl %0,%%fs": :"r" (0)); \ + regs->xgs = 0; \ set_fs(USER_DS); \ regs->xds = __USER_DS; \ regs->xes = __USER_DS; \ @@ -510,33 +453,6 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa regs->esp = new_esp; \ } while (0) -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - __asm__("movl %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) - -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void set_iopl_mask(unsigned mask) -{ - unsigned int reg; - __asm__ __volatile__ ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -} - /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; @@ -628,6 +544,105 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + +static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +{ + tss->esp0 = thread->esp0; + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->ss1 != thread->sysenter_cs)) { + tss->ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + __asm__("movl %%db" #register ", %0" \ + :"=r" (var)) +#define set_debugreg(value, register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (value)) + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + +/* + * Set IOPL bits in EFLAGS from given mask + */ +static fastcall inline void native_set_iopl_mask(unsigned mask) +{ + unsigned int reg; + __asm__ __volatile__ ("pushfl;" + "popl %0;" + "andl %1, %0;" + "orl %2, %0;" + "pushl %0;" + "popfl" + : "=&r" (reg) + : "i" (~X86_EFLAGS_IOPL), "r" (mask)); +} + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, + int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return edx; +} + /* generic versions from gas */ #define GENERIC_NOP1 ".byte 0x90\n" #define GENERIC_NOP2 ".byte 0x89,0xf6\n" @@ -727,4 +742,7 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); +extern int init_gdt(int cpu, struct task_struct *idle); +extern void secondary_cpu_init(void); + #endif /* __ASM_I386_PROCESSOR_H */ diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index d505f501077..bdbc894339b 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -16,6 +16,8 @@ struct pt_regs { long eax; int xds; int xes; + /* int xfs; */ + int xgs; long orig_eax; long eip; int xcs; diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h index bc598d6388e..041906f3c6d 100644 --- a/include/asm-i386/rwsem.h +++ b/include/asm-i386/rwsem.h @@ -75,8 +75,8 @@ struct rw_semaphore { #define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } +{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index b7ab59685ba..3c796af3377 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -39,7 +39,7 @@ * 25 - APM BIOS support * * 26 - ESPFIX small SS - * 27 - unused + * 27 - PDA [ per-cpu private data area ] * 28 - unused * 29 - unused * 30 - unused @@ -74,6 +74,9 @@ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* @@ -128,5 +131,7 @@ #define SEGMENT_LDT 0x4 #define SEGMENT_GDT 0x0 +#ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif +#endif diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 2734909eff8..67659dbaf12 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -6,6 +6,8 @@ #ifndef _i386_SETUP_H #define _i386_SETUP_H +#define COMMAND_LINE_SIZE 256 + #ifdef __KERNEL__ #include <linux/pfn.h> @@ -14,10 +16,8 @@ */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) #define MAX_NONPAE_PFN (1 << 20) -#endif #define PARAM_SIZE 4096 -#define COMMAND_LINE_SIZE 256 #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -70,6 +70,7 @@ extern unsigned char boot_params[PARAM_SIZE]; struct e820entry; char * __init machine_specific_memory_setup(void); +char *memory_setup(void); int __init copy_e820_map(struct e820entry * biosmap, int nr_map); int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map); @@ -78,4 +79,6 @@ void __init add_memory_region(unsigned long long start, #endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* _i386_SETUP_H */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index bd59c1508e7..64fe624c02c 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/threads.h> #include <linux/cpumask.h> +#include <asm/pda.h> #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -56,7 +57,7 @@ extern void cpu_uninit(void); * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (current_thread_info()->cpu) +#define raw_smp_processor_id() (read_pda(cpu_number)) extern cpumask_t cpu_callout_map; extern cpumask_t cpu_callin_map; diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index c18b71fae6b..d3bcebed60c 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -7,8 +7,14 @@ #include <asm/processor.h> #include <linux/compiler.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else #define CLI_STRING "cli" #define STI_STRING "sti" +#define CLI_STI_CLOBBERS +#define CLI_STI_INPUT_ARGS +#endif /* CONFIG_PARAVIRT */ /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -53,25 +59,28 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla { asm volatile( "\n1:\t" - LOCK_PREFIX " ; decb %0\n\t" + LOCK_PREFIX " ; decb %[slock]\n\t" "jns 5f\n" "2:\t" - "testl $0x200, %1\n\t" + "testl $0x200, %[flags]\n\t" "jz 4f\n\t" STI_STRING "\n" "3:\t" "rep;nop\n\t" - "cmpb $0, %0\n\t" + "cmpb $0, %[slock]\n\t" "jle 3b\n\t" CLI_STRING "\n\t" "jmp 1b\n" "4:\t" "rep;nop\n\t" - "cmpb $0, %0\n\t" + "cmpb $0, %[slock]\n\t" "jg 1b\n\t" "jmp 4b\n" "5:\n\t" - : "+m" (lock->slock) : "r" (flags) : "memory"); + : [slock] "+m" (lock->slock) + : [flags] "r" (flags) + CLI_STI_INPUT_ARGS + : "memory" CLI_STI_CLOBBERS); } #endif diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index 08be1e5009d..8dbaafe611f 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -6,29 +6,14 @@ #include <asm/desc.h> #include <asm/i387.h> -static inline int -arch_prepare_suspend(void) -{ - /* If you want to make non-PSE machine work, turn off paging - in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so - it could work... */ - if (!cpu_has_pse) { - printk(KERN_ERR "PSE is required for swsusp.\n"); - return -EPERM; - } - return 0; -} +static inline int arch_prepare_suspend(void) { return 0; } /* image of the saved processor state */ struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; - u16 gdt_pad; - u16 gdt_limit; - unsigned long gdt_base; - u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct Xgt_desc_struct gdt; + struct Xgt_desc_struct idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index a6dabbcd6e6..a6d20d9a1a3 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -88,6 +88,9 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else #define read_cr0() ({ \ unsigned int __dummy; \ __asm__ __volatile__( \ @@ -139,17 +142,18 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define write_cr4(x) \ __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) -/* - * Clear and set 'TS' bit respectively - */ +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory") + +/* Clear the 'TS' bit */ #define clts() __asm__ __volatile__ ("clts") +#endif/* CONFIG_PARAVIRT */ + +/* Set the 'TS' bit */ #define stts() write_cr0(8 | read_cr0()) #endif /* __KERNEL__ */ -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") - static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 54d6d7aea93..46d32ad9208 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -95,15 +95,7 @@ static inline struct thread_info *current_thread_info(void) /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ - if (ret) \ - memset(ret, 0, THREAD_SIZE); \ - ret; \ - }) +#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) #else #define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) #endif diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h new file mode 100644 index 00000000000..ea8065af825 --- /dev/null +++ b/include/asm-i386/time.h @@ -0,0 +1,41 @@ +#ifndef _ASMi386_TIME_H +#define _ASMi386_TIME_H + +#include <linux/efi.h> +#include "mach_time.h" + +static inline unsigned long native_get_wallclock(void) +{ + unsigned long retval; + + if (efi_enabled) + retval = efi_get_time(); + else + retval = mach_get_cmos_time(); + + return retval; +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + int retval; + + if (efi_enabled) + retval = efi_set_rtc_mmss(nowtime); + else + retval = mach_set_rtc_mmss(nowtime); + + return retval; +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else /* !CONFIG_PARAVIRT */ + +#define get_wallclock() native_get_wallclock() +#define set_wallclock(x) native_set_wallclock(x) +#define do_time_init() time_init_hook() + +#endif /* CONFIG_PARAVIRT */ + +#endif diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index 360648b0f2b..4dd82840d53 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -4,7 +4,15 @@ #include <linux/mm.h> #include <asm/processor.h> -#define __flush_tlb() \ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +#define __native_flush_tlb() \ do { \ unsigned int tmpreg; \ \ @@ -19,7 +27,7 @@ * Global pages have to be flushed a bit differently. Not a real * performance problem because this does not happen often. */ -#define __flush_tlb_global() \ +#define __native_flush_tlb_global() \ do { \ unsigned int tmpreg, cr4, cr4_orig; \ \ @@ -36,6 +44,9 @@ : "memory"); \ } while (0) +#define __native_flush_tlb_single(addr) \ + __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") + # define __flush_tlb_all() \ do { \ if (cpu_has_pge) \ @@ -46,9 +57,6 @@ #define cpu_has_invlpg (boot_cpu_data.x86 > 3) -#define __flush_tlb_single(addr) \ - __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") - #ifdef CONFIG_X86_INVLPG # define __flush_tlb_one(addr) __flush_tlb_single(addr) #else diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index beeeaf6b054..833fa1704ff 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -329,104 +329,6 @@ #ifdef __KERNEL__ #define NR_syscalls 320 -#include <linux/err.h> - -/* - * user-visible error numbers are in the range -1 - -MAX_ERRNO: see - * <asm-i386/errno.h> - */ -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ volatile ("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name)); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; movl %1,%%eax ; " \ - "int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ -{ \ -long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long)arg6 }; \ -__asm__ volatile ("push %%ebp ; push %%ebx ; movl 4(%2),%%ebp ; " \ - "movl 0(%2),%%ebx ; movl %1,%%eax ; int $0x80 ; " \ - "pop %%ebx ; pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name),"0" ((long)(&__s)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 5031d693b89..aa2c931e30d 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -71,6 +71,7 @@ static inline void arch_unw_init_blocked(struct unwind_frame_info *info) info->regs.xss = __KERNEL_DS; info->regs.xds = __USER_DS; info->regs.xes = __USER_DS; + info->regs.xgs = __KERNEL_PDA; } extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, @@ -78,17 +79,13 @@ extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, void *arg), void *arg); -static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register and EFLAGS saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode_vm(&info->regs); -#else - return info->regs.eip < PAGE_OFFSET + return user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET || (info->regs.eip >= __fix_to_virt(FIX_VDSO) - && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) || info->regs.esp < PAGE_OFFSET; -#endif } #else diff --git a/include/asm-i386/vm86.h b/include/asm-i386/vm86.h index 952fd695738..a5edf517b99 100644 --- a/include/asm-i386/vm86.h +++ b/include/asm-i386/vm86.h @@ -145,26 +145,13 @@ struct vm86plus_struct { * at the end of the structure. Look at ptrace.h to see the "normal" * setup. For user space layout see 'struct vm86_regs' above. */ +#include <asm/ptrace.h> struct kernel_vm86_regs { /* * normal regs, with special meaning for the segment descriptors.. */ - long ebx; - long ecx; - long edx; - long esi; - long edi; - long ebp; - long eax; - long __null_ds; - long __null_es; - long orig_eax; - long eip; - unsigned short cs, __csh; - long eflags; - long esp; - unsigned short ss, __ssh; + struct pt_regs pt; /* * these are specific to v86 mode: */ diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index 15818a18bc5..4a1e48b9f40 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -10,7 +10,6 @@ header-y += intrinsics.h header-y += perfmon_default_smpl.h header-y += ptrace_offsets.h header-y += rse.h -header-y += setup.h header-y += ucontext.h unifdef-y += perfmon.h diff --git a/include/asm-ia64/dma-mapping.h b/include/asm-ia64/dma-mapping.h index 99a8f8e1218..ebd5887f4b1 100644 --- a/include/asm-ia64/dma-mapping.h +++ b/include/asm-ia64/dma-mapping.h @@ -50,7 +50,8 @@ dma_set_mask (struct device *dev, u64 mask) extern int dma_get_cache_alignment(void); static inline void -dma_cache_sync (void *vaddr, size_t size, enum dma_data_direction dir) +dma_cache_sync (struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) { /* * IA-64 is cache-coherent, so this is mostly a no-op. However, we do need to @@ -59,6 +60,6 @@ dma_cache_sync (void *vaddr, size_t size, enum dma_data_direction dir) mb(); } -#define dma_is_consistent(dma_handle) (1) /* all we do is coherent memory... */ +#define dma_is_consistent(d, h) (1) /* all we do is coherent memory... */ #endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 07d77f3a8cb..8a98a265413 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -59,7 +59,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -83,7 +83,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 9cb68e9b377..393e04c42a2 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -60,7 +60,7 @@ static inline void *pgtable_quicklist_alloc(void) static inline void pgtable_quicklist_free(void *pgtable_entry) { #ifdef CONFIG_NUMA - unsigned long nid = page_to_nid(virt_to_page(pgtable_entry)); + int nid = page_to_nid(virt_to_page(pgtable_entry)); if (unlikely(nid != numa_node_id())) { free_page((unsigned long)pgtable_entry); diff --git a/include/asm-m32r/setup.h b/include/asm-m32r/setup.h index 52f4fa29abf..6a0b32202d4 100644 --- a/include/asm-m32r/setup.h +++ b/include/asm-m32r/setup.h @@ -1,6 +1,11 @@ /* * This is set up by the setup-routine at boot-time */ + +#define COMMAND_LINE_SIZE 512 + +#ifdef __KERNEL__ + #define PARAM ((unsigned char *)empty_zero_page) #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000)) @@ -18,8 +23,6 @@ #define SCREEN_INFO (*(struct screen_info *) (PARAM+0x200)) -#define COMMAND_LINE_SIZE (512) - #define RAMDISK_IMAGE_START_MASK (0x07FF) #define RAMDISK_PROMPT_FLAG (0x8000) #define RAMDISK_LOAD_FLAG (0x4000) @@ -27,3 +30,5 @@ extern unsigned long memory_start; extern unsigned long memory_end; +#endif /* __KERNEL__ */ + diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h index 95aa34298d8..5b66bd3c6ed 100644 --- a/include/asm-m32r/unistd.h +++ b/include/asm-m32r/unistd.h @@ -296,117 +296,6 @@ #ifdef __KERNEL__ #define NR_syscalls 285 -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - * <asm-m32r/errno.h> - */ - -#include <asm/syscall.h> /* SYSCALL_* */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* Avoid using "res" which is declared to be in register r0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __res __asm__("r0"); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type1 arg1,type2 arg2,type3 arg3,type4 arg4) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg4 __asm__ ("r3") = (long)(arg4); \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3), "r" (__arg4) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name(type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg5 __asm__ ("r4") = (long)(arg5); \ -register long __arg4 __asm__ ("r3") = (long)(arg4); \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3), "r" (__arg4), "r" (__arg5) \ - : "memory"); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index d90d841d3df..00259ed6fc9 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -21,7 +21,7 @@ static inline int dma_get_cache_alignment(void) return 1 << L1_CACHE_SHIFT; } -static inline int dma_is_consistent(dma_addr_t dma_addr) +static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 0; } @@ -41,7 +41,7 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, { dma_free_coherent(dev, size, addr, handle); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { /* we use coherent allocation, so not much to do here. */ diff --git a/include/asm-m68k/setup.h b/include/asm-m68k/setup.h index 7facc9a46e7..2a8853cd655 100644 --- a/include/asm-m68k/setup.h +++ b/include/asm-m68k/setup.h @@ -41,8 +41,12 @@ #define MACH_Q40 10 #define MACH_SUN3X 11 +#define COMMAND_LINE_SIZE 256 + #ifdef __KERNEL__ +#define CL_SIZE COMMAND_LINE_SIZE + #ifndef __ASSEMBLY__ extern unsigned long m68k_machtype; #endif /* !__ASSEMBLY__ */ @@ -355,8 +359,6 @@ extern int m68k_is040or060; */ #define NUM_MEMINFO 4 -#define CL_SIZE 256 -#define COMMAND_LINE_SIZE CL_SIZE #ifndef __ASSEMBLY__ struct mem_info { diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index ad4348058c6..fdbb60e6a0d 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -317,103 +317,6 @@ #ifdef __KERNEL__ #define NR_syscalls 311 -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - <asm-m68k/errno.h> */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,atype,a) \ -type name(atype a) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a) ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,atype,a,btype,b) \ -type name(atype a,btype b) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,atype,a,btype,b,ctype,c) \ -type name(atype a,btype b,ctype c) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,atype,a,btype,b,ctype,c,dtype,d) \ -type name (atype a, btype b, ctype c, dtype d) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -register long __d __asm__ ("%d4") = (long)(d); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c), "d" (__d) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name (atype a,btype b,ctype c,dtype d,etype e) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -register long __d __asm__ ("%d4") = (long)(d); \ -register long __e __asm__ ("%d5") = (long)(e); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c), "d" (__d), "d" (__e) \ - ); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-m68knommu/setup.h b/include/asm-m68knommu/setup.h index d2b0fcce41b..fb86bb2a607 100644 --- a/include/asm-m68knommu/setup.h +++ b/include/asm-m68knommu/setup.h @@ -1,5 +1,10 @@ +#ifdef __KERNEL__ + #include <asm-m68k/setup.h> /* We have a bigger command line buffer. */ #undef COMMAND_LINE_SIZE + +#endif /* __KERNEL__ */ + #define COMMAND_LINE_SIZE 512 diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index ebaf0319711..82e03195f32 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -318,156 +318,6 @@ #ifdef __KERNEL__ #define NR_syscalls 311 -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - <asm-m68k/errno.h> */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name) \ - : "cc", "%d0"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall1(type, name, atype, a) \ -type name(atype a) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "g" ((long)a) \ - : "cc", "%d0", "%d1"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name(atype a, btype b) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "g" ((long)b) \ - : "cc", "%d0", "%d1", "%d2"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name(atype a, btype b, ctype c) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "g" ((long)c) \ - : "cc", "%d0", "%d1", "%d2", "%d3"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall4(type, name, atype, a, btype, b, ctype, c, dtype, d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %5, %%d4\n\t" \ - "movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "a" ((long)c), \ - "g" ((long)d) \ - : "cc", "%d0", "%d1", "%d2", "%d3", \ - "%d4"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall5(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %6, %%d5\n\t" \ - "movel %5, %%d4\n\t" \ - "movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "a" ((long)c), \ - "a" ((long)d), \ - "g" ((long)e) \ - : "cc", "%d0", "%d1", "%d2", "%d3", \ - "%d4", "%d5"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index 43288634c38..236d1a467cc 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -63,9 +63,9 @@ dma_get_cache_alignment(void) return 128; } -extern int dma_is_consistent(dma_addr_t dma_addr); +extern int dma_is_consistent(struct device *dev, dma_addr_t dma_addr); -extern void dma_cache_sync(void *vaddr, size_t size, +extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); #define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h index 927a216bd53..47e5679c235 100644 --- a/include/asm-mips/futex.h +++ b/include/asm-mips/futex.h @@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -115,7 +115,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-mips/highmem.h b/include/asm-mips/highmem.h index c976bfaaba8..f8c8182f7f2 100644 --- a/include/asm-mips/highmem.h +++ b/include/asm-mips/highmem.h @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/uaccess.h> #include <asm/kmap_types.h> /* undef for production */ @@ -70,11 +71,16 @@ static inline void *kmap(struct page *page) static inline void *kmap_atomic(struct page *page, enum km_type type) { + pagefault_disable(); return page_address(page); } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) { } -#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ + pagefault_enable(); +} + +#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) diff --git a/include/asm-mips/setup.h b/include/asm-mips/setup.h index 737fa4a6912..70009a90263 100644 --- a/include/asm-mips/setup.h +++ b/include/asm-mips/setup.h @@ -1,8 +1,6 @@ -#ifdef __KERNEL__ #ifndef _MIPS_SETUP_H #define _MIPS_SETUP_H #define COMMAND_LINE_SIZE 256 #endif /* __SETUP_H */ -#endif /* __KERNEL__ */ diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index ec56aa52f66..696cff39a1d 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -933,268 +933,6 @@ #ifndef __ASSEMBLY__ -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %2\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -/* - * DANGER: This macro isn't usable for the pipe(2) call - * which has a unusual return convention. - */ -#define _syscall1(type,name,atype,a) \ -type name(atype a) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %3\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall2(type,name,atype,a,btype,b) \ -type name(atype a, btype b) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %4\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "r" (__a1), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall3(type,name,atype,a,btype,b,ctype,c) \ -type name(atype a, btype b, ctype c) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall4(type,name,atype,a,btype,b,ctype,c,dtype,d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#if (_MIPS_SIM == _MIPS_SIM_ABI32) - -/* - * Using those means your brain needs more than an oil change ;-) - */ - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "lw\t$2, %6\n\t" \ - "subu\t$29, 32\n\t" \ - "sw\t$2, 16($29)\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - "addiu\t$29, 32\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name), \ - "m" ((unsigned long)e) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall6(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e,ftype,f) \ -type name(atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "lw\t$2, %6\n\t" \ - "lw\t$8, %7\n\t" \ - "subu\t$29, 32\n\t" \ - "sw\t$2, 16($29)\n\t" \ - "sw\t$8, 20($29)\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - "addiu\t$29, 32\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name), \ - "m" ((unsigned long)e), "m" ((unsigned long)f) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#endif /* (_MIPS_SIM == _MIPS_SIM_ABI32) */ - -#if (_MIPS_SIM == _MIPS_SIM_NABI32) || (_MIPS_SIM == _MIPS_SIM_ABI64) - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name (atype a,btype b,ctype c,dtype d,etype e) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - register unsigned long __a4 asm("$8") = (unsigned long) e; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %6\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4), "i" (__NR_##name) \ - : "$2", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall6(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e,ftype,f) \ -type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - register unsigned long __a4 asm("$8") = (unsigned long) e; \ - register unsigned long __a5 asm("$9") = (unsigned long) f; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %7\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4), "r" (__a5), \ - "i" (__NR_##name) \ - : "$2", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#endif /* (_MIPS_SIM == _MIPS_SIM_NABI32) || (_MIPS_SIM == _MIPS_SIM_ABI64) */ - - #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index 1e387e1dad3..66f0b408c66 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -191,13 +191,13 @@ dma_get_cache_alignment(void) } static inline int -dma_is_consistent(dma_addr_t dma_addr) +dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return (hppa_dma_ops->dma_sync_single_for_cpu == NULL); } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { if(hppa_dma_ops->dma_sync_single_for_cpu) diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h index d84bbb283fd..dbee6e60aa8 100644 --- a/include/asm-parisc/futex.h +++ b/include/asm-parisc/futex.h @@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index 7e38b5fddad..7c7de87bd8a 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -342,9 +342,9 @@ static inline int dma_mapping_error(dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE -#define dma_is_consistent(d) (0) +#define dma_is_consistent(d, h) (0) #else -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) #endif static inline int dma_get_cache_alignment(void) @@ -378,7 +378,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev, dma_sync_single_for_device(dev, dma_handle, offset + size, direction); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index b5436642a10..d36426c01b6 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -124,12 +124,10 @@ typedef elf_greg_t32 elf_gregset_t32[ELF_NGREG]; # define ELF_DATA ELFDATA2MSB typedef elf_greg_t64 elf_greg_t; typedef elf_gregset_t64 elf_gregset_t; -# define elf_addr_t unsigned long #else /* Assumption: ELF_ARCH == EM_PPC and ELF_CLASS == ELFCLASS32 */ typedef elf_greg_t32 elf_greg_t; typedef elf_gregset_t32 elf_gregset_t; -# define elf_addr_t __u32 #endif /* ELF_ARCH */ /* Floating point registers */ diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h index 936422e5489..3f3673fd3ff 100644 --- a/include/asm-powerpc/futex.h +++ b/include/asm-powerpc/futex.h @@ -43,7 +43,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h index ae63db7b3e7..b0830db68f8 100644 --- a/include/asm-powerpc/pgalloc.h +++ b/include/asm-powerpc/pgalloc.h @@ -11,7 +11,7 @@ #include <linux/cpumask.h> #include <linux/percpu.h> -extern kmem_cache_t *pgtable_cache[]; +extern struct kmem_cache *pgtable_cache[]; #ifdef CONFIG_PPC_64K_PAGES #define PTE_CACHE_NUM 0 diff --git a/include/asm-powerpc/setup.h b/include/asm-powerpc/setup.h index 3d9740aae01..817fac0a071 100644 --- a/include/asm-powerpc/setup.h +++ b/include/asm-powerpc/setup.h @@ -1,9 +1,6 @@ #ifndef _ASM_POWERPC_SETUP_H #define _ASM_POWERPC_SETUP_H -#ifdef __KERNEL__ - #define COMMAND_LINE_SIZE 512 -#endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 04b6c17cc59..0ae954e3d25 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -334,115 +334,6 @@ #ifndef __ASSEMBLY__ -/* On powerpc a system call basically clobbers the same registers like a - * function call, with the exception of LR (which is needed for the - * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal - * an error return status). - */ - -#define __syscall_nr(nr, type, name, args...) \ - unsigned long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0"); \ - register unsigned long __sc_3 __asm__ ("r3"); \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - \ - __sc_loadargs_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc \n\t" \ - "mfcr %0 " \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : __sc_asm_input_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10","r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - if (__sc_err & 0x10000000) \ - { \ - errno = __sc_ret; \ - __sc_ret = -1; \ - } \ - return (type) __sc_ret - -#define __sc_loadargs_0(name, dummy...) \ - __sc_0 = __NR_##name -#define __sc_loadargs_1(name, arg1) \ - __sc_loadargs_0(name); \ - __sc_3 = (unsigned long) (arg1) -#define __sc_loadargs_2(name, arg1, arg2) \ - __sc_loadargs_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) -#define __sc_loadargs_3(name, arg1, arg2, arg3) \ - __sc_loadargs_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) -#define __sc_loadargs_4(name, arg1, arg2, arg3, arg4) \ - __sc_loadargs_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) -#define __sc_loadargs_5(name, arg1, arg2, arg3, arg4, arg5) \ - __sc_loadargs_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) -#define __sc_loadargs_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - __sc_loadargs_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - -#define __sc_asm_input_0 "0" (__sc_0) -#define __sc_asm_input_1 __sc_asm_input_0, "1" (__sc_3) -#define __sc_asm_input_2 __sc_asm_input_1, "2" (__sc_4) -#define __sc_asm_input_3 __sc_asm_input_2, "3" (__sc_5) -#define __sc_asm_input_4 __sc_asm_input_3, "4" (__sc_6) -#define __sc_asm_input_5 __sc_asm_input_4, "5" (__sc_7) -#define __sc_asm_input_6 __sc_asm_input_5, "6" (__sc_8) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ - __syscall_nr(0, type, name); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - __syscall_nr(1, type, name, arg1); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1, type2 arg2) \ -{ \ - __syscall_nr(2, type, name, arg1, arg2); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1, type2 arg2, type3 arg3) \ -{ \ - __syscall_nr(3, type, name, arg1, arg2, arg3); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - __syscall_nr(4, type, name, arg1, arg2, arg3, arg4); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ - __syscall_nr(5, type, name, arg1, arg2, arg3, arg4, arg5); \ -} -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ - __syscall_nr(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ -} - - #include <linux/types.h> #include <linux/compiler.h> #include <linux/linkage.h> diff --git a/include/asm-ppc/highmem.h b/include/asm-ppc/highmem.h index 1d2c4ef81c2..f7b21ee302b 100644 --- a/include/asm-ppc/highmem.h +++ b/include/asm-ppc/highmem.h @@ -79,7 +79,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -101,8 +101,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < KMAP_FIX_BEGIN) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -115,8 +114,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) pte_clear(&init_mm, vaddr, kmap_pte+idx); flush_tlb_page(NULL, vaddr); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } static inline struct page *kmap_atomic_to_page(void *ptr) diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 7664bacdd83..9574fe80a04 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -8,12 +8,13 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H +#define COMMAND_LINE_SIZE 896 + #ifdef __KERNEL__ #include <asm/types.h> #define PARMAREA 0x10400 -#define COMMAND_LINE_SIZE 896 #define MEMORY_CHUNKS 16 /* max 0x7fff */ #define IPL_PARMBLOCK_ORIGIN 0x2000 diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 71d3c21b84f..fb6fef97d73 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -345,160 +345,6 @@ #ifdef __KERNEL__ -#include <linux/err.h> - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _svc_clobber "1", "cc", "memory" - -#define _syscall0(type,name) \ -type name(void) { \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - register type1 __arg1 asm("2") = arg1; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1, type2 arg2) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2) \ - : _svc_clobber ); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1, type2 arg2, type3 arg3) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3, \ - type4,name4) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register type4 __arg4 asm("5") = arg4; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3), \ - "d" (__arg4) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3, \ - type4,name4,type5,name5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register type4 __arg4 asm("5") = arg4; \ - register type5 __arg5 asm("6") = arg5; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3), \ - "d" (__arg4), \ - "d" (__arg5) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 56cd4b97723..37ab0c131a4 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -53,7 +53,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, consistent_free(vaddr, size); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { consistent_sync(vaddr, size, (int)dir); diff --git a/include/asm-sh/setup.h b/include/asm-sh/setup.h index 34ca8a7f06b..1583c6b7bda 100644 --- a/include/asm-sh/setup.h +++ b/include/asm-sh/setup.h @@ -1,10 +1,12 @@ -#ifdef __KERNEL__ #ifndef _SH_SETUP_H #define _SH_SETUP_H #define COMMAND_LINE_SIZE 256 +#ifdef __KERNEL__ + int setup_early_printk(char *); -#endif /* _SH_SETUP_H */ #endif /* __KERNEL__ */ + +#endif /* _SH_SETUP_H */ diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h index 0cae1d24876..f982073dc6c 100644 --- a/include/asm-sh/unistd.h +++ b/include/asm-sh/unistd.h @@ -332,143 +332,6 @@ #ifdef __KERNEL__ -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: - * see <asm-sh/errno.h> */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* Avoid using "res" which is declared to be in register r0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#if defined(__sh2__) || defined(__SH2E__) || defined(__SH2A__) -#define SYSCALL_ARG0 "trapa #0x20" -#define SYSCALL_ARG1 "trapa #0x21" -#define SYSCALL_ARG2 "trapa #0x22" -#define SYSCALL_ARG3 "trapa #0x23" -#define SYSCALL_ARG4 "trapa #0x24" -#define SYSCALL_ARG5 "trapa #0x25" -#define SYSCALL_ARG6 "trapa #0x26" -#else -#define SYSCALL_ARG0 "trapa #0x10" -#define SYSCALL_ARG1 "trapa #0x11" -#define SYSCALL_ARG2 "trapa #0x12" -#define SYSCALL_ARG3 "trapa #0x13" -#define SYSCALL_ARG4 "trapa #0x14" -#define SYSCALL_ARG5 "trapa #0x15" -#define SYSCALL_ARG6 "trapa #0x16" -#endif - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -__asm__ __volatile__ (SYSCALL_ARG0 \ - : "=z" (__sc0) \ - : "0" (__sc0) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -__asm__ __volatile__ (SYSCALL_ARG1 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -__asm__ __volatile__ (SYSCALL_ARG2 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -__asm__ __volatile__ (SYSCALL_ARG3 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -__asm__ __volatile__ (SYSCALL_ARG4 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), \ - "r" (__sc7) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ -register long __sc3 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -register long __sc0 __asm__ ("r0") = (long) arg5; \ -__asm__ __volatile__ (SYSCALL_ARG5 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ - "r" (__sc3) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ -register long __sc3 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -register long __sc0 __asm__ ("r0") = (long) arg5; \ -register long __sc1 __asm__ ("r1") = (long) arg6; \ -__asm__ __volatile__ (SYSCALL_ARG6 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ - "r" (__sc3), "r" (__sc1) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/include/asm-sh64/dma-mapping.h b/include/asm-sh64/dma-mapping.h index 68e27a8fca3..5efe906c59f 100644 --- a/include/asm-sh64/dma-mapping.h +++ b/include/asm-sh64/dma-mapping.h @@ -35,7 +35,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, consistent_free(NULL, size, vaddr, dma_handle); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { dma_cache_wback_inv((unsigned long)vaddr, size); diff --git a/include/asm-sh64/setup.h b/include/asm-sh64/setup.h index ebd42eb1b70..5b07b14c292 100644 --- a/include/asm-sh64/setup.h +++ b/include/asm-sh64/setup.h @@ -1,6 +1,10 @@ #ifndef __ASM_SH64_SETUP_H #define __ASM_SH64_SETUP_H +#define COMMAND_LINE_SIZE 256 + +#ifdef __KERNEL__ + #define PARAM ((unsigned char *)empty_zero_page) #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000)) #define RAMDISK_FLAGS (*(unsigned long *) (PARAM+0x004)) @@ -12,5 +16,7 @@ #define COMMAND_LINE ((char *) (PARAM+256)) #define COMMAND_LINE_SIZE 256 +#endif /* __KERNEL__ */ + #endif /* __ASM_SH64_SETUP_H */ diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h index ee7828b27ad..1f38a7aacaa 100644 --- a/include/asm-sh64/unistd.h +++ b/include/asm-sh64/unistd.h @@ -347,148 +347,6 @@ #ifdef __KERNEL__ #define NR_syscalls 321 -#include <linux/err.h> - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: - * see <asm-sh64/errno.h> */ - -#define __syscall_return(type, res) \ -do { \ - /* Note: when returning from kernel the return value is in r9 \ - ** This prevents conflicts between return value and arg1 \ - ** when dispatching signal handler, in other words makes \ - ** life easier in the system call epilogue (see entry.S) \ - */ \ - register unsigned long __sr2 __asm__ ("r2") = res; \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - __sr2 = -1; \ - } \ - return (type) (__sr2); \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x10 << 16) | __NR_##name); \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "()" \ - : "=r" (__sc0) \ - : "r" (__sc0) ); \ -__syscall_return(type,__sc0); \ -} - - /* - * The apparent spurious "dummy" assembler comment is *needed*, - * as without it, the compiler treats the arg<n> variables - * as no longer live just before the asm. The compiler can - * then optimize the storage into any registers it wishes. - * The additional dummy statement forces the compiler to put - * the arguments into the correct registers before the TRAPA. - */ -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x11 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2)); \ -__asm__ __volatile__ ("!dummy %0 %1" \ - : \ - : "r" (__sc0), "r" (__sc2)); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x12 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3) ); \ -__asm__ __volatile__ ("!dummy %0 %1 %2" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3) ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) ); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x14 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5) );\ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5) );\ -__syscall_return(type,__sc0); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x15 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -register unsigned long __sc6 __asm__ ("r6") = (unsigned long) arg5; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5,%6)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6)); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4 %5" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6)); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5, type6, arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x16 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -register unsigned long __sc6 __asm__ ("r6") = (unsigned long) arg5; \ -register unsigned long __sc7 __asm__ ("r7") = (unsigned long) arg6; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5,%6,%7)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6), "r" (__sc7)); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4 %5 %6" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6), "r" (__sc7)); \ -__syscall_return(type,__sc0); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index f7827fa4cd5..d5b2f8053b3 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -329,136 +329,6 @@ * find a free slot in the 0-302 range. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res)\ - : "r" (__g1) \ - : "o0", "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -register long __o4 __asm__ ("o4") = (long)(arg5); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__o4), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index 27c46fbeebd..2f858a2df94 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -181,7 +181,7 @@ dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t siz #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) @@ -210,7 +210,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { /* could define this in terms of the dma_cache ... operations, diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 7392fc4a954..876312fe82c 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -45,7 +45,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -67,7 +67,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 010f9cd0a67..5891ff7ba76 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -13,7 +13,7 @@ #include <asm/page.h> /* Page table allocation/freeing. */ -extern kmem_cache_t *pgtable_cache; +extern struct kmem_cache *pgtable_cache; static inline pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index 63669dad0d7..47047536f26 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -332,124 +332,6 @@ * find a free slot in the 0-302 range. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res)\ - : "r" (__g1) \ - : "o0", "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__g1) \ - : "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__g1) \ - : "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -register long __o4 __asm__ ("o4") = (long)(arg5); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__o4), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - /* sysconf options, for SunOS compatibility */ #define _SC_ARG_MAX 1 #define _SC_CHILD_MAX 2 diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h index babd2989511..f0ee4fb5591 100644 --- a/include/asm-um/dma-mapping.h +++ b/include/asm-um/dma-mapping.h @@ -94,7 +94,7 @@ dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) @@ -112,7 +112,7 @@ dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG(); diff --git a/include/asm-v850/irq.h b/include/asm-v850/irq.h index 1bf096db8f4..88687c181f0 100644 --- a/include/asm-v850/irq.h +++ b/include/asm-v850/irq.h @@ -46,8 +46,6 @@ extern void init_irq_handlers (int base_irq, int num, int interval, struct hw_interrupt_type *irq_type); -typedef void (*irq_handler_t)(int irq, void *data, struct pt_regs *regs); - /* Handle interrupt IRQ. REGS are the registers at the time of ther interrupt. */ extern unsigned int handle_irq (int irq, struct pt_regs *regs); diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h index 737401e7d3a..2241ed45ecf 100644 --- a/include/asm-v850/unistd.h +++ b/include/asm-v850/unistd.h @@ -204,168 +204,8 @@ #define __NR_gettid 201 #define __NR_tkill 202 - -/* Syscall protocol: - Syscall number in r12, args in r6-r9, r13-r14 - Return value in r10 - Trap 0 for `short' syscalls, where all the args can fit in function - call argument registers, and trap 1 when there are additional args in - r13-r14. */ - -#define SYSCALL_NUM "r12" -#define SYSCALL_ARG0 "r6" -#define SYSCALL_ARG1 "r7" -#define SYSCALL_ARG2 "r8" -#define SYSCALL_ARG3 "r9" -#define SYSCALL_ARG4 "r13" -#define SYSCALL_ARG5 "r14" -#define SYSCALL_RET "r10" - -#define SYSCALL_SHORT_TRAP "0" -#define SYSCALL_LONG_TRAP "1" - -/* Registers clobbered by any syscall. This _doesn't_ include the syscall - number (r12) or the `extended arg' registers (r13, r14), even though - they are actually clobbered too (this is because gcc's `asm' statement - doesn't allow a clobber to be used as an input or output). */ -#define SYSCALL_CLOBBERS "r1", "r5", "r11", "r15", "r16", \ - "r17", "r18", "r19" - -/* Registers clobbered by a `short' syscall. This includes all clobbers - except the syscall number (r12). */ -#define SYSCALL_SHORT_CLOBBERS SYSCALL_CLOBBERS, "r13", "r14" - #ifdef __KERNEL__ -#include <asm/clinkage.h> -#include <linux/err.h> - -#define __syscall_return(type, res) \ - do { \ - /* user-visible error numbers are in the range -1 - -MAX_ERRNO: \ - see <asm-v850/errno.h> */ \ - if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO), 0)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - - -#define _syscall0(type, name) \ -type name (void) \ -{ \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall1(type, name, atype, a) \ -type name (atype a) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name (atype a, btype b) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a), "r" (__b) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name (atype a, btype b, ctype c) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a), "r" (__b), "r" (__c) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall4(type, name, atype, a, btype, b, ctype, c, dtype, d) \ -type name (atype a, btype b, ctype c, dtype d) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), \ - "r" (__a), "r" (__b), "r" (__c), "r" (__d) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall5(type, name, atype, a, btype, b, ctype, c, dtype, d, etype,e)\ -type name (atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register etype __e __asm__ (SYSCALL_ARG4) = e; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (__ret), "=r" (__syscall), "=r" (__e) \ - : "1" (__syscall), \ - "r" (__a), "r" (__b), "r" (__c), "r" (__d), "2" (__e) \ - : SYSCALL_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (ret), "=r" (syscall), \ - "=r" (e), "=r" (f) \ - : "1" (syscall), \ - "r" (a), "r" (b), "r" (c), "r" (d), \ - "2" (e), "3" (f) \ - : SYSCALL_CLOBBERS); - -#define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \ -type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register etype __e __asm__ (SYSCALL_ARG4) = e; \ - register etype __f __asm__ (SYSCALL_ARG5) = f; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __SYSCALL6_TRAP(__syscall, __ret, __a, __b, __c, __d, __e, __f); \ - __syscall_return (type, __ret); \ -} - - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 1ee9b07f3fe..ebd7117782a 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -6,13 +6,11 @@ ALTARCHDEF := defined __i386__ header-y += boot.h header-y += bootsetup.h -header-y += cpufeature.h header-y += debugreg.h header-y += ldt.h header-y += msr.h header-y += prctl.h header-y += ptrace-abi.h -header-y += setup.h header-y += sigcontext32.h header-y += ucontext.h header-y += vsyscall32.h diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h index a584826cc57..a6657b4f3e0 100644 --- a/include/asm-x86_64/alternative.h +++ b/include/asm-x86_64/alternative.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include <linux/types.h> +#include <linux/stddef.h> #include <asm/cpufeature.h> struct alt_instr { @@ -133,4 +134,15 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif +struct paravirt_patch; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +#else +static inline void +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{} +#define __start_parainstructions NULL +#define __stop_parainstructions NULL +#endif + #endif /* _X86_64_ALTERNATIVE_H */ diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 93849f7abc2..706ca4b6000 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -189,9 +189,9 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) { int __i = i; __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); + LOCK_PREFIX "xaddl %0, %1" + :"+r" (i), "+m" (v->counter) + : : "memory"); return i + __i; } diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h index 6b93f5a3a5c..7ee90064571 100644 --- a/include/asm-x86_64/calgary.h +++ b/include/asm-x86_64/calgary.h @@ -51,6 +51,8 @@ struct iommu_table { #define TCE_TABLE_SIZE_4M 6 #define TCE_TABLE_SIZE_8M 7 +extern int use_calgary; + #ifdef CONFIG_CALGARY_IOMMU extern int calgary_iommu_init(void); extern void detect_calgary(void); diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index ee792faaca0..0b3c686139f 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -29,7 +29,7 @@ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ #define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ @@ -68,6 +68,8 @@ #define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+10) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+11) /* Branch Trace Store */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -112,5 +114,8 @@ #define cpu_has_cyrix_arr 0 #define cpu_has_centaur_mcr 0 #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #endif /* __ASM_X8664_CPUFEATURE_H */ diff --git a/include/asm-x86_64/delay.h b/include/asm-x86_64/delay.h index 65f64acc531..c2669f1f552 100644 --- a/include/asm-x86_64/delay.h +++ b/include/asm-x86_64/delay.h @@ -7,18 +7,21 @@ * Delay routines calling functions in arch/x86_64/lib/delay.c */ +/* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); extern void __udelay(unsigned long usecs); -extern void __ndelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ __udelay(n)) +/* 0x5 is 2**32 / 1000000000 (rounded up) */ #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index eb7723a4679..913d6ac0003 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -9,64 +9,13 @@ #include <linux/string.h> #include <linux/smp.h> +#include <asm/desc_defs.h> #include <asm/segment.h> #include <asm/mmu.h> -// 8 byte segment descriptor -struct desc_struct { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; -} __attribute__((packed)); - -struct n_desc_struct { - unsigned int a,b; -}; - extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -enum { - GATE_INTERRUPT = 0xE, - GATE_TRAP = 0xF, - GATE_CALL = 0xC, -}; - -// 16byte gate -struct gate_struct { - u16 offset_low; - u16 segment; - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; - u16 offset_middle; - u32 offset_high; - u32 zero1; -} __attribute__((packed)); - -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) - -enum { - DESC_TSS = 0x9, - DESC_LDT = 0x2, -}; - -// LDT or TSS descriptor in the GDT. 16 bytes. -struct ldttss_desc { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - -struct desc_ptr { - unsigned short size; - unsigned long address; -} __attribute__((packed)) ; - #define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)) #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) #define clear_LDT() asm volatile("lldt %w0"::"r" (0)) diff --git a/include/asm-x86_64/desc_defs.h b/include/asm-x86_64/desc_defs.h new file mode 100644 index 00000000000..08900407009 --- /dev/null +++ b/include/asm-x86_64/desc_defs.h @@ -0,0 +1,69 @@ +/* Written 2000 by Andi Kleen */ +#ifndef __ARCH_DESC_DEFS_H +#define __ARCH_DESC_DEFS_H + +/* + * Segment descriptor structure definitions, usable from both x86_64 and i386 + * archs. + */ + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +// 8 byte segment descriptor +struct desc_struct { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; + unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; +} __attribute__((packed)); + +struct n_desc_struct { + unsigned int a,b; +}; + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, +}; + +// 16byte gate +struct gate_struct { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, +}; + +// LDT or TSS descriptor in the GDT. 16 bytes. +struct ldttss_desc { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +struct desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)) ; + + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index 10174b110a5..be9ec689072 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -180,12 +180,13 @@ static inline int dma_get_cache_alignment(void) return boot_cpu_data.x86_clflush_size; } -#define dma_is_consistent(h) 1 +#define dma_is_consistent(d, h) 1 extern int dma_set_mask(struct device *dev, u64 mask); static inline void -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) { flush_write_buffers(); } diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h index 9804bf07b09..5cdfb08013c 100644 --- a/include/asm-x86_64/futex.h +++ b/include/asm-x86_64/futex.h @@ -55,7 +55,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -78,7 +78,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index a0e9a4b9348..b80f4bb5f27 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -30,6 +30,6 @@ struct genapic { }; -extern struct genapic *genapic; +extern struct genapic *genapic, *genapic_force, apic_flat; #endif diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 37e194169fa..952783d35c7 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -169,8 +169,8 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ #define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ #define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit GS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit FS base */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ /* EFER bits: */ #define _EFER_SCE 0 /* SYSCALL/SYSRET */ @@ -210,6 +210,10 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_IA32_LASTINTFROMIP 0x1dd #define MSR_IA32_LASTINTTOIP 0x1de +#define MSR_IA32_PEBS_ENABLE 0x3f1 +#define MSR_IA32_DS_AREA 0x600 +#define MSR_IA32_PERF_CAPABILITIES 0x345 + #define MSR_MTRRfix64K_00000 0x250 #define MSR_MTRRfix16K_80000 0x258 #define MSR_MTRRfix16K_A0000 0x259 @@ -407,4 +411,13 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_P4_U2L_ESCR0 0x3b0 #define MSR_P4_U2L_ESCR1 0x3b1 +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x309 +#define MSR_CORE_PERF_FIXED_CTR1 0x30a +#define MSR_CORE_PERF_FIXED_CTR2 0x30b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 + #endif diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index f367d4014b4..72375e7d32a 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + #endif /* ASM_NMI_H */ diff --git a/include/asm-x86_64/pci-direct.h b/include/asm-x86_64/pci-direct.h index eba9cb471df..6823fa4f1af 100644 --- a/include/asm-x86_64/pci-direct.h +++ b/include/asm-x86_64/pci-direct.h @@ -10,6 +10,7 @@ extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); +extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern int early_pci_allowed(void); diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 0555c1c4d8f..59901c690a0 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -221,20 +221,19 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -static inline unsigned long pgd_bad(pgd_t pgd) -{ - unsigned long val = pgd_val(pgd); - val &= ~PTE_MASK; - val &= ~(_PAGE_USER | _PAGE_DIRTY); - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); -} +static inline unsigned long pgd_bad(pgd_t pgd) +{ + return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); +} static inline unsigned long pud_bad(pud_t pud) { - unsigned long val = pud_val(pud); - val &= ~PTE_MASK; - val &= ~(_PAGE_USER | _PAGE_DIRTY); - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); +} + +static inline unsigned long pmd_bad(pmd_t pmd) +{ + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val(x)) @@ -347,7 +346,6 @@ static inline int pmd_large(pmd_t pte) { #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index cef17e0f828..76552d72804 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -475,6 +475,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + "sti; .byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); #define stack_current() \ diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index e72cfcdf534..6d324b83897 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -61,7 +61,6 @@ extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); extern unsigned long numa_free_all_bootmem(void); extern void reserve_bootmem_generic(unsigned long phys, unsigned len); -extern void free_bootmem_generic(unsigned long phys, unsigned len); extern void load_gs_index(unsigned gs); @@ -88,6 +87,7 @@ extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); extern void early_quirks(void); +extern void quirk_intel_irqbalance(void); extern void check_efer(void); extern int unhandled_signal(struct task_struct *tsk, int sig); diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h new file mode 100644 index 00000000000..c7350f6d201 --- /dev/null +++ b/include/asm-x86_64/rio.h @@ -0,0 +1,74 @@ +/* + * Derived from include/asm-i386/mach-summit/mach_mpparse.h + * and include/asm-i386/mach-default/bios_ebda.h + * + * Author: Laurent Vivier <Laurent.Vivier@bull.net> + */ + +#ifndef __ASM_RIO_H +#define __ASM_RIO_H + +#define RIO_TABLE_VERSION 3 + +struct rio_table_hdr { + u8 version; /* Version number of this data structure */ + u8 num_scal_dev; /* # of Scalability devices */ + u8 num_rio_dev; /* # of RIO I/O devices */ +} __attribute__((packed)); + +struct scal_detail { + u8 node_id; /* Scalability Node ID */ + u32 CBAR; /* Address of 1MB register space */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF = None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port2node; /* Node ID port connected to: 0xFF = None */ + u8 port2port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + u8 node_id; /* RIO Node ID */ + u32 BBAR; /* Address of 1MB register space */ + u8 type; /* Type of device */ + u8 owner_id; /* Node ID of Hurricane that owns this */ + /* node */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF=None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 first_slot; /* Lowest slot number below this Calgary */ + u8 status; /* Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie: */ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + u8 WP_index; /* instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + u8 chassis_num; /* 1 based Chassis number */ +} __attribute__((packed)); + +enum { + HURR_SCALABILTY = 0, /* Hurricane Scalability info */ + HURR_RIOIB = 2, /* Hurricane RIOIB info */ + COMPAT_CALGARY = 4, /* Compatibility Calgary */ + ALT_CALGARY = 5, /* Second Planar Calgary */ +}; + +/* + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ +static inline unsigned long get_bios_ebda(void) +{ + unsigned long address = *(unsigned short *)phys_to_virt(0x40EUL); + address <<= 4; + return address; +} + +#endif /* __ASM_RIO_H */ diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index d6b7c057edb..e17b9ec42e9 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -82,11 +82,6 @@ extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ extern u8 x86_cpu_to_log_apicid[NR_CPUS]; extern u8 bios_cpu_apicid[]; -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return cpus_addr(cpumask)[0]; -} - static inline int cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < NR_CPUS) @@ -118,13 +113,6 @@ static __inline int logical_smp_processor_id(void) #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] #else #define cpu_physical_id(cpu) boot_cpu_id -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait) -{ - /* Disable interrupts here? */ - func(info); - return 0; -} #endif /* !CONFIG_SMP */ #endif diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index 05ef097ba55..88bf981e73c 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -36,7 +36,34 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) "2:\t" : "=m" (lock->slock) : : "memory"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) +/* + * Same as __raw_spin_lock, but reenable interrupts during spinning. + */ +#ifndef CONFIG_PROVE_LOCKING +static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +{ + asm volatile( + "\n1:\t" + LOCK_PREFIX " ; decl %0\n\t" + "jns 5f\n" + "testl $0x200, %1\n\t" /* interrupts were disabled? */ + "jz 4f\n\t" + "sti\n" + "3:\t" + "rep;nop\n\t" + "cmpl $0, %0\n\t" + "jle 3b\n\t" + "cli\n\t" + "jmp 1b\n" + "4:\t" + "rep;nop\n\t" + "cmpl $0, %0\n\t" + "jg 1b\n\t" + "jmp 4b\n" + "5:\n\t" + : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory"); +} +#endif static inline int __raw_spin_trylock(raw_spinlock_t *lock) { diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h index 5eb9799bef7..6f0b5459430 100644 --- a/include/asm-x86_64/stacktrace.h +++ b/include/asm-x86_64/stacktrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKTRACE_H #define _ASM_STACKTRACE_H 1 +extern int kstack_depth_to_print; + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 777288eb7e7..c5f596e71fa 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -622,25 +622,7 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_syscall_max __NR_move_pages -#ifdef __KERNEL__ -#include <linux/err.h> -#endif - #ifndef __NO_STUBS - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO */ - -#define __syscall_clobber "r11","rcx","memory" - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT #define __ARCH_WANT_SYS_ALARM @@ -664,87 +646,6 @@ do { \ #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_TIME -#define __syscall "syscall" - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)) : __syscall_clobber); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ;" __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)),"g" ((long)(arg4)) : __syscall_clobber,"r10" ); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5)) : \ - __syscall_clobber,"r8","r10" ); \ -__syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; " __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)), "g" ((long)(arg4)), "g" ((long)(arg5)), \ - "g" ((long)(arg6)) : \ - __syscall_clobber,"r8","r10","r9" ); \ -__syscall_return(type,__res); \ -} - #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 2e7ff10fd77..2f6349e4871 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -87,14 +87,10 @@ extern int arch_unwind_init_running(struct unwind_frame_info *, static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode(&info->regs); -#else - return (long)info->regs.rip >= 0 + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) || (long)info->regs.rsp >= 0; -#endif } #else diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 01d1c17e284..05cb8dd200d 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -10,6 +10,7 @@ enum vsyscall_num { #define VSYSCALL_START (-10UL << 20) #define VSYSCALL_SIZE 1024 #define VSYSCALL_END (-2UL << 20) +#define VSYSCALL_MAPPED_PAGES 1 #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) #ifdef __KERNEL__ diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index c39c91dfcc6..82b03b3a2ee 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -170,10 +170,10 @@ dma_get_cache_alignment(void) return L1_CACHE_BYTES; } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { consistent_sync(vaddr, size, direction); diff --git a/include/asm-xtensa/unistd.h b/include/asm-xtensa/unistd.h index 411f810a55c..2e1a1b997e7 100644 --- a/include/asm-xtensa/unistd.h +++ b/include/asm-xtensa/unistd.h @@ -218,190 +218,6 @@ #define SYSXTENSA_COUNT 5 /* count of syscall0 functions*/ -#ifdef __KERNEL__ -#include <linux/linkage.h> - -#define __syscall_return(type, res) return ((type)(res)) - -/* Tensilica's xt-xcc compiler is much more agressive at code - * optimization than gcc. Multiple __asm__ statements are - * insufficient for xt-xcc because subsequent optimization passes - * (beyond the front-end that knows of __asm__ statements and other - * such GNU Extensions to C) can modify the register selection for - * containment of C variables. - * - * xt-xcc cannot modify the contents of a single __asm__ statement, so - * we create single-asm versions of the syscall macros that are - * suitable and optimal for both xt-xcc and gcc. - * - * Linux takes system-call arguments in registers. The following - * design is optimized for user-land apps (e.g., glibc) which - * typically have a function wrapper around the "syscall" assembly - * instruction. It satisfies the Xtensa ABI while minizing argument - * shifting. - * - * The Xtensa ABI and software conventions require the system-call - * number in a2. If an argument exists in a2, we move it to the next - * available register. Note that for improved efficiency, we do NOT - * shift all parameters down one register to maintain the original - * order. - * - * At best case (zero arguments), we just write the syscall number to - * a2. At worst case (1 to 6 arguments), we move the argument in a2 - * to the next available register, then write the syscall number to - * a2. - * - * For clarity, the following truth table enumerates all possibilities. - * - * arguments syscall number arg0, arg1, arg2, arg3, arg4, arg5 - * --------- -------------- ---------------------------------- - * 0 a2 - * 1 a2 a3 - * 2 a2 a4, a3 - * 3 a2 a5, a3, a4 - * 4 a2 a6, a3, a4, a5 - * 5 a2 a7, a3, a4, a5, a6 - * 6 a2 a8, a3, a4, a5, a6, a7 - */ - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name) \ - : "a2" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type0,arg0) \ -type name(type0 arg0) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a3, %2 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0) \ - : "a2", "a3" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type0,arg0,type1,arg1) \ -type name(type0 arg0,type1 arg1) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a4, %2 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1) \ - : "a2", "a3", "a4" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type0,arg0,type1,arg1,type2,arg2) \ -type name(type0 arg0,type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a5, %2 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2) \ - : "a2", "a3", "a4", "a5" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a6, %2 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), "a" (arg3) \ - : "a2", "a3", "a4", "a5", "a6" \ - ); \ -__syscall_return(type,__res); \ -} - -/* Note that we save and restore the a7 frame pointer. - * Including a7 in the clobber list doesn't do what you'd expect. - */ -#define _syscall5(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3,type4 arg4) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a9, a7 \n" \ - " mov a7, %2 \n" \ - " mov a6, %6 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov a7, a9 \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), \ - "a" (arg3), "a" (arg4) \ - : "a2", "a3", "a4", "a5", "a6", "a9" \ - ); \ -__syscall_return(type,__res); \ -} - -/* Note that we save and restore the a7 frame pointer. - * Including a7 in the clobber list doesn't do what you'd expect. - */ -#define _syscall6(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a9, a7 \n" \ - " mov a8, %2 \n" \ - " mov a7, %7 \n" \ - " mov a6, %6 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov a7, a9 \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), \ - "a" (arg3), "a" (arg4), "a" (arg5) \ - : "a2", "a3", "a4", "a5", "a6", "a8", "a9" \ - ); \ -__syscall_return(type,__res); \ -} - /* * "Conditional" syscalls * diff --git a/include/crypto/b128ops.h b/include/crypto/b128ops.h new file mode 100644 index 00000000000..0b8e6bc5530 --- /dev/null +++ b/include/crypto/b128ops.h @@ -0,0 +1,80 @@ +/* b128ops.h - common 128-bit block operations + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org> + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 13/06/2006 +*/ + +#ifndef _CRYPTO_B128OPS_H +#define _CRYPTO_B128OPS_H + +#include <linux/types.h> + +typedef struct { + u64 a, b; +} u128; + +typedef struct { + __be64 a, b; +} be128; + +typedef struct { + __le64 b, a; +} le128; + +static inline void u128_xor(u128 *r, const u128 *p, const u128 *q) +{ + r->a = p->a ^ q->a; + r->b = p->b ^ q->b; +} + +static inline void be128_xor(be128 *r, const be128 *p, const be128 *q) +{ + u128_xor((u128 *)r, (u128 *)p, (u128 *)q); +} + +static inline void le128_xor(le128 *r, const le128 *p, const le128 *q) +{ + u128_xor((u128 *)r, (u128 *)p, (u128 *)q); +} + +#endif /* _CRYPTO_B128OPS_H */ diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h new file mode 100644 index 00000000000..4fd31520244 --- /dev/null +++ b/include/crypto/gf128mul.h @@ -0,0 +1,198 @@ +/* gf128mul.h - GF(2^128) multiplication functions + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 31/01/2006 + + An implementation of field multiplication in Galois Field GF(128) +*/ + +#ifndef _CRYPTO_GF128MUL_H +#define _CRYPTO_GF128MUL_H + +#include <crypto/b128ops.h> +#include <linux/slab.h> + +/* Comment by Rik: + * + * For some background on GF(2^128) see for example: http://- + * csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf + * + * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can + * be mapped to computer memory in a variety of ways. Let's examine + * three common cases. + * + * Take a look at the 16 binary octets below in memory order. The msb's + * are left and the lsb's are right. char b[16] is an array and b[0] is + * the first octet. + * + * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 + * b[0] b[1] b[2] b[3] b[13] b[14] b[15] + * + * Every bit is a coefficient of some power of X. We can store the bits + * in every byte in little-endian order and the bytes themselves also in + * little endian order. I will call this lle (little-little-endian). + * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks + * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. + * This format was originally implemented in gf128mul and is used + * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). + * + * Another convention says: store the bits in bigendian order and the + * bytes also. This is bbe (big-big-endian). Now the buffer above + * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, + * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe + * is partly implemented. + * + * Both of the above formats are easy to implement on big-endian + * machines. + * + * EME (which is patent encumbered) uses the ble format (bits are stored + * in big endian order and the bytes in little endian). The above buffer + * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. + * + * The common machine word-size is smaller than 128 bits, so to make + * an efficient implementation we must split into machine word sizes. + * This file uses one 32bit for the moment. Machine endianness comes into + * play. The lle format in relation to machine endianness is discussed + * below by the original author of gf128mul Dr Brian Gladman. + * + * Let's look at the bbe and ble format on a little endian machine. + * + * bbe on a little endian machine u32 x[4]: + * + * MS x[0] LS MS x[1] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 + * + * MS x[2] LS MS x[3] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 + * + * ble on a little endian machine + * + * MS x[0] LS MS x[1] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 + * + * MS x[2] LS MS x[3] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 + * + * Multiplications in GF(2^128) are mostly bit-shifts, so you see why + * ble (and lbe also) are easier to implement on a little-endian + * machine than on a big-endian machine. The converse holds for bbe + * and lle. + * + * Note: to have good alignment, it seems to me that it is sufficient + * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize + * machines this will automatically aligned to wordsize and on a 64-bit + * machine also. + */ +/* Multiply a GF128 field element by x. Field elements are held in arrays + of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower + indexed bits placed in the more numerically significant bit positions + within bytes. + + On little endian machines the bit indexes translate into the bit + positions within four 32-bit words in the following way + + MS x[0] LS MS x[1] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 + + MS x[2] LS MS x[3] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 + + On big endian machines the bit indexes translate into the bit + positions within four 32-bit words in the following way + + MS x[0] LS MS x[1] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 + + MS x[2] LS MS x[3] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 +*/ + +/* A slow generic version of gf_mul, implemented for lle and bbe + * It multiplies a and b and puts the result in a */ +void gf128mul_lle(be128 *a, const be128 *b); + +void gf128mul_bbe(be128 *a, const be128 *b); + + +/* 4k table optimization */ + +struct gf128mul_4k { + be128 t[256]; +}; + +struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); +struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); +void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); + +static inline void gf128mul_free_4k(struct gf128mul_4k *t) +{ + kfree(t); +} + + +/* 64k table optimization, implemented for lle and bbe */ + +struct gf128mul_64k { + struct gf128mul_4k *t[16]; +}; + +/* first initialize with the constant factor with which you + * want to multiply and then call gf128_64k_lle with the other + * factor in the first argument, the table in the second and a + * scratch register in the third. Afterwards *a = *r. */ +struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); +struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); +void gf128mul_free_64k(struct gf128mul_64k *t); +void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); +void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); + +#endif /* _CRYPTO_GF128MUL_H */ diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ff433126361..e618b25b5ad 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -221,6 +221,7 @@ unifdef-y += if_bridge.h unifdef-y += if_ec.h unifdef-y += if_eql.h unifdef-y += if_ether.h +unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_pppox.h @@ -282,6 +283,7 @@ unifdef-y += nvram.h unifdef-y += parport.h unifdef-y += patchkey.h unifdef-y += pci.h +unifdef-y += personality.h unifdef-y += pktcdvd.h unifdef-y += pmu.h unifdef-y += poll.h @@ -337,6 +339,7 @@ unifdef-y += videodev.h unifdef-y += wait.h unifdef-y += wanrouter.h unifdef-y += watchdog.h +unifdef-y += wireless.h unifdef-y += xfrm.h objhdr-y += version.h diff --git a/include/linux/aio.h b/include/linux/aio.h index 9e350fd44d7..3372ec6bf53 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -111,7 +111,6 @@ struct kiocb { size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ - long ki_retried; /* just for testing */ struct iovec ki_inline_vec; /* inline vector */ struct iovec *ki_iovec; unsigned long ki_nr_segs; @@ -238,7 +237,6 @@ do { \ } while (0) #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) -#define is_retried_kiocb(iocb) ((iocb)->ki_retried > 1) #include <linux/aio_abi.h> diff --git a/include/linux/audit.h b/include/linux/audit.h index b2ca666d999..0e07db6cc0d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -101,6 +101,10 @@ #define AUDIT_MAC_CIPSOV4_DEL 1408 /* NetLabel: del CIPSOv4 DOI entry */ #define AUDIT_MAC_MAP_ADD 1409 /* NetLabel: add LSM domain mapping */ #define AUDIT_MAC_MAP_DEL 1410 /* NetLabel: del LSM domain mapping */ +#define AUDIT_MAC_IPSEC_ADDSA 1411 /* Add a XFRM state */ +#define AUDIT_MAC_IPSEC_DELSA 1412 /* Delete a XFRM state */ +#define AUDIT_MAC_IPSEC_ADDSPD 1413 /* Add a XFRM policy */ +#define AUDIT_MAC_IPSEC_DELSPD 1414 /* Delete a XFRM policy */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 @@ -377,6 +381,7 @@ extern void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); +extern void audit_log_task_context(struct audit_buffer *ab); extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_bprm(struct linux_binprm *bprm); @@ -449,6 +454,7 @@ extern int audit_n_rules; #define audit_inode_update(i) do { ; } while (0) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) +#define audit_log_task_context(b) do { ; } while (0) #define audit_ipc_obj(i) ({ 0; }) #define audit_ipc_set_perm(q,u,g,m) ({ 0; }) #define audit_bprm(p) ({ 0; }) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 31e9abb6d97..2275f274870 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -119,8 +119,7 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned int *_hash_mask, unsigned long limit); -#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ -#define HASH_EARLY 0x00000002 /* Allocating during early boot? */ +#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ /* Only NUMA needs hash distribution. * IA64 is known to have sufficient vmalloc space. diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h new file mode 100644 index 00000000000..777dbf695d4 --- /dev/null +++ b/include/linux/bottom_half.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BH_H +#define _LINUX_BH_H + +extern void local_bh_disable(void); +extern void __local_bh_enable(void); +extern void _local_bh_enable(void); +extern void local_bh_enable(void); +extern void local_bh_enable_ip(unsigned long ip); + +#endif /* _LINUX_BH_H */ diff --git a/include/linux/carta_random32.h b/include/linux/carta_random32.h deleted file mode 100644 index f6f3bd9f20b..00000000000 --- a/include/linux/carta_random32.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - * - * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. - * Contributed by Stephane Eranian <eranian@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307 USA - */ -#ifndef _LINUX_CARTA_RANDOM32_H_ -#define _LINUX_CARTA_RANDOM32_H_ - -u64 carta_random32(u64 seed); - -#endif /* _LINUX_CARTA_RANDOM32_H_ */ diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h index 6e27f42e3a5..cb57c30081a 100644 --- a/include/linux/cciss_ioctl.h +++ b/include/linux/cciss_ioctl.h @@ -80,7 +80,7 @@ typedef __u32 DriverVer_type; #define HWORD __u16 #define DWORD __u32 -#define CISS_MAX_LUN 16 +#define CISS_MAX_LUN 1024 #define LEVEL2LUN 1 // index into Target(x) structure, due to byte swapping #define LEVEL3LUN 0 diff --git a/include/linux/cdev.h b/include/linux/cdev.h index ee5f53f2ca1..f309b00e986 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -2,6 +2,10 @@ #define _LINUX_CDEV_H #ifdef __KERNEL__ +#include <linux/kobject.h> +#include <linux/kdev_t.h> +#include <linux/list.h> + struct cdev { struct kobject kobj; struct module *owner; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f02d71bf689..bfb520212d7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -24,10 +24,11 @@ #include <linux/compiler.h> #include <linux/cpumask.h> #include <asm/semaphore.h> +#include <linux/mutex.h> struct cpu { int node_id; /* The node which contains the CPU */ - int no_control; /* Should the sysfs control file be created? */ + int hotpluggable; /* creates sysfs control file if hotpluggable */ struct sys_device sysdev; }; @@ -74,6 +75,17 @@ extern struct sysdev_class cpu_sysdev_class; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ + +static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) +{ + mutex_lock(cpu_hp_mutex); +} + +static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) +{ + mutex_unlock(cpu_hp_mutex); +} + extern void lock_cpu_hotplug(void); extern void unlock_cpu_hotplug(void); #define hotcpu_notifier(fn, pri) { \ @@ -85,17 +97,24 @@ extern void unlock_cpu_hotplug(void); #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) int cpu_down(unsigned int cpu); #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#else + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) +{ } +static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) +{ } + #define lock_cpu_hotplug() do { } while (0) #define unlock_cpu_hotplug() do { } while (0) #define lock_cpu_hotplug_interruptible() 0 -#define hotcpu_notifier(fn, pri) do { } while (0) -#define register_hotcpu_notifier(nb) do { } while (0) -#define unregister_hotcpu_notifier(nb) do { } while (0) +#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define register_hotcpu_notifier(nb) do { (void)(nb); } while (0) +#define unregister_hotcpu_notifier(nb) do { (void)(nb); } while (0) /* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */ static inline int cpu_is_offline(int cpu) { return 0; } -#endif +#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_SUSPEND_SMP extern int disable_nonboot_cpus(void); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 4d8adf66368..8821e1f75b4 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,6 +23,7 @@ extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); +#define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ @@ -45,7 +46,7 @@ extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); -extern struct file_operations proc_cpuset_operations; +extern const struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); extern void cpuset_lock(void); @@ -83,6 +84,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) return node_possible_map; } +#define cpuset_current_mems_allowed (node_online_map) static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_task_memory_state(void) {} #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6485e9716b3..4aa9046601d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -241,12 +241,8 @@ int crypto_unregister_alg(struct crypto_alg *alg); * Algorithm query interface. */ #ifdef CONFIG_CRYPTO -int crypto_alg_available(const char *name, u32 flags) - __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else -static int crypto_alg_available(const char *name, u32 flags) - __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { return 0; @@ -707,16 +703,6 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules; -void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) - __deprecated_for_modules; -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) - __deprecated_for_modules; -void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) - __deprecated_for_modules; - static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { return (struct crypto_hash *)tfm; @@ -729,14 +715,6 @@ static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) return __crypto_hash_cast(tfm); } -static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) __deprecated; -static inline int crypto_digest_setkey(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) -{ - return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen); -} - static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name, u32 type, u32 mask) { diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 952bee79a8f..a1c10b0c4cf 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -24,7 +24,7 @@ extern int debug_locks_off(void); int __ret = 0; \ \ if (unlikely(c)) { \ - if (debug_locks_off()) \ + if (debug_locks_silent || debug_locks_off()) \ WARN_ON(1); \ __ret = 1; \ } \ diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 561e2a77805..55d1ca5e60f 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,7 +30,7 @@ #ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ -extern kmem_cache_t *delayacct_cache; +extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); diff --git a/include/linux/device.h b/include/linux/device.h index 583a341e016..49ab53ce92d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -371,6 +371,9 @@ struct device { core doesn't touch it */ struct dev_pm_info power; +#ifdef CONFIG_NUMA + int numa_node; /* NUMA node this device is close to */ +#endif u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as @@ -394,6 +397,25 @@ struct device { void (*release)(struct device * dev); }; +#ifdef CONFIG_NUMA +static inline int dev_to_node(struct device *dev) +{ + return dev->numa_node; +} +static inline void set_dev_node(struct device *dev, int node) +{ + dev->numa_node = node; +} +#else +static inline int dev_to_node(struct device *dev) +{ + return -1; +} +static inline void set_dev_node(struct device *dev, int node) +{ +} +#endif + static inline void * dev_get_drvdata (struct device *dev) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 66d621dbcb6..df1c91855f0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -300,8 +300,9 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); -extern unsigned long __init efi_get_time(void); +extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); +extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** diff --git a/include/linux/elf.h b/include/linux/elf.h index 743d5c8e6d3..60713e6ea29 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -6,6 +6,8 @@ #include <linux/elf-em.h> #include <asm/elf.h> +struct file; + #ifndef elf_read_implies_exec /* Executables for which elf_read_implies_exec() returns TRUE will have the READ_IMPLIES_EXEC personality flag set automatically. @@ -358,6 +360,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr #define elf_note elf32_note +#define elf_addr_t Elf32_Off #else @@ -365,6 +368,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr #define elf_note elf64_note +#define elf_addr_t Elf64_Off #endif diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index ce0e6109aff..8c43b13a02f 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -109,74 +109,32 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); * been done yet. */ -void ext3_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext3_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) +static inline void ext3_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) { - int err = journal_get_undo_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; + journal_release_buffer(handle, bh); } -static inline int -__ext3_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_get_write_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +void ext3_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); -static inline void -ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - journal_release_buffer(handle, bh); -} +int __ext3_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = journal_forget(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh) -{ - int err = journal_revoke(handle, blocknr, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_get_create_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_revoke(const char *where, handle_t *handle, + unsigned long blocknr, struct buffer_head *bh); -static inline int -__ext3_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_dirty_metadata(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); +int __ext3_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); #define ext3_journal_get_undo_access(handle, bh) \ __ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh)) diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index 72dd631912e..d716e6392cf 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -114,74 +114,32 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); * been done yet. */ -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext4_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) { - int err = jbd2_journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; + jbd2_journal_release_buffer(handle, bh); } -static inline int -__ext4_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = jbd2_journal_get_write_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); -static inline void -ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - jbd2_journal_release_buffer(handle, bh); -} +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh) -{ - int err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_get_create_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh); -static inline int -__ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); #define ext4_journal_get_undo_access(handle, bh) \ __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) diff --git a/include/linux/file.h b/include/linux/file.h index 74183e6f7f4..6e77b9177f9 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -64,6 +64,8 @@ struct files_struct { #define files_fdtable(files) (rcu_dereference((files)->fdt)) +extern struct kmem_cache *filp_cachep; + extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); @@ -114,4 +116,6 @@ struct files_struct *get_files_struct(struct task_struct *); void FASTCALL(put_files_struct(struct files_struct *fs)); void reset_files_struct(struct task_struct *, struct files_struct *); +extern struct kmem_cache *files_cachep; + #endif /* __LINUX_FILE_H */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h new file mode 100644 index 00000000000..6e05e3e7ce3 --- /dev/null +++ b/include/linux/freezer.h @@ -0,0 +1,87 @@ +/* Freezer declarations */ + +#ifdef CONFIG_PM +/* + * Check if a process has been frozen + */ +static inline int frozen(struct task_struct *p) +{ + return p->flags & PF_FROZEN; +} + +/* + * Check if there is a request to freeze a process + */ +static inline int freezing(struct task_struct *p) +{ + return p->flags & PF_FREEZE; +} + +/* + * Request that a process be frozen + * FIXME: SMP problem. We may not modify other process' flags! + */ +static inline void freeze(struct task_struct *p) +{ + p->flags |= PF_FREEZE; +} + +/* + * Sometimes we may need to cancel the previous 'freeze' request + */ +static inline void do_not_freeze(struct task_struct *p) +{ + p->flags &= ~PF_FREEZE; +} + +/* + * Wake up a frozen process + */ +static inline int thaw_process(struct task_struct *p) +{ + if (frozen(p)) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + return 1; + } + return 0; +} + +/* + * freezing is complete, mark process as frozen + */ +static inline void frozen_process(struct task_struct *p) +{ + p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; +} + +extern void refrigerator(void); +extern int freeze_processes(void); +extern void thaw_processes(void); + +static inline int try_to_freeze(void) +{ + if (freezing(current)) { + refrigerator(); + return 1; + } else + return 0; +} + +extern void thaw_some_processes(int all); + +#else +static inline int frozen(struct task_struct *p) { return 0; } +static inline int freezing(struct task_struct *p) { return 0; } +static inline void freeze(struct task_struct *p) { BUG(); } +static inline int thaw_process(struct task_struct *p) { return 1; } +static inline void frozen_process(struct task_struct *p) { BUG(); } + +static inline void refrigerator(void) {} +static inline int freeze_processes(void) { BUG(); return 0; } +static inline void thaw_processes(void) {} + +static inline int try_to_freeze(void) { return 0; } + + +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index cac7b1ef954..70b99fbb560 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -543,19 +543,22 @@ struct inode { struct list_head i_dentry; unsigned long i_ino; atomic_t i_count; - umode_t i_mode; unsigned int i_nlink; uid_t i_uid; gid_t i_gid; dev_t i_rdev; + unsigned long i_version; loff_t i_size; +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; unsigned int i_blkbits; - unsigned long i_version; blkcnt_t i_blocks; unsigned short i_bytes; + umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ struct mutex i_mutex; struct rw_semaphore i_alloc_sem; @@ -598,9 +601,6 @@ struct inode { void *i_security; #endif void *i_private; /* fs or device private pointer */ -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; -#endif }; /* @@ -636,7 +636,7 @@ extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. */ -static inline loff_t i_size_read(struct inode *inode) +static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; @@ -679,12 +679,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } -static inline unsigned iminor(struct inode *inode) +static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); } -static inline unsigned imajor(struct inode *inode) +static inline unsigned imajor(const struct inode *inode) { return MAJOR(inode->i_rdev); } @@ -1481,7 +1481,9 @@ extern char * getname(const char __user *); extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(unsigned long); -#define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) +extern struct kmem_cache *names_cachep; + +#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL #define putname(name) __putname(name) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index c623d12a486..11a36ceddf7 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -18,6 +18,8 @@ struct fs_struct { .umask = 0022, \ } +extern struct kmem_cache *fs_cachep; + extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); extern void set_fs_root(struct fs_struct *, struct vfsmount *, struct dentry *); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 9fc48a674b8..534744efe30 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -15,7 +15,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 7 +#define FUSE_KERNEL_MINOR_VERSION 8 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -92,6 +92,11 @@ struct fuse_file_lock { #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) +/** + * Release flags + */ +#define FUSE_RELEASE_FLUSH (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -127,6 +132,8 @@ enum fuse_opcode { FUSE_ACCESS = 34, FUSE_CREATE = 35, FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -205,12 +212,13 @@ struct fuse_open_out { struct fuse_release_in { __u64 fh; __u32 flags; - __u32 padding; + __u32 release_flags; + __u64 lock_owner; }; struct fuse_flush_in { __u64 fh; - __u32 flush_flags; + __u32 unused; __u32 padding; __u64 lock_owner; }; @@ -296,6 +304,16 @@ struct fuse_interrupt_in { __u64 unique; }; +struct fuse_bmap_in { + __u64 block; + __u32 blocksize; + __u32 padding; +}; + +struct fuse_bmap_out { + __u64 block; +}; + struct fuse_in_header { __u32 len; __u32 opcode; diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 9049dc65ae5..f7a93770e1b 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -17,6 +17,9 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) #define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 /* * List of reserved static generic netlink identifiers: @@ -58,9 +61,6 @@ enum { CTRL_ATTR_OP_UNSPEC, CTRL_ATTR_OP_ID, CTRL_ATTR_OP_FLAGS, - CTRL_ATTR_OP_POLICY, - CTRL_ATTR_OP_DOIT, - CTRL_ATTR_OP_DUMPIT, __CTRL_ATTR_OP_MAX, }; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bf2b6bc3f6f..00c314aedab 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -116,6 +116,9 @@ static inline enum zone_type gfp_zone(gfp_t flags) #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif +#ifndef HAVE_ARCH_ALLOC_PAGE +static inline void arch_alloc_page(struct page *page, int order) { } +#endif extern struct page * FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index a7ae7c177ca..8b7e4c1e32a 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -54,8 +54,13 @@ struct gfs2_inum { __be64 no_addr; }; -static inline int gfs2_inum_equal(const struct gfs2_inum *ino1, - const struct gfs2_inum *ino2) +struct gfs2_inum_host { + __u64 no_formal_ino; + __u64 no_addr; +}; + +static inline int gfs2_inum_equal(const struct gfs2_inum_host *ino1, + const struct gfs2_inum_host *ino2) { return ino1->no_formal_ino == ino2->no_formal_ino && ino1->no_addr == ino2->no_addr; @@ -89,6 +94,12 @@ struct gfs2_meta_header { __be32 __pad1; /* Was incarnation number in gfs1 */ }; +struct gfs2_meta_header_host { + __u32 mh_magic; + __u32 mh_type; + __u32 mh_format; +}; + /* * super-block structure * @@ -128,6 +139,23 @@ struct gfs2_sb { /* In gfs1, quota and license dinodes followed */ }; +struct gfs2_sb_host { + struct gfs2_meta_header_host sb_header; + + __u32 sb_fs_format; + __u32 sb_multihost_format; + + __u32 sb_bsize; + __u32 sb_bsize_shift; + + struct gfs2_inum_host sb_master_dir; /* Was jindex dinode in gfs1 */ + struct gfs2_inum_host sb_root_dir; + + char sb_lockproto[GFS2_LOCKNAME_LEN]; + char sb_locktable[GFS2_LOCKNAME_LEN]; + /* In gfs1, quota and license dinodes followed */ +}; + /* * resource index structure */ @@ -145,6 +173,14 @@ struct gfs2_rindex { __u8 ri_reserved[64]; }; +struct gfs2_rindex_host { + __u64 ri_addr; /* grp block disk address */ + __u64 ri_data0; /* first data location */ + __u32 ri_length; /* length of rgrp header in fs blocks */ + __u32 ri_data; /* num of data blocks in rgrp */ + __u32 ri_bitbytes; /* number of bytes in data bitmaps */ +}; + /* * resource group header structure */ @@ -176,6 +212,13 @@ struct gfs2_rgrp { __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ }; +struct gfs2_rgrp_host { + __u32 rg_flags; + __u32 rg_free; + __u32 rg_dinodes; + __u64 rg_igeneration; +}; + /* * quota structure */ @@ -187,6 +230,12 @@ struct gfs2_quota { __u8 qu_reserved[64]; }; +struct gfs2_quota_host { + __u64 qu_limit; + __u64 qu_warn; + __u64 qu_value; +}; + /* * dinode structure */ @@ -270,6 +319,27 @@ struct gfs2_dinode { __u8 di_reserved[56]; }; +struct gfs2_dinode_host { + __u64 di_size; /* number of bytes in file */ + __u64 di_blocks; /* number of blocks in file */ + + /* This section varies from gfs1. Padding added to align with + * remainder of dinode + */ + __u64 di_goal_meta; /* rgrp to alloc from next */ + __u64 di_goal_data; /* data block goal */ + __u64 di_generation; /* generation number for NFS */ + + __u32 di_flags; /* GFS2_DIF_... */ + __u16 di_height; /* height of metadata */ + + /* These only apply to directories */ + __u16 di_depth; /* Number of bits in the table */ + __u32 di_entries; /* The number of entries in the directory */ + + __u64 di_eattr; /* extended attribute block number */ +}; + /* * directory structure - many of these per directory file */ @@ -344,6 +414,16 @@ struct gfs2_log_header { __be32 lh_hash; }; +struct gfs2_log_header_host { + struct gfs2_meta_header_host lh_header; + + __u64 lh_sequence; /* Sequence number of this transaction */ + __u32 lh_flags; /* GFS2_LOG_HEAD_... */ + __u32 lh_tail; /* Block number of log tail */ + __u32 lh_blkno; + __u32 lh_hash; +}; + /* * Log type descriptor */ @@ -384,6 +464,11 @@ struct gfs2_inum_range { __be64 ir_length; }; +struct gfs2_inum_range_host { + __u64 ir_start; + __u64 ir_length; +}; + /* * Statfs change * Describes an change to the pool of free and allocated @@ -396,6 +481,12 @@ struct gfs2_statfs_change { __be64 sc_dinodes; }; +struct gfs2_statfs_change_host { + __u64 sc_total; + __u64 sc_free; + __u64 sc_dinodes; +}; + /* * Quota change * Describes an allocation change for a particular @@ -410,33 +501,38 @@ struct gfs2_quota_change { __be32 qc_id; }; +struct gfs2_quota_change_host { + __u64 qc_change; + __u32 qc_flags; /* GFS2_QCF_... */ + __u32 qc_id; +}; + #ifdef __KERNEL__ /* Translation functions */ -extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf); -extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); -extern void gfs2_sb_in(struct gfs2_sb *sb, const void *buf); -extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf); -extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf); -extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf); -extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf); -extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); -extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf); -extern void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf); -extern void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf); +extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf); +extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf); +extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); +extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); +extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); +extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); +extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); +extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf); +struct gfs2_inode; +extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); -extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf); -extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf); -extern void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf); -extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf); -extern void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf); -extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); +extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf); +extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf); +extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf); +extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); +extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf); +extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf); /* Printing functions */ -extern void gfs2_rindex_print(const struct gfs2_rindex *ri); -extern void gfs2_dinode_print(const struct gfs2_dinode *di); +extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri); +extern void gfs2_dinode_print(const struct gfs2_inode *ip); #endif /* __KERNEL__ */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index fd7d12daa94..3d8768b619e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -3,6 +3,7 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -41,9 +42,10 @@ static inline void *kmap(struct page *page) #define kunmap(page) do { (void) (page); } while (0) -#define kmap_atomic(page, idx) page_address(page) -#define kunmap_atomic(addr, idx) do { } while (0) -#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) +#define kmap_atomic(page, idx) \ + ({ pagefault_disable(); page_address(page); }) +#define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) +#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ace64e57e17..a60995afe33 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -35,6 +35,7 @@ extern int sysctl_hugetlb_shm_group; pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 1fb02e17f6f..52f53e2e70c 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -490,7 +490,7 @@ struct i2o_dma { */ struct i2o_pool { char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *mempool; }; @@ -986,7 +986,8 @@ extern void i2o_driver_unregister(struct i2o_driver *); /** * i2o_driver_notify_controller_add - Send notification of added controller - * to a single I2O driver + * @drv: I2O driver + * @c: I2O controller * * Send notification of added controller to a single registered driver. */ @@ -998,8 +999,9 @@ static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv, }; /** - * i2o_driver_notify_controller_remove - Send notification of removed - * controller to a single I2O driver + * i2o_driver_notify_controller_remove - Send notification of removed controller + * @drv: I2O driver + * @c: I2O controller * * Send notification of removed controller to a single registered driver. */ @@ -1011,8 +1013,9 @@ static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv, }; /** - * i2o_driver_notify_device_add - Send notification of added device to a - * single I2O driver + * i2o_driver_notify_device_add - Send notification of added device + * @drv: I2O driver + * @i2o_dev: the added i2o_device * * Send notification of added device to a single registered driver. */ @@ -1025,7 +1028,8 @@ static inline void i2o_driver_notify_device_add(struct i2o_driver *drv, /** * i2o_driver_notify_device_remove - Send notification of removed device - * to a single I2O driver + * @drv: I2O driver + * @i2o_dev: the added i2o_device * * Send notification of removed device to a single registered driver. */ @@ -1148,7 +1152,7 @@ static inline void i2o_msg_post(struct i2o_controller *c, /** * i2o_msg_post_wait - Post and wait a message and wait until return * @c: controller - * @m: message to post + * @msg: message to post * @timeout: time in seconds to wait * * This API allows an OSM to post a message and then be told whether or diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 33c5daacc74..733790d4f7d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -73,7 +73,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ .count = ATOMIC_INIT(1), \ - .nslock = SPIN_LOCK_UNLOCKED, \ + .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ .namespace = NULL, \ INIT_IPC_NS(ipc_ns) \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5b83e7b5962..de7593f4e89 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,6 +11,7 @@ #include <linux/hardirq.h> #include <linux/sched.h> #include <linux/irqflags.h> +#include <linux/bottom_half.h> #include <asm/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> @@ -217,12 +218,6 @@ static inline void __deprecated save_and_cli(unsigned long *x) #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ -extern void local_bh_disable(void); -extern void __local_bh_enable(void); -extern void _local_bh_enable(void); -extern void local_bh_enable(void); -extern void local_bh_enable_ip(unsigned long ip); - /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes tasklets are more than enough. F.e. all serial device BHs et diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 796ca009fd4..7a9db390c56 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -208,6 +208,15 @@ struct kernel_ipmi_msg code as the first byte of the incoming data, unlike a response. */ +/* + * Modes for ipmi_set_maint_mode() and the userland IOCTL. The AUTO + * setting is the default and means it will be set on certain + * commands. Hard setting it on and off will override automatic + * operation. + */ +#define IPMI_MAINTENANCE_MODE_AUTO 0 +#define IPMI_MAINTENANCE_MODE_OFF 1 +#define IPMI_MAINTENANCE_MODE_ON 2 #ifdef __KERNEL__ @@ -374,6 +383,35 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned int chans); /* + * Go into a mode where the driver will not autonomously attempt to do + * things with the interface. It will still respond to attentions and + * interrupts, and it will expect that commands will complete. It + * will not automatcially check for flags, events, or things of that + * nature. + * + * This is primarily used for firmware upgrades. The idea is that + * when you go into firmware upgrade mode, you do this operation + * and the driver will not attempt to do anything but what you tell + * it or what the BMC asks for. + * + * Note that if you send a command that resets the BMC, the driver + * will still expect a response from that command. So the BMC should + * reset itself *after* the response is sent. Resetting before the + * response is just silly. + * + * If in auto maintenance mode, the driver will automatically go into + * maintenance mode for 30 seconds if it sees a cold reset, a warm + * reset, or a firmware NetFN. This means that code that uses only + * firmware NetFN commands to do upgrades will work automatically + * without change, assuming it sends a message every 30 seconds or + * less. + * + * See the IPMI_MAINTENANCE_MODE_xxx defines for what the mode means. + */ +int ipmi_get_maintenance_mode(ipmi_user_t user); +int ipmi_set_maintenance_mode(ipmi_user_t user, int mode); + +/* * Allow run-to-completion mode to be set for the interface of * a specific user. */ @@ -656,4 +694,11 @@ struct ipmi_timing_parms #define IPMICTL_GET_TIMING_PARMS_CMD _IOR(IPMI_IOC_MAGIC, 23, \ struct ipmi_timing_parms) +/* + * Set the maintenance mode. See ipmi_set_maintenance_mode() above + * for a description of what this does. + */ +#define IPMICTL_GET_MAINTENANCE_MODE_CMD _IOR(IPMI_IOC_MAGIC, 30, int) +#define IPMICTL_SET_MAINTENANCE_MODE_CMD _IOW(IPMI_IOC_MAGIC, 31, int) + #endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 4d04d8b58a0..b56a158d587 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -46,6 +46,8 @@ #define IPMI_NETFN_APP_REQUEST 0x06 #define IPMI_NETFN_APP_RESPONSE 0x07 #define IPMI_GET_DEVICE_ID_CMD 0x01 +#define IPMI_COLD_RESET_CMD 0x02 +#define IPMI_WARM_RESET_CMD 0x03 #define IPMI_CLEAR_MSG_FLAGS_CMD 0x30 #define IPMI_GET_DEVICE_GUID_CMD 0x08 #define IPMI_GET_MSG_FLAGS_CMD 0x31 @@ -60,20 +62,27 @@ #define IPMI_NETFN_STORAGE_RESPONSE 0x0b #define IPMI_ADD_SEL_ENTRY_CMD 0x44 +#define IPMI_NETFN_FIRMWARE_REQUEST 0x08 +#define IPMI_NETFN_FIRMWARE_RESPONSE 0x09 + /* The default slave address */ #define IPMI_BMC_SLAVE_ADDR 0x20 /* The BT interface on high-end HP systems supports up to 255 bytes in * one transfer. Its "virtual" BMC supports some commands that are longer * than 128 bytes. Use the full 256, plus NetFn/LUN, Cmd, cCode, plus - * some overhead. It would be nice to base this on the "BT Capabilities" - * but that's too hard to propagate to the rest of the driver. */ + * some overhead; it's not worth the effort to dynamically size this based + * on the results of the "Get BT Capabilities" command. */ #define IPMI_MAX_MSG_LENGTH 272 /* multiple of 16 */ #define IPMI_CC_NO_ERROR 0x00 #define IPMI_NODE_BUSY_ERR 0xc0 #define IPMI_INVALID_COMMAND_ERR 0xc1 +#define IPMI_TIMEOUT_ERR 0xc3 #define IPMI_ERR_MSG_TRUNCATED 0xc6 +#define IPMI_REQ_LEN_INVALID_ERR 0xc7 +#define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8 +#define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */ #define IPMI_LOST_ARBITRATION_ERR 0x81 #define IPMI_BUS_ERR 0x82 #define IPMI_NAK_ON_WRITE_ERR 0x83 diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 6d9c7e4da47..c0633108d05 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -115,6 +115,13 @@ struct ipmi_smi_handlers poll for operations during things like crash dumps. */ void (*poll)(void *send_info); + /* Enable/disable firmware maintenance mode. Note that this + is *not* the modes defined, this is simply an on/off + setting. The message handler does the mode handling. Note + that this is called from interupt context, so it cannot + block. */ + void (*set_maintenance_mode)(void *send_info, int enable); + /* Tell the handler that we are using it/not using it. The message handler get the modules that this handler belongs to; this function lets the SMI claim any modules that it @@ -173,6 +180,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *dev, + const char *sysfs_name, unsigned char slave_addr); /* diff --git a/include/linux/jbd.h b/include/linux/jbd.h index fe89444b1c6..45273755126 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -839,7 +839,6 @@ struct journal_s */ /* Filing buffers */ -extern void __journal_temp_unlink_buffer(struct journal_head *jh); extern void journal_unfile_buffer(journal_t *, struct journal_head *); extern void __journal_unfile_buffer(struct journal_head *); extern void __journal_refile_buffer(struct journal_head *); @@ -949,7 +948,7 @@ void journal_put_journal_head(struct journal_head *jh); /* * handle management */ -extern kmem_cache_t *jbd_handle_cache; +extern struct kmem_cache *jbd_handle_cache; static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ddb12879578..0e0fedd2039 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -848,7 +848,6 @@ struct journal_s */ /* Filing buffers */ -extern void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); @@ -958,7 +957,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh); /* * handle management */ -extern kmem_cache_t *jbd2_handle_cache; +extern struct kmem_cache *jbd2_handle_cache; static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a4ede62b339..e3abcec6c51 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -105,6 +105,7 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); +void crash_save_cpu(struct pt_regs *regs, int cpu); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ac4c0559f75..769be39b968 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -165,7 +165,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); -extern void free_insn_slot(kprobe_opcode_t *slot); +extern void free_insn_slot(kprobe_opcode_t *slot, int dirty); extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 84eeecd60a0..611f17f79ee 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -248,9 +248,9 @@ static inline struct timeval ktime_to_timeval(const ktime_t kt) * * Returns the scalar nanoseconds representation of kt */ -static inline u64 ktime_to_ns(const ktime_t kt) +static inline s64 ktime_to_ns(const ktime_t kt) { - return (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; + return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; } #endif diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 862d9730a60..8c39654549d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -164,14 +164,12 @@ void nlmclnt_next_cookie(struct nlm_cookie *); */ struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int); struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int); -struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *, int, int, const char *, int); struct rpc_clnt * nlm_bind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32); -struct nsm_handle *nsm_find(const struct sockaddr_in *, const char *, int); void nsm_release(struct nsm_handle *); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 819f08f1310..498bfbd3b4e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -193,7 +193,6 @@ extern void lockdep_free_key_range(void *start, unsigned long size); extern void lockdep_off(void); extern void lockdep_on(void); -extern int lockdep_internal(void); /* * These methods are used by specific locking variants (spinlocks, @@ -243,6 +242,8 @@ extern void lock_release(struct lockdep_map *lock, int nested, # define INIT_LOCKDEP .lockdep_recursion = 0, +#define lockdep_depth(tsk) ((tsk)->lockdep_depth) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -253,11 +254,6 @@ static inline void lockdep_on(void) { } -static inline int lockdep_internal(void) -{ - return 0; -} - # define lock_acquire(l, s, t, r, c, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) # define lockdep_init() do { } while (0) @@ -277,6 +273,9 @@ static inline int lockdep_internal(void) * The class key takes no space if lockdep is disabled: */ struct lock_class_key { }; + +#define lockdep_depth(tsk) (0) + #endif /* !LOCKDEP */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) diff --git a/include/linux/mm.h b/include/linux/mm.h index d538de90196..a17b147c61e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -114,6 +114,8 @@ struct vm_area_struct { #endif }; +extern struct kmem_cache *vm_area_cachep; + /* * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is * disabled, then there's a single shared list of VMAs maintained by the @@ -294,6 +296,24 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); /* + * Compound pages have a destructor function. Provide a + * prototype for that function and accessor functions. + * These are _only_ valid on the head of a PG_compound page. + */ +typedef void compound_page_dtor(struct page *); + +static inline void set_compound_page_dtor(struct page *page, + compound_page_dtor *dtor) +{ + page[1].lru.next = (void *)dtor; +} + +static inline compound_page_dtor *get_compound_page_dtor(struct page *page) +{ + return (compound_page_dtor *)page[1].lru.next; +} + +/* * Multiple processes may "see" the same page. E.g. for untouched * mappings of /dev/null, all processes see the same page full of * zeroes, and text pages of executables and shared libraries have @@ -396,7 +416,9 @@ void split_page(struct page *page, unsigned int order); * We are going to use the flags for the page to node mapping if its in * there. This includes the case where there is no node, so it is implicit. */ -#define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) +#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) +#define NODE_NOT_IN_PAGE_FLAGS +#endif #ifndef PFN_SECTION_SHIFT #define PFN_SECTION_SHIFT 0 @@ -411,13 +433,18 @@ void split_page(struct page *page, unsigned int order); #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ -#if FLAGS_HAS_NODE -#define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) +/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */ +#ifdef NODE_NOT_IN_PAGEFLAGS +#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) +#else +#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) +#endif + +#if ZONES_WIDTH > 0 +#define ZONEID_PGSHIFT ZONES_PGSHIFT #else -#define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) +#define ZONEID_PGSHIFT NODES_PGOFF #endif -#define ZONETABLE_PGSHIFT ZONES_PGSHIFT #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED @@ -426,26 +453,28 @@ void split_page(struct page *page, unsigned int order); #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) +#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } -struct zone; -extern struct zone *zone_table[]; - +/* + * The identification function is only used by the buddy allocator for + * determining if two pages could be buddies. We are not really + * identifying a zone since we could be using a the section number + * id if we have not node id available in page flags. + * We guarantee only that it will return the same value for two + * combinable pages in a zone. + */ static inline int page_zone_id(struct page *page) { - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK; -} -static inline struct zone *page_zone(struct page *page) -{ - return zone_table[page_zone_id(page)]; + BUILD_BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK); + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; } -static inline unsigned long zone_to_nid(struct zone *zone) +static inline int zone_to_nid(struct zone *zone) { #ifdef CONFIG_NUMA return zone->node; @@ -454,13 +483,20 @@ static inline unsigned long zone_to_nid(struct zone *zone) #endif } -static inline unsigned long page_to_nid(struct page *page) +#ifdef NODE_NOT_IN_PAGE_FLAGS +extern int page_to_nid(struct page *page); +#else +static inline int page_to_nid(struct page *page) +{ + return (page->flags >> NODES_PGSHIFT) & NODES_MASK; +} +#endif + +static inline struct zone *page_zone(struct page *page) { - if (FLAGS_HAS_NODE) - return (page->flags >> NODES_PGSHIFT) & NODES_MASK; - else - return zone_to_nid(page_zone(page)); + return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; } + static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; @@ -477,6 +513,7 @@ static inline void set_page_node(struct page *page, unsigned long node) page->flags &= ~(NODES_MASK << NODES_PGSHIFT); page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; } + static inline void set_page_section(struct page *page, unsigned long section) { page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); @@ -947,8 +984,6 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -extern void zonetable_add(struct zone *zone, int nid, enum zone_type zid, - unsigned long pfn, unsigned long size); #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e06683e2bea..e339a7345f2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -278,7 +278,7 @@ struct zone { /* * rarely used fields: */ - char *name; + const char *name; } ____cacheline_internodealigned_in_smp; /* @@ -288,19 +288,94 @@ struct zone { */ #define DEF_PRIORITY 12 +/* Maximum number of zones on a zonelist */ +#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) + +#ifdef CONFIG_NUMA +/* + * We cache key information from each zonelist for smaller cache + * footprint when scanning for free pages in get_page_from_freelist(). + * + * 1) The BITMAP fullzones tracks which zones in a zonelist have come + * up short of free memory since the last time (last_fullzone_zap) + * we zero'd fullzones. + * 2) The array z_to_n[] maps each zone in the zonelist to its node + * id, so that we can efficiently evaluate whether that node is + * set in the current tasks mems_allowed. + * + * Both fullzones and z_to_n[] are one-to-one with the zonelist, + * indexed by a zones offset in the zonelist zones[] array. + * + * The get_page_from_freelist() routine does two scans. During the + * first scan, we skip zones whose corresponding bit in 'fullzones' + * is set or whose corresponding node in current->mems_allowed (which + * comes from cpusets) is not set. During the second scan, we bypass + * this zonelist_cache, to ensure we look methodically at each zone. + * + * Once per second, we zero out (zap) fullzones, forcing us to + * reconsider nodes that might have regained more free memory. + * The field last_full_zap is the time we last zapped fullzones. + * + * This mechanism reduces the amount of time we waste repeatedly + * reexaming zones for free memory when they just came up low on + * memory momentarilly ago. + * + * The zonelist_cache struct members logically belong in struct + * zonelist. However, the mempolicy zonelists constructed for + * MPOL_BIND are intentionally variable length (and usually much + * shorter). A general purpose mechanism for handling structs with + * multiple variable length members is more mechanism than we want + * here. We resort to some special case hackery instead. + * + * The MPOL_BIND zonelists don't need this zonelist_cache (in good + * part because they are shorter), so we put the fixed length stuff + * at the front of the zonelist struct, ending in a variable length + * zones[], as is needed by MPOL_BIND. + * + * Then we put the optional zonelist cache on the end of the zonelist + * struct. This optional stuff is found by a 'zlcache_ptr' pointer in + * the fixed length portion at the front of the struct. This pointer + * both enables us to find the zonelist cache, and in the case of + * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL) + * to know that the zonelist cache is not there. + * + * The end result is that struct zonelists come in two flavors: + * 1) The full, fixed length version, shown below, and + * 2) The custom zonelists for MPOL_BIND. + * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache. + * + * Even though there may be multiple CPU cores on a node modifying + * fullzones or last_full_zap in the same zonelist_cache at the same + * time, we don't lock it. This is just hint data - if it is wrong now + * and then, the allocator will still function, perhaps a bit slower. + */ + + +struct zonelist_cache { + unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ + DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ + unsigned long last_full_zap; /* when last zap'd (jiffies) */ +}; +#else +struct zonelist_cache; +#endif + /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * - * Right now a zonelist takes up less than a cacheline. We never - * modify it apart from boot-up, and only a few indices are used, - * so despite the zonelist table being relatively big, the cache - * footprint of this construct is very small. + * If zlcache_ptr is not NULL, then it is just the address of zlcache, + * as explained above. If zlcache_ptr is NULL, there is no zlcache. */ + struct zonelist { - struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited + struct zonelist_cache *zlcache_ptr; // NULL or &zlcache + struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited +#ifdef CONFIG_NUMA + struct zonelist_cache zlcache; // optional ... +#endif }; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 7c0c2c198f1..4a189dadb16 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -63,6 +63,9 @@ struct kparam_array not there, read bits mean it's readable, write bits mean it's writable. */ #define __module_param_call(prefix, name, set, get, arg, perm) \ + /* Default value instead of permissions? */ \ + static int __param_perm_check_##name __attribute__((unused)) = \ + BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)); \ static char __param_str_##name[] = prefix #name; \ static struct kernel_param const __param_##name \ __attribute_used__ \ diff --git a/include/linux/msg.h b/include/linux/msg.h index acc7c174ff0..f1b60740d64 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -92,6 +92,12 @@ struct msg_queue { struct list_head q_senders; }; +/* Helper routines for sys_msgsnd and sys_msgrcv */ +extern long do_msgsnd(int msqid, long mtype, void __user *mtext, + size_t msgsz, int msgflg); +extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, + size_t msgsz, long msgtyp, int msgflg); + #endif /* __KERNEL__ */ #endif /* _LINUX_MSG_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 27c48daa318..b2b91c47756 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -94,7 +94,7 @@ do { \ #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ - , .wait_lock = SPIN_LOCK_UNLOCKED \ + , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } diff --git a/include/linux/nbd.h b/include/linux/nbd.h index d6b6dc09ad9..0f3e6930254 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -64,6 +64,7 @@ struct nbd_device { struct gendisk *disk; int blksize; u64 bytesize; + pid_t pid; /* pid of nbd-client, if attached */ }; #endif diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index fb049ec11ff..9d8144a488c 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -2,6 +2,8 @@ #ifndef _NF_CONNTRACK_PPTP_H #define _NF_CONNTRACK_PPTP_H +#include <linux/netfilter/nf_conntrack_common.h> + /* state of the control session */ enum pptp_ctrlsess_state { PPTP_SESSION_NONE, /* no session present */ @@ -295,7 +297,6 @@ union pptp_ctrl_union { /* crap needed for nf_conntrack_compat.h */ struct nf_conn; struct nf_conntrack_expect; -enum ip_conntrack_info; extern int (*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3..acb4ed13024 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,9 +15,14 @@ * disables interrupts for a long time. This call is stateless. */ #ifdef ARCH_HAS_NMI_WATCHDOG +#include <asm/nmi.h> extern void touch_nmi_watchdog(void); #else # define touch_nmi_watchdog() touch_softlockup_watchdog() #endif +#ifndef trigger_all_cpu_backtrace +#define trigger_all_cpu_backtrace() do { } while (0) +#endif + #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c09da1e30c5..4d972bbef31 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -390,7 +390,7 @@ #define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d #define PCI_DEVICE_ID_NS_CS5535_AUDIO 0x002e #define PCI_DEVICE_ID_NS_CS5535_USB 0x002f -#define PCI_DEVICE_ID_NS_CS5535_VIDEO 0x0030 +#define PCI_DEVICE_ID_NS_GX_VIDEO 0x0030 #define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 @@ -403,8 +403,7 @@ #define PCI_DEVICE_ID_NS_SC1100_XBUS 0x0515 #define PCI_DEVICE_ID_NS_87410 0xd001 -#define PCI_DEVICE_ID_NS_CS5535_HOST_BRIDGE 0x0028 -#define PCI_DEVICE_ID_NS_CS5535_ISA_BRIDGE 0x002b +#define PCI_DEVICE_ID_NS_GX_HOST_BRIDGE 0x0028 #define PCI_VENDOR_ID_TSENG 0x100c #define PCI_DEVICE_ID_TSENG_W32P_2 0x3202 @@ -1864,6 +1863,7 @@ #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 +#define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 #define PCI_VENDOR_ID_SAMSUNG 0x144d @@ -1931,6 +1931,7 @@ #define PCI_DEVICE_ID_TIGON3_5750M 0x167c #define PCI_DEVICE_ID_TIGON3_5751M 0x167d #define PCI_DEVICE_ID_TIGON3_5751F 0x167e +#define PCI_DEVICE_ID_TIGON3_5787F 0x167f #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 #define PCI_DEVICE_ID_TIGON3_5786 0x169a @@ -2002,6 +2003,8 @@ #define PCI_DEVICE_ID_FARSITE_TE1 0x1610 #define PCI_DEVICE_ID_FARSITE_TE1C 0x1612 +#define PCI_VENDOR_ID_ARIMA 0x161f + #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 #define PCI_DEVICE_ID_BCM1250_HT 0x0002 diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 0f0b880c428..265bafab649 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -285,6 +285,7 @@ struct sadb_x_sec_ctx { #define SADB_X_AALG_SHA2_384HMAC 6 #define SADB_X_AALG_SHA2_512HMAC 7 #define SADB_X_AALG_RIPEMD160HMAC 8 +#define SADB_X_AALG_AES_XCBC_MAC 9 #define SADB_X_AALG_NULL 251 /* kame */ #define SADB_AALG_MAX 251 diff --git a/include/linux/profile.h b/include/linux/profile.h index acce53fd38b..5670b340c4e 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -6,10 +6,15 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/cpumask.h> +#include <linux/cache.h> + #include <asm/errno.h> +extern int prof_on __read_mostly; + #define CPU_PROFILING 1 #define SCHED_PROFILING 2 +#define SLEEP_PROFILING 3 struct proc_dir_entry; struct pt_regs; @@ -18,7 +23,24 @@ struct notifier_block; /* init basic kernel profiler */ void __init profile_init(void); void profile_tick(int); -void profile_hit(int, void *); + +/* + * Add multiple profiler hits to a given address: + */ +void profile_hits(int, void *ip, unsigned int nr_hits); + +/* + * Single profiler hit: + */ +static inline void profile_hit(int type, void *ip) +{ + /* + * Speedup for the common (no profiling enabled) case: + */ + if (unlikely(prof_on == type)) + profile_hits(type, ip, 1); +} + #ifdef CONFIG_PROC_FS void create_prof_cpu_mask(struct proc_dir_entry *); #else diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 5110201a415..90c23f690c0 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -37,6 +37,9 @@ extern int dquot_release(struct dquot *dquot); extern int dquot_commit_info(struct super_block *sb, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); +int remove_inode_dquot_ref(struct inode *inode, int type, + struct list_head *tofree_head); + extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index cbfa1153742..0deb842541a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -21,6 +22,35 @@ #include <linux/preempt.h> #include <linux/types.h> +#include <linux/kernel.h> +#include <linux/rcupdate.h> + +/* + * A direct pointer (root->rnode pointing directly to a data item, + * rather than another radix_tree_node) is signalled by the low bit + * set in the root->rnode pointer. + * + * In this case root->height is also NULL, but the direct pointer tests are + * needed for RCU lookups when root->height is unreliable. + */ +#define RADIX_TREE_DIRECT_PTR 1 + +static inline void *radix_tree_ptr_to_direct(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_DIRECT_PTR); +} + +static inline void *radix_tree_direct_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_DIRECT_PTR); +} + +static inline int radix_tree_is_direct_ptr(void *ptr) +{ + return (int)((unsigned long)ptr & RADIX_TREE_DIRECT_PTR); +} + +/*** radix-tree API starts here ***/ #define RADIX_TREE_MAX_TAGS 2 @@ -47,6 +77,77 @@ do { \ (root)->rnode = NULL; \ } while (0) +/** + * Radix-tree synchronization + * + * The radix-tree API requires that users provide all synchronisation (with + * specific exceptions, noted below). + * + * Synchronization of access to the data items being stored in the tree, and + * management of their lifetimes must be completely managed by API users. + * + * For API usage, in general, + * - any function _modifying_ the the tree or tags (inserting or deleting + * items, setting or clearing tags must exclude other modifications, and + * exclude any functions reading the tree. + * - any function _reading_ the the tree or tags (looking up items or tags, + * gang lookups) must exclude modifications to the tree, but may occur + * concurrently with other readers. + * + * The notable exceptions to this rule are the following functions: + * radix_tree_lookup + * radix_tree_tag_get + * radix_tree_gang_lookup + * radix_tree_gang_lookup_tag + * radix_tree_tagged + * + * The first 4 functions are able to be called locklessly, using RCU. The + * caller must ensure calls to these functions are made within rcu_read_lock() + * regions. Other readers (lock-free or otherwise) and modifications may be + * running concurrently. + * + * It is still required that the caller manage the synchronization and lifetimes + * of the items. So if RCU lock-free lookups are used, typically this would mean + * that the items have their own locks, or are amenable to lock-free access; and + * that the items are freed by RCU (or only freed after having been deleted from + * the radix tree *and* a synchronize_rcu() grace period). + * + * (Note, rcu_assign_pointer and rcu_dereference are not needed to control + * access to data items when inserting into or looking up from the radix tree) + * + * radix_tree_tagged is able to be called without locking or RCU. + */ + +/** + * radix_tree_deref_slot - dereference a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * Returns: item that was stored in that slot with any direct pointer flag + * removed. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree at least read + * locked across slot lookup and dereference. More likely, will be used with + * radix_tree_replace_slot(), as well, so caller will hold tree write locked. + */ +static inline void *radix_tree_deref_slot(void **pslot) +{ + return radix_tree_direct_to_ptr(*pslot); +} +/** + * radix_tree_replace_slot - replace item in a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * @item: new item to store in the slot. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree write locked + * across slot lookup and replacement. + */ +static inline void radix_tree_replace_slot(void **pslot, void *item) +{ + BUG_ON(radix_tree_is_direct_ptr(item)); + rcu_assign_pointer(*pslot, + (void *)((unsigned long)item | + ((unsigned long)*pslot & RADIX_TREE_DIRECT_PTR))); +} + int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index f13299a1559..03636d7918f 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -235,7 +235,7 @@ struct raid5_private_data { */ int active_name; char cache_name[2][20]; - kmem_cache_t *slab_cache; /* for allocating stripes */ + struct kmem_cache *slab_cache; /* for allocating stripes */ int seq_flush, seq_write; int quiesce; diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 7bc6bfb8625..d0e4dce33ad 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -739,7 +739,7 @@ struct block_head { #define PUT_B_FREE_SPACE(p_s_bh,val) do { set_blkh_free_space(B_BLK_HEAD(p_s_bh),val); } while (0) /* Get right delimiting key. -- little endian */ -#define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)) +#define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)))) /* Does the buffer contain a disk leaf. */ #define B_IS_ITEMS_LEVEL(p_s_bh) (B_LEVEL(p_s_bh) == DISK_LEAF_NODE_LEVEL) diff --git a/include/linux/relay.h b/include/linux/relay.h index 0e3d91b7699..c6a48bfc8b1 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -274,7 +274,7 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, /* * exported relay file operations, kernel/relay.c */ -extern struct file_operations relay_file_operations; +extern const struct file_operations relay_file_operations; #endif /* _LINUX_RELAY_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index db2c1df4fef..36f850373d2 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -30,11 +30,11 @@ struct anon_vma { #ifdef CONFIG_MMU -extern kmem_cache_t *anon_vma_cachep; +extern struct kmem_cache *anon_vma_cachep; static inline struct anon_vma *anon_vma_alloc(void) { - return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL); + return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); } static inline void anon_vma_free(struct anon_vma *anon_vma) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 5d41dee82f8..b0090e9f788 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -63,7 +63,7 @@ struct hrtimer_sleeper; #endif #define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = SPIN_LOCK_UNLOCKED \ + { .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index ae1fcadd598..813cee13da0 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -44,7 +44,8 @@ struct rw_semaphore { #endif #define __RWSEM_INITIALIZER(name) \ -{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } +{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/linux/sched.h b/include/linux/sched.h index eafe4a7b823..dede82c6344 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -194,7 +194,16 @@ extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; -extern void show_state(void); +/* + * Only dump TASK_* tasks. (-1 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(-1); +} + extern void show_regs(struct pt_regs *); /* @@ -338,15 +347,23 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - unsigned dumpable:2; cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ mm_context_t context; - /* Token based thrashing protection. */ - unsigned long swap_token_time; - char recent_pagein; + /* Swap token stuff */ + /* + * Last value of global fault stamp as seen by this process. + * In other words, this value gives an indication of how long + * it has been since this task got the token. + * Look at mm/thrash.c + */ + unsigned int faultstamp; + unsigned int token_priority; + unsigned int last_interval; + + unsigned char dumpable:2; /* coredumping support */ int core_waiters; @@ -556,7 +573,7 @@ struct sched_info { #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ #ifdef CONFIG_SCHEDSTATS -extern struct file_operations proc_schedstat_operations; +extern const struct file_operations proc_schedstat_operations; #endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_TASK_DELAY_ACCT @@ -1288,7 +1305,6 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); -extern int kill_proc_info(int, struct siginfo *, pid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); @@ -1610,87 +1626,6 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); extern void normalize_rt_tasks(void); -#ifdef CONFIG_PM -/* - * Check if a process has been frozen - */ -static inline int frozen(struct task_struct *p) -{ - return p->flags & PF_FROZEN; -} - -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return p->flags & PF_FREEZE; -} - -/* - * Request that a process be frozen - * FIXME: SMP problem. We may not modify other process' flags! - */ -static inline void freeze(struct task_struct *p) -{ - p->flags |= PF_FREEZE; -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request - */ -static inline void do_not_freeze(struct task_struct *p) -{ - p->flags &= ~PF_FREEZE; -} - -/* - * Wake up a frozen process - */ -static inline int thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - wake_up_process(p); - return 1; - } - return 0; -} - -/* - * freezing is complete, mark process as frozen - */ -static inline void frozen_process(struct task_struct *p) -{ - p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; -} - -extern void refrigerator(void); -extern int freeze_processes(void); -extern void thaw_processes(void); - -static inline int try_to_freeze(void) -{ - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; -} -#else -static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void freeze(struct task_struct *p) { BUG(); } -static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void frozen_process(struct task_struct *p) { BUG(); } - -static inline void refrigerator(void) {} -static inline int freeze_processes(void) { BUG(); return 0; } -static inline void thaw_processes(void) {} - -static inline int try_to_freeze(void) { return 0; } - -#endif /* CONFIG_PM */ #endif /* __KERNEL__ */ #endif diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 2925e66a673..b02308ee766 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -42,7 +42,8 @@ struct screen_info { u16 pages; /* 0x32 */ u16 vesa_attributes; /* 0x34 */ u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ + /* 0x3a -- 0x3b reserved for future expansion */ + /* 0x3c -- 0x3f micro stack for relocatable kernels */ }; extern struct screen_info screen_info; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b95f6eb7254..3e3cccbb1ca 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -20,7 +20,7 @@ struct seq_file { loff_t index; loff_t version; struct mutex lock; - struct seq_operations *op; + const struct seq_operations *op; void *private; }; @@ -31,7 +31,7 @@ struct seq_operations { int (*show) (struct seq_file *m, void *v); }; -int seq_open(struct file *, struct seq_operations *); +int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 8e968141372..71310d80c09 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -41,6 +41,7 @@ enum { PLAT8250_DEV_FOURPORT, PLAT8250_DEV_ACCENT, PLAT8250_DEV_BOCA, + PLAT8250_DEV_EXAR_ST16C554, PLAT8250_DEV_HUB6, PLAT8250_DEV_MCA, PLAT8250_DEV_AU1X00, diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 463ab953b09..82767213664 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -132,6 +132,8 @@ #define PORT_S3C2412 73 +/* Xilinx uartlite */ +#define PORT_UARTLITE 74 #ifdef __KERNEL__ diff --git a/include/linux/signal.h b/include/linux/signal.h index 117135e33d6..14749056dd6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -241,6 +241,8 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); +extern struct kmem_cache *sighand_cachep; + #endif /* __KERNEL__ */ #endif /* _LINUX_SIGNAL_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a05a5f7c0b7..4ff3940210d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -332,20 +332,20 @@ struct sk_buff { extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone); + gfp_t priority, int fclone, int node); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0); + return __alloc_skb(size, priority, 0, -1); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1); + return __alloc_skb(size, priority, 1, -1); } -extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +extern struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); diff --git a/include/linux/slab.h b/include/linux/slab.h index c4947b8a2c0..2271886744f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -7,27 +7,17 @@ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H -#if defined(__KERNEL__) +#ifdef __KERNEL__ -typedef struct kmem_cache kmem_cache_t; +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/types.h> +#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ +#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ +#include <linux/compiler.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/types.h> -#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ -#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ - -/* flags for kmem_cache_alloc() */ -#define SLAB_NOFS GFP_NOFS -#define SLAB_NOIO GFP_NOIO -#define SLAB_ATOMIC GFP_ATOMIC -#define SLAB_USER GFP_USER -#define SLAB_KERNEL GFP_KERNEL -#define SLAB_DMA GFP_DMA - -#define SLAB_LEVEL_MASK GFP_LEVEL_MASK - -#define SLAB_NO_GROW __GFP_NO_GROW /* don't grow a cache */ +/* kmem_cache_t exists for legacy reasons and is not used by code in mm */ +typedef struct kmem_cache kmem_cache_t __deprecated; /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build @@ -57,22 +47,23 @@ typedef struct kmem_cache kmem_cache_t; /* prototypes */ extern void __init kmem_cache_init(void); -extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(void *, kmem_cache_t *, unsigned long), - void (*)(void *, kmem_cache_t *, unsigned long)); -extern void kmem_cache_destroy(kmem_cache_t *); -extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); +extern struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, + unsigned long, + void (*)(void *, struct kmem_cache *, unsigned long), + void (*)(void *, struct kmem_cache *, unsigned long)); +extern void kmem_cache_destroy(struct kmem_cache *); +extern int kmem_cache_shrink(struct kmem_cache *); +extern void *kmem_cache_alloc(struct kmem_cache *, gfp_t); extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); -extern void kmem_cache_free(kmem_cache_t *, void *); -extern unsigned int kmem_cache_size(kmem_cache_t *); -extern const char *kmem_cache_name(kmem_cache_t *); +extern void kmem_cache_free(struct kmem_cache *, void *); +extern unsigned int kmem_cache_size(struct kmem_cache *); +extern const char *kmem_cache_name(struct kmem_cache *); /* Size description struct for general caches. */ struct cache_sizes { - size_t cs_size; - kmem_cache_t *cs_cachep; - kmem_cache_t *cs_dmacachep; + size_t cs_size; + struct kmem_cache *cs_cachep; + struct kmem_cache *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; @@ -211,7 +202,7 @@ extern unsigned int ksize(const void *); extern int slab_is_available(void); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); +extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); extern void *__kmalloc_node(size_t size, gfp_t flags, int node); static inline void *kmalloc_node(size_t size, gfp_t flags, int node) @@ -236,8 +227,27 @@ found: } return __kmalloc_node(size, flags, node); } + +/* + * kmalloc_node_track_caller is a special version of kmalloc_node that + * records the calling function of the routine calling it for slab leak + * tracking instead of just the calling function (confusing, eh?). + * It's useful when the call to kmalloc_node comes from a widely-used + * standard allocator where we care about the real place the memory + * allocation request comes from. + */ +#ifndef CONFIG_DEBUG_SLAB +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node(size, flags, node) #else -static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) +extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + __builtin_return_address(0)) +#endif +#else /* CONFIG_NUMA */ +static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, + gfp_t flags, int node) { return kmem_cache_alloc(cachep, flags); } @@ -245,10 +255,13 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); } + +#define kmalloc_node_track_caller(size, flags, node) \ + kmalloc_track_caller(size, flags) #endif extern int FASTCALL(kmem_cache_reap(int)); -extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); +extern int FASTCALL(kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)); #else /* CONFIG_SLOB */ @@ -283,16 +296,9 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #define kzalloc(s, f) __kzalloc(s, f) #define kmalloc_track_caller kmalloc -#endif /* CONFIG_SLOB */ +#define kmalloc_node_track_caller kmalloc_node -/* System wide caches */ -extern kmem_cache_t *vm_area_cachep; -extern kmem_cache_t *names_cachep; -extern kmem_cache_t *files_cachep; -extern kmem_cache_t *filp_cachep; -extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *sighand_cachep; -extern kmem_cache_t *bio_cachep; +#endif /* CONFIG_SLOB */ #endif /* __KERNEL__ */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 51649987f69..7ba23ec8211 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -99,6 +99,13 @@ static inline int up_smp_call_function(void) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) +static inline int smp_call_function_single(int cpuid, void (*func) (void *info), + void *info, int retry, int wait) +{ + /* Disable interrupts here? */ + func(info); + return 0; +} #endif /* !SMP */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 8451052ca66..94b767d6427 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -52,6 +52,7 @@ #include <linux/thread_info.h> #include <linux/kernel.h> #include <linux/stringify.h> +#include <linux/bottom_half.h> #include <asm/system.h> diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h new file mode 100644 index 00000000000..d3e5f275654 --- /dev/null +++ b/include/linux/start_kernel.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_START_KERNEL_H +#define _LINUX_START_KERNEL_H + +#include <linux/linkage.h> +#include <linux/init.h> + +/* Define the prototype for start_kernel here, rather than cluttering + up something else. */ + +extern asmlinkage void __init start_kernel(void); + +#endif /* _LINUX_START_KERNEL_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index b6b6ad6253b..97c76165258 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -217,7 +217,7 @@ struct rpc_wait_queue { #ifndef RPC_DEBUG # define RPC_WAITQ_INIT(var,qname) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -226,7 +226,7 @@ struct rpc_wait_queue { } #else # define RPC_WAITQ_INIT(var,qname) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1237f16ecd..bf99bd49f8e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -9,10 +9,13 @@ #include <linux/init.h> #include <linux/pm.h> -/* page backup entry */ +/* struct pbe is used for creating lists of pages that should be restored + * atomically during the resume from disk, because the page frames they have + * occupied before the suspend are in use. + */ struct pbe { - unsigned long address; /* address of the copy */ - unsigned long orig_address; /* original address of page */ + void *address; /* address of the copy */ + void *orig_address; /* original address of a page */ struct pbe *next; }; diff --git a/include/linux/swap.h b/include/linux/swap.h index e7c36ba2a2d..add51cebc8d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -218,8 +218,6 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, - struct bio **bio_chain); extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ @@ -247,9 +245,10 @@ extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); extern void free_swap_and_cache(swp_entry_t); -extern int swap_type_of(dev_t); +extern int swap_type_of(dev_t, sector_t); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); +extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); @@ -259,7 +258,6 @@ extern spinlock_t swap_lock; /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; -extern unsigned long swap_token_default_timeout; extern void grab_swap_token(void); extern void __put_swap_token(struct mm_struct *); diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 6562a2050a2..7e9680f4afd 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -12,64 +12,27 @@ #include <net/genetlink.h> #ifdef CONFIG_TASKSTATS -extern kmem_cache_t *taskstats_cache; +extern struct kmem_cache *taskstats_cache; extern struct mutex taskstats_exit_mutex; -static inline void taskstats_exit_free(struct taskstats *tidstats) -{ - if (tidstats) - kmem_cache_free(taskstats_cache, tidstats); -} - static inline void taskstats_tgid_init(struct signal_struct *sig) { sig->stats = NULL; } -static inline void taskstats_tgid_alloc(struct task_struct *tsk) -{ - struct signal_struct *sig = tsk->signal; - struct taskstats *stats; - - if (sig->stats != NULL) - return; - - /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); - - spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; - } - spin_unlock_irq(&tsk->sighand->siglock); - - if (stats) - kmem_cache_free(taskstats_cache, stats); -} - static inline void taskstats_tgid_free(struct signal_struct *sig) { if (sig->stats) kmem_cache_free(taskstats_cache, sig->stats); } -extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); -extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int); +extern void taskstats_exit(struct task_struct *, int group_dead); extern void taskstats_init_early(void); #else -static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) -{} -static inline void taskstats_exit_free(struct taskstats *ptidstats) -{} -static inline void taskstats_exit_send(struct task_struct *tsk, - struct taskstats *tidstats, - int group_dead, unsigned int cpu) +static inline void taskstats_exit(struct task_struct *tsk, int group_dead) {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} -static inline void taskstats_tgid_alloc(struct task_struct *tsk) -{} static inline void taskstats_tgid_free(struct signal_struct *sig) {} static inline void taskstats_init_early(void) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a48d7f11c7b..975c963e578 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -1,8 +1,43 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include <linux/preempt.h> #include <asm/uaccess.h> +/* + * These routines enable/disable the pagefault handler in that + * it will not take any locks and go straight to the fixup table. + * + * They have great resemblance to the preempt_disable/enable calls + * and in fact they are identical; this is because currently there is + * no other way to make the pagefault handlers do this. So we do + * disable preemption but we don't necessarily care about that. + */ +static inline void pagefault_disable(void) +{ + inc_preempt_count(); + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} + +static inline void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + dec_preempt_count(); + /* + * make sure we do.. + */ + barrier(); + preempt_check_resched(); +} + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, @@ -30,14 +65,22 @@ static inline unsigned long __copy_from_user_nocache(void *to, * do_page_fault() doesn't attempt to take mmap_sem. This makes * probe_kernel_address() suitable for use within regions where the caller * already holds mmap_sem, or other locks which nest inside mmap_sem. + * This must be a macro because __get_user() needs to know the types of the + * args. + * + * We don't include enough header files to be able to do the set_fs(). We + * require that the probe_kernel_address() caller will do that. */ #define probe_kernel_address(addr, retval) \ ({ \ long ret; \ + mm_segment_t old_fs = get_fs(); \ \ - inc_preempt_count(); \ - ret = __get_user(retval, addr); \ - dec_preempt_count(); \ + set_fs(KERNEL_DS); \ + pagefault_disable(); \ + ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + pagefault_enable(); \ + set_fs(old_fs); \ ret; \ }) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4a3ea83c6d1..edef8d50b26 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -147,9 +147,11 @@ struct execute_work { extern struct workqueue_struct *__create_workqueue(const char *name, - int singlethread); -#define create_workqueue(name) __create_workqueue((name), 0) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1) + int singlethread, + int freezeable); +#define create_workqueue(name) __create_workqueue((name), 0, 0) +#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) extern void destroy_workqueue(struct workqueue_struct *wq); @@ -160,6 +162,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); extern int FASTCALL(schedule_work(struct work_struct *work)); +extern int FASTCALL(run_scheduled_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); diff --git a/include/net/dst.h b/include/net/dst.h index e156e38e4ac..62b7e7598e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -98,7 +98,7 @@ struct dst_ops int entry_size; atomic_t entries; - kmem_cache_t *kmem_cachep; + struct kmem_cache *kmem_cachep; }; #ifdef __KERNEL__ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a9eb2eaf094..34cc76e3ddb 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -125,7 +125,7 @@ struct inet_hashinfo { rwlock_t lhash_lock ____cacheline_aligned; atomic_t lhash_users; wait_queue_head_t lhash_wait; - kmem_cache_t *bind_bucket_cachep; + struct kmem_cache *bind_bucket_cachep; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -136,10 +136,10 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( } extern struct inet_bind_bucket * - inet_bind_bucket_create(kmem_cache_t *cachep, + inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, const unsigned short snum); -extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, +extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); static inline int inet_bhashfn(const __u16 lport, const int bhash_size) diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h index 492dedaa8ac..1720539ac2c 100644 --- a/include/net/irda/irlan_filter.h +++ b/include/net/irda/irlan_filter.h @@ -28,6 +28,8 @@ void irlan_check_command_param(struct irlan_cb *self, char *param, char *value); void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); +#ifdef CONFIG_PROC_FS void irlan_print_filter(struct seq_file *seq, int filter_type); +#endif #endif /* IRLAN_FILTER_H */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index c8aacbd2e33..23967031ddb 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -160,7 +160,7 @@ struct neigh_table atomic_t entries; rwlock_t lock; unsigned long last_rand; - kmem_cache_t *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics *stats; struct neighbour **hash_buckets; unsigned int hash_mask; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cef3136e22a..41bcc9eb420 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,7 +7,7 @@ #include <net/netfilter/nf_conntrack.h> extern struct list_head nf_conntrack_expect_list; -extern kmem_cache_t *nf_conntrack_expect_cachep; +extern struct kmem_cache *nf_conntrack_expect_cachep; extern struct file_operations exp_file_ops; struct nf_conntrack_expect diff --git a/include/net/request_sock.h b/include/net/request_sock.h index e37baaf2080..7aed02ce2b6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -29,7 +29,7 @@ struct proto; struct request_sock_ops { int family; int obj_size; - kmem_cache_t *slab; + struct kmem_cache *slab; int (*rtx_syn_ack)(struct sock *sk, struct request_sock *req, struct dst_entry *dst); @@ -60,7 +60,7 @@ struct request_sock { static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops) { - struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC); + struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC); if (req != NULL) req->rsk_ops = ops; diff --git a/include/net/sock.h b/include/net/sock.h index fe3a33fad03..03684e702d1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -571,7 +571,7 @@ struct proto { int *sysctl_rmem; int max_header; - kmem_cache_t *slab; + struct kmem_cache *slab; unsigned int obj_size; atomic_t *orphan_count; @@ -746,6 +746,25 @@ static inline int sk_stream_wmem_schedule(struct sock *sk, int size) */ #define sock_owned_by_user(sk) ((sk)->sk_lock.owner) +/* + * Macro so as to not evaluate some arguments when + * lockdep is not enabled. + * + * Mark both the sk_lock and the sk_lock.slock as a + * per-address-family lock class. + */ +#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ +do { \ + sk->sk_lock.owner = NULL; \ + init_waitqueue_head(&sk->sk_lock.wq); \ + spin_lock_init(&(sk)->sk_lock.slock); \ + debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ + sizeof((sk)->sk_lock)); \ + lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ + (skey), (sname)); \ + lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ +} while (0) + extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass)); static inline void lock_sock(struct sock *sk) diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index d7a306ea560..1e1ee3253fd 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -15,7 +15,7 @@ #include <net/sock.h> struct timewait_sock_ops { - kmem_cache_t *twsk_slab; + struct kmem_cache *twsk_slab; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 15ec19dcf9c..e4765413cf8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -392,6 +392,20 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +/* Audit Information */ +struct xfrm_audit +{ + uid_t loginuid; + u32 secid; +}; + +#ifdef CONFIG_AUDITSYSCALL +extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x); +#else +#define xfrm_audit_log(a,s,t,r,p,x) do { ; } while (0) +#endif /* CONFIG_AUDITSYSCALL */ + static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) @@ -906,7 +920,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s #endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); -extern void xfrm_state_flush(u8 proto); +extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_notify(struct xfrm_state *x, int event); @@ -959,13 +973,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); -void xfrm_policy_flush(u8 type); +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi); -struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, +struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(u8 type); +extern void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 9233ed5de66..0c775fceb67 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -557,7 +557,7 @@ struct sas_task { static inline struct sas_task *sas_alloc_task(gfp_t flags) { - extern kmem_cache_t *sas_task_cache; + extern struct kmem_cache *sas_task_cache; struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags); if (task) { @@ -575,7 +575,7 @@ static inline struct sas_task *sas_alloc_task(gfp_t flags) static inline void sas_free_task(struct sas_task *task) { if (task) { - extern kmem_cache_t *sas_task_cache; + extern struct kmem_cache *sas_task_cache; BUG_ON(!list_empty(&task->list)); kmem_cache_free(sas_task_cache, task); } diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 919a80cb322..2cfd7cb36e7 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -6,6 +6,7 @@ #include <linux/romfs_fs.h> #include <linux/initrd.h> #include <linux/sched.h> +#include <linux/freezer.h> #include "do_mounts.h" diff --git a/init/initramfs.c b/init/initramfs.c index d28c1094d7e..85f04037ade 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -182,6 +182,10 @@ static int __init do_collect(void) static int __init do_header(void) { + if (memcmp(collected, "070707", 6)==0) { + error("incorrect cpio method used: use -H newc option"); + return 1; + } if (memcmp(collected, "070701", 6)) { error("no cpio magic"); return 1; diff --git a/init/main.c b/init/main.c index 36f608a7cfb..1174ae3aec8 100644 --- a/init/main.c +++ b/init/main.c @@ -29,6 +29,7 @@ #include <linux/percpu.h> #include <linux/kmod.h> #include <linux/kernel_stat.h> +#include <linux/start_kernel.h> #include <linux/security.h> #include <linux/workqueue.h> #include <linux/profile.h> @@ -73,6 +74,10 @@ #error Sorry, your GCC is too old. It builds incorrect kernels. #endif +#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 +#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. +#endif + static int init(void *); extern void init_IRQ(void); diff --git a/ipc/compat.c b/ipc/compat.c index 4d20cfd38f0..fa18141539f 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -115,7 +115,6 @@ struct compat_shm_info { extern int sem_ctls[]; #define sc_semopm (sem_ctls[2]) -#define MAXBUF (64*1024) static inline int compat_ipc_parse_version(int *cmd) { @@ -307,35 +306,30 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr) long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) { - struct msgbuf __user *p; struct compat_msgbuf __user *up = uptr; long type; if (first < 0) return -EINVAL; - if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf))) + if (second < 0) return -EINVAL; - p = compat_alloc_user_space(second + sizeof(struct msgbuf)); - if (get_user(type, &up->mtype) || - put_user(type, &p->mtype) || - copy_in_user(p->mtext, up->mtext, second)) + if (get_user(type, &up->mtype)) return -EFAULT; - return sys_msgsnd(first, p, second, third); + return do_msgsnd(first, type, up->mtext, second, third); } long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr) { - struct msgbuf __user *p; struct compat_msgbuf __user *up; long type; int err; if (first < 0) return -EINVAL; - if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf))) + if (second < 0) return -EINVAL; if (!version) { @@ -349,14 +343,11 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - p = compat_alloc_user_space(second + sizeof(struct msgbuf)); - err = sys_msgrcv(first, p, second, msgtyp, third); + up = uptr; + err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third); if (err < 0) goto out; - up = uptr; - if (get_user(type, &p->mtype) || - put_user(type, &up->mtype) || - copy_in_user(up->mtext, p->mtext, err)) + if (put_user(type, &up->mtype)) err = -EFAULT; out: return err; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 7c274002c9f..3acc1661e51 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -90,7 +90,7 @@ static struct super_operations mqueue_super_ops; static void remove_notification(struct mqueue_inode_info *info); static spinlock_t mq_lock; -static kmem_cache_t *mqueue_inode_cachep; +static struct kmem_cache *mqueue_inode_cachep; static struct vfsmount *mqueue_mnt; static unsigned int queues_count; @@ -211,7 +211,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type, return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; @@ -224,7 +224,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) { struct mqueue_inode_info *ei; - ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/ipc/msg.c b/ipc/msg.c index 1266b1d0c8e..a388824740e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -626,12 +626,11 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) return 0; } -asmlinkage long -sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) +long do_msgsnd(int msqid, long mtype, void __user *mtext, + size_t msgsz, int msgflg) { struct msg_queue *msq; struct msg_msg *msg; - long mtype; int err; struct ipc_namespace *ns; @@ -639,12 +638,10 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) return -EINVAL; - if (get_user(mtype, &msgp->mtype)) - return -EFAULT; if (mtype < 1) return -EINVAL; - msg = load_msg(msgp->mtext, msgsz); + msg = load_msg(mtext, msgsz); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -723,6 +720,16 @@ out_free: return err; } +asmlinkage long +sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) +{ + long mtype; + + if (get_user(mtype, &msgp->mtype)) + return -EFAULT; + return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); +} + static inline int convert_mode(long *msgtyp, int msgflg) { /* @@ -742,8 +749,8 @@ static inline int convert_mode(long *msgtyp, int msgflg) return SEARCH_EQUAL; } -asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, - long msgtyp, int msgflg) +long do_msgrcv(int msqid, long *pmtype, void __user *mtext, + size_t msgsz, long msgtyp, int msgflg) { struct msg_queue *msq; struct msg_msg *msg; @@ -889,15 +896,30 @@ out_unlock: return PTR_ERR(msg); msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; - if (put_user (msg->m_type, &msgp->mtype) || - store_msg(msgp->mtext, msg, msgsz)) { + *pmtype = msg->m_type; + if (store_msg(mtext, msg, msgsz)) msgsz = -EFAULT; - } + free_msg(msg); return msgsz; } +asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, + long msgtyp, int msgflg) +{ + long err, mtype; + + err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); + if (err < 0) + goto out; + + if (put_user(mtype, &msgp->mtype)) + err = -EFAULT; +out: + return err; +} + #ifdef CONFIG_PROC_FS static int sysvipc_msg_proc_show(struct seq_file *s, void *it) { diff --git a/ipc/sem.c b/ipc/sem.c index 21b3289d640..d3e12efd55c 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1070,14 +1070,13 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) ipc_rcu_getref(sma); sem_unlock(sma); - new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); + new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { ipc_lock_by_ptr(&sma->sem_perm); ipc_rcu_putref(sma); sem_unlock(sma); return ERR_PTR(-ENOMEM); } - memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems); new->semadj = (short *) &new[1]; new->semid = semid; diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 248e1c396f8..4af15802ccd 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -7,7 +7,7 @@ choice default HZ_250 help Allows the configuration of the timer frequency. It is customary - to have the timer interrupt run at 1000 HZ but 100 HZ may be more + to have the timer interrupt run at 1000 Hz but 100 Hz may be more beneficial for servers and NUMA systems that do not need to have a fast response for user interaction and that may experience bus contention and cacheline bounces as a result of timer interrupts. @@ -19,21 +19,30 @@ choice config HZ_100 bool "100 HZ" help - 100 HZ is a typical choice for servers, SMP and NUMA systems + 100 Hz is a typical choice for servers, SMP and NUMA systems with lots of processors that may show reduced performance if too many timer interrupts are occurring. config HZ_250 bool "250 HZ" help - 250 HZ is a good compromise choice allowing server performance + 250 Hz is a good compromise choice allowing server performance while also showing good interactive responsiveness even - on SMP and NUMA systems. + on SMP and NUMA systems. If you are going to be using NTSC video + or multimedia, selected 300Hz instead. + + config HZ_300 + bool "300 HZ" + help + 300 Hz is a good compromise choice allowing server performance + while also showing good interactive responsiveness even + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. config HZ_1000 bool "1000 HZ" help - 1000 HZ is the preferred choice for desktop systems and other + 1000 Hz is the preferred choice for desktop systems and other systems requiring fast interactive responses to events. endchoice @@ -42,5 +51,6 @@ config HZ int default 100 if HZ_100 default 250 if HZ_250 + default 300 if HZ_300 default 1000 if HZ_1000 diff --git a/kernel/acct.c b/kernel/acct.c index 0aad5ca36a8..dc12db8600e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -89,7 +89,8 @@ struct acct_glbs { struct timer_list timer; }; -static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED}; +static struct acct_glbs acct_globals __cacheline_aligned = + {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; /* * Called whenever the timer says to check the free space. diff --git a/kernel/audit.c b/kernel/audit.c index 98106f6078b..d9b690ac684 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -57,6 +57,7 @@ #include <linux/netlink.h> #include <linux/selinux.h> #include <linux/inotify.h> +#include <linux/freezer.h> #include "audit.h" diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 4f40d923af8..2e896f8ae29 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -636,10 +636,9 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule) struct audit_rule *rule; int i; - rule = kmalloc(sizeof(*rule), GFP_KERNEL); + rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (unlikely(!rule)) return NULL; - memset(rule, 0, sizeof(*rule)); rule->flags = krule->flags | krule->listnr; rule->action = krule->action; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ab97e510123..40722e26de9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -731,7 +731,7 @@ static inline void audit_free_context(struct audit_context *context) printk(KERN_ERR "audit: freed %d contexts\n", count); } -static void audit_log_task_context(struct audit_buffer *ab) +void audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; ssize_t len = 0; @@ -760,6 +760,8 @@ error_path: return; } +EXPORT_SYMBOL(audit_log_task_context); + static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { char name[sizeof(tsk->comm)]; @@ -1488,6 +1490,8 @@ uid_t audit_get_loginuid(struct audit_context *ctx) return ctx ? ctx->loginuid : -1; } +EXPORT_SYMBOL(audit_get_loginuid); + /** * __audit_mq_open - record audit data for a POSIX MQ open * @oflag: open flag diff --git a/kernel/configs.c b/kernel/configs.c index f9e31974f4a..8fa1fb28f8a 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf, return count; } -static struct file_operations ikconfig_file_ops = { +static const struct file_operations ikconfig_file_ops = { .owner = THIS_MODULE, .read = ikconfig_read_current, }; diff --git a/kernel/cpu.c b/kernel/cpu.c index 272254f20d9..9124669f458 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -270,11 +270,7 @@ int disable_nonboot_cpus(void) goto out; } } - error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); - if (error) { - printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); - goto out; - } + /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6313c38c930..0a6b4d89f9a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -729,9 +729,11 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) } /* Remaining checks don't apply to root cpuset */ - if ((par = cur->parent) == NULL) + if (cur == &top_cpuset) return 0; + par = cur->parent; + /* We must be a subset of our parent cpuset */ if (!is_cpuset_subset(trial, par)) return -EACCES; @@ -1060,10 +1062,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) cpu_exclusive_changed = (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); mutex_lock(&callback_mutex); - if (turning_on) - set_bit(bit, &cs->flags); - else - clear_bit(bit, &cs->flags); + cs->flags = trialcs.flags; mutex_unlock(&callback_mutex); if (cpu_exclusive_changed) @@ -1281,7 +1280,8 @@ typedef enum { FILE_TASKLIST, } cpuset_filetype_t; -static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf, +static ssize_t cpuset_common_file_write(struct file *file, + const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { struct cpuset *cs = __d_cs(file->f_dentry->d_parent); @@ -1292,7 +1292,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us int retval = 0; /* Crude upper limit on largest legitimate cpulist user might write. */ - if (nbytes > 100 + 6 * NR_CPUS) + if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES)) return -E2BIG; /* +1 for nul-terminator */ @@ -1532,7 +1532,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, return simple_rename(old_dir, old_dentry, new_dir, new_dentry); } -static struct file_operations cpuset_file_operations = { +static const struct file_operations cpuset_file_operations = { .read = cpuset_file_read, .write = cpuset_file_write, .llseek = generic_file_llseek, @@ -2045,7 +2045,6 @@ out: return err; } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) /* * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, @@ -2109,9 +2108,7 @@ static void common_cpu_mem_hotplug_unplug(void) mutex_unlock(&callback_mutex); mutex_unlock(&manage_mutex); } -#endif -#ifdef CONFIG_HOTPLUG_CPU /* * The top_cpuset tracks what CPUs and Memory Nodes are online, * period. This is necessary in order to make cpusets transparent @@ -2128,7 +2125,6 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, common_cpu_mem_hotplug_unplug(); return 0; } -#endif #ifdef CONFIG_MEMORY_HOTPLUG /* @@ -2610,7 +2606,7 @@ static int cpuset_open(struct inode *inode, struct file *file) return single_open(file, proc_cpuset_show, pid); } -struct file_operations proc_cpuset_operations = { +const struct file_operations proc_cpuset_operations = { .open = cpuset_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 66a0ea48751..766d5912b26 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -20,7 +20,7 @@ #include <linux/delayacct.h> int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ -kmem_cache_t *delayacct_cache; +struct kmem_cache *delayacct_cache; static int __init delayacct_setup_disable(char *str) { @@ -41,7 +41,7 @@ void delayacct_init(void) void __delayacct_tsk_init(struct task_struct *tsk) { - tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); + tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); if (tsk->delays) spin_lock_init(&tsk->delays->lock); } diff --git a/kernel/dma.c b/kernel/dma.c index 2020644c938..937b13ca33b 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -140,7 +140,7 @@ static int proc_dma_open(struct inode *inode, struct file *file) return single_open(file, proc_dma_show, NULL); } -static struct file_operations proc_dma_operations = { +static const struct file_operations proc_dma_operations = { .open = proc_dma_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/exit.c b/kernel/exit.c index 06de6c4e8ca..4e3f919edc4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -850,9 +850,7 @@ static void exit_notify(struct task_struct *tsk) fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; - struct taskstats *tidstats; int group_dead; - unsigned int mycpu; profile_task_exit(tsk); @@ -890,8 +888,6 @@ fastcall NORET_TYPE void do_exit(long code) current->comm, current->pid, preempt_count()); - taskstats_exit_alloc(&tidstats, &mycpu); - acct_update_integrals(tsk); if (tsk->mm) { update_hiwater_rss(tsk->mm); @@ -911,8 +907,8 @@ fastcall NORET_TYPE void do_exit(long code) #endif if (unlikely(tsk->audit_context)) audit_free(tsk); - taskstats_exit_send(tsk, tidstats, group_dead, mycpu); - taskstats_exit_free(tidstats); + + taskstats_exit(tsk, group_dead); exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 8cdd3e72ba5..7f2e31ba33a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -82,26 +82,26 @@ int nr_processes(void) #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) -static kmem_cache_t *task_struct_cachep; +static struct kmem_cache *task_struct_cachep; #endif /* SLAB cache for signal_struct structures (tsk->signal) */ -static kmem_cache_t *signal_cachep; +static struct kmem_cache *signal_cachep; /* SLAB cache for sighand_struct structures (tsk->sighand) */ -kmem_cache_t *sighand_cachep; +struct kmem_cache *sighand_cachep; /* SLAB cache for files_struct structures (tsk->files) */ -kmem_cache_t *files_cachep; +struct kmem_cache *files_cachep; /* SLAB cache for fs_struct structures (tsk->fs) */ -kmem_cache_t *fs_cachep; +struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ -kmem_cache_t *vm_area_cachep; +struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ -static kmem_cache_t *mm_cachep; +static struct kmem_cache *mm_cachep; void free_task(struct task_struct *tsk) { @@ -237,7 +237,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem; charge = len; } - tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!tmp) goto fail_nomem; *tmp = *mpnt; @@ -319,7 +319,7 @@ static inline void mm_free_pgd(struct mm_struct * mm) __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); -#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) +#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) #include <linux/init_task.h> @@ -448,7 +448,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) tsk->vfork_done = NULL; complete(vfork_done); } - if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) { + + /* + * If we're exiting normally, clear a user-space tid field if + * requested. We leave this alone when dying by signal, to leave + * the value intact in a core dump, and to save the unnecessary + * trouble otherwise. Userland only wants this done for a sys_exit. + */ + if (tsk->clear_child_tid + && !(tsk->flags & PF_SIGNALED) + && atomic_read(&mm->mm_users) > 1) { u32 __user * tidptr = tsk->clear_child_tid; tsk->clear_child_tid = NULL; @@ -479,6 +488,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); + /* Initializing for Swap token stuff */ + mm->token_priority = 0; + mm->last_interval = 0; + if (!mm_init(mm)) goto fail_nomem; @@ -542,6 +555,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) goto fail_nomem; good_mm: + /* Initializing for Swap token stuff */ + mm->token_priority = 0; + mm->last_interval = 0; + tsk->mm = mm; tsk->active_mm = mm; return 0; @@ -613,7 +630,7 @@ static struct files_struct *alloc_files(void) struct files_struct *newf; struct fdtable *fdt; - newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; @@ -830,7 +847,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); - taskstats_tgid_alloc(current); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); @@ -1303,7 +1319,7 @@ fork_out: return ERR_PTR(retval); } -struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) +noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); return regs; @@ -1413,7 +1429,7 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags) { struct sighand_struct *sighand = data; diff --git a/kernel/futex.c b/kernel/futex.c index 93ef30ba209..95989a3b416 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -282,9 +282,9 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from) { int ret; - inc_preempt_count(); + pagefault_disable(); ret = __copy_from_user_inatomic(dest, from, sizeof(u32)); - dec_preempt_count(); + pagefault_enable(); return ret ? -EFAULT : 0; } @@ -324,12 +324,11 @@ static int refill_pi_state_cache(void) if (likely(current->pi_state_cache)) return 0; - pi_state = kmalloc(sizeof(*pi_state), GFP_KERNEL); + pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); if (!pi_state) return -ENOMEM; - memset(pi_state, 0, sizeof(*pi_state)); INIT_LIST_HEAD(&pi_state->list); /* pi_mutex gets initialized later */ pi_state->owner = NULL; @@ -553,7 +552,7 @@ static void wake_futex(struct futex_q *q) * at the end of wake_up_all() does not prevent this store from * moving. */ - wmb(); + smp_wmb(); q->lock_ptr = NULL; } @@ -585,9 +584,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!(uval & FUTEX_OWNER_DIED)) { newval = FUTEX_WAITERS | new_owner->pid; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (curval == -EFAULT) return -EFAULT; if (curval != uval) @@ -618,9 +617,9 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) * There is no waiter, so we unlock the futex. The owner died * bit has not to be preserved here. We are the owner: */ - inc_preempt_count(); + pagefault_disable(); oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); - dec_preempt_count(); + pagefault_enable(); if (oldval == -EFAULT) return oldval; @@ -1158,9 +1157,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, */ newval = current->pid; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1183,9 +1182,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, uval = curval; newval = uval | FUTEX_WAITERS; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1215,10 +1214,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, newval = current->pid | FUTEX_OWNER_DIED | FUTEX_WAITERS; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1390,9 +1389,9 @@ retry_locked: * anyone else up: */ if (!(uval & FUTEX_OWNER_DIED)) { - inc_preempt_count(); + pagefault_disable(); uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); - dec_preempt_count(); + pagefault_enable(); } if (unlikely(uval == -EFAULT)) @@ -1493,7 +1492,7 @@ static unsigned int futex_poll(struct file *filp, return ret; } -static struct file_operations futex_fops = { +static const struct file_operations futex_fops = { .release = futex_close, .poll = futex_poll, }; @@ -1858,10 +1857,16 @@ static struct file_system_type futex_fs_type = { static int __init init(void) { - unsigned int i; + int i = register_filesystem(&futex_fs_type); + + if (i) + return i; - register_filesystem(&futex_fs_type); futex_mnt = kern_mount(&futex_fs_type); + if (IS_ERR(futex_mnt)) { + unregister_filesystem(&futex_fs_type); + return PTR_ERR(futex_mnt); + } for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { INIT_LIST_HEAD(&futex_queues[i].chain); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a681912bc89..aff1f0fabb0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -54,7 +54,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), #ifdef CONFIG_SMP .affinity = CPU_MASK_ALL #endif diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index eeac3e313b2..ab63cfc4299 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -20,6 +20,7 @@ #include <linux/proc_fs.h> #include <linux/sched.h> /* for cond_resched */ #include <linux/mm.h> +#include <linux/ctype.h> #include <asm/sections.h> @@ -301,13 +302,6 @@ struct kallsym_iter char name[KSYM_NAME_LEN+1]; }; -/* Only label it "global" if it is exported. */ -static void upcase_if_global(struct kallsym_iter *iter) -{ - if (is_exported(iter->name, iter->owner)) - iter->type += 'A' - 'a'; -} - static int get_ksymbol_mod(struct kallsym_iter *iter) { iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, @@ -316,7 +310,10 @@ static int get_ksymbol_mod(struct kallsym_iter *iter) if (iter->owner == NULL) return 0; - upcase_if_global(iter); + /* Label it "global" if it is exported, "local" if not exported. */ + iter->type = is_exported(iter->name, iter->owner) + ? toupper(iter->type) : tolower(iter->type); + return 1; } @@ -401,7 +398,7 @@ static int s_show(struct seq_file *m, void *p) return 0; } -static struct seq_operations kallsyms_op = { +static const struct seq_operations kallsyms_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -436,7 +433,7 @@ static int kallsyms_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static struct file_operations kallsyms_operations = { +static const struct file_operations kallsyms_operations = { .open = kallsyms_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/kexec.c b/kernel/kexec.c index fcdd5d2bc3f..afbbbe981be 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -20,6 +20,8 @@ #include <linux/syscalls.h> #include <linux/ioport.h> #include <linux/hardirq.h> +#include <linux/elf.h> +#include <linux/elfcore.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -108,11 +110,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, /* Allocate a controlling structure */ result = -ENOMEM; - image = kmalloc(sizeof(*image), GFP_KERNEL); + image = kzalloc(sizeof(*image), GFP_KERNEL); if (!image) goto out; - memset(image, 0, sizeof(*image)); image->head = 0; image->entry = &image->head; image->last_entry = &image->head; @@ -1067,6 +1068,60 @@ void crash_kexec(struct pt_regs *regs) } } +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) + 3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +void crash_save_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that, so there is no need to invent something new. + */ + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + static int __init crash_notes_memory_init(void) { /* Allocate memory for saving cpu registers. */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 610c837ad9e..17ec4afb099 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -38,6 +38,7 @@ #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/kallsyms.h> +#include <linux/freezer.h> #include <asm-generic/sections.h> #include <asm/cacheflush.h> #include <asm/errno.h> @@ -83,9 +84,36 @@ struct kprobe_insn_page { kprobe_opcode_t *insns; /* Page of instruction slots */ char slot_used[INSNS_PER_PAGE]; int nused; + int ngarbage; }; static struct hlist_head kprobe_insn_pages; +static int kprobe_garbage_slots; +static int collect_garbage_slots(void); + +static int __kprobes check_safety(void) +{ + int ret = 0; +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) + ret = freeze_processes(); + if (ret == 0) { + struct task_struct *p, *q; + do_each_thread(p, q) { + if (p != current && p->state == TASK_RUNNING && + p->pid != 0) { + printk("Check failed: %s is running\n",p->comm); + ret = -1; + goto loop_end; + } + } while_each_thread(p, q); + } +loop_end: + thaw_processes(); +#else + synchronize_sched(); +#endif + return ret; +} /** * get_insn_slot() - Find a slot on an executable page for an instruction. @@ -96,6 +124,7 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) struct kprobe_insn_page *kip; struct hlist_node *pos; + retry: hlist_for_each(pos, &kprobe_insn_pages) { kip = hlist_entry(pos, struct kprobe_insn_page, hlist); if (kip->nused < INSNS_PER_PAGE) { @@ -112,7 +141,11 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) } } - /* All out of space. Need to allocate a new page. Use slot 0.*/ + /* If there are any garbage slots, collect it and try again. */ + if (kprobe_garbage_slots && collect_garbage_slots() == 0) { + goto retry; + } + /* All out of space. Need to allocate a new page. Use slot 0. */ kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); if (!kip) { return NULL; @@ -133,10 +166,62 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) memset(kip->slot_used, 0, INSNS_PER_PAGE); kip->slot_used[0] = 1; kip->nused = 1; + kip->ngarbage = 0; return kip->insns; } -void __kprobes free_insn_slot(kprobe_opcode_t *slot) +/* Return 1 if all garbages are collected, otherwise 0. */ +static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) +{ + kip->slot_used[idx] = 0; + kip->nused--; + if (kip->nused == 0) { + /* + * Page is no longer in use. Free it unless + * it's the last one. We keep the last one + * so as not to have to set it up again the + * next time somebody inserts a probe. + */ + hlist_del(&kip->hlist); + if (hlist_empty(&kprobe_insn_pages)) { + INIT_HLIST_NODE(&kip->hlist); + hlist_add_head(&kip->hlist, + &kprobe_insn_pages); + } else { + module_free(NULL, kip->insns); + kfree(kip); + } + return 1; + } + return 0; +} + +static int __kprobes collect_garbage_slots(void) +{ + struct kprobe_insn_page *kip; + struct hlist_node *pos, *next; + + /* Ensure no-one is preepmted on the garbages */ + if (check_safety() != 0) + return -EAGAIN; + + hlist_for_each_safe(pos, next, &kprobe_insn_pages) { + int i; + kip = hlist_entry(pos, struct kprobe_insn_page, hlist); + if (kip->ngarbage == 0) + continue; + kip->ngarbage = 0; /* we will collect all garbages */ + for (i = 0; i < INSNS_PER_PAGE; i++) { + if (kip->slot_used[i] == -1 && + collect_one_slot(kip, i)) + break; + } + } + kprobe_garbage_slots = 0; + return 0; +} + +void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -146,28 +231,18 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) if (kip->insns <= slot && slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { int i = (slot - kip->insns) / MAX_INSN_SIZE; - kip->slot_used[i] = 0; - kip->nused--; - if (kip->nused == 0) { - /* - * Page is no longer in use. Free it unless - * it's the last one. We keep the last one - * so as not to have to set it up again the - * next time somebody inserts a probe. - */ - hlist_del(&kip->hlist); - if (hlist_empty(&kprobe_insn_pages)) { - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, - &kprobe_insn_pages); - } else { - module_free(NULL, kip->insns); - kfree(kip); - } + if (dirty) { + kip->slot_used[i] = -1; + kip->ngarbage++; + } else { + collect_one_slot(kip, i); } - return; + break; } } + if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) { + collect_garbage_slots(); + } } #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c9fefdb1a7d..b02032476dc 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -140,13 +140,6 @@ void lockdep_on(void) EXPORT_SYMBOL(lockdep_on); -int lockdep_internal(void) -{ - return current->lockdep_recursion != 0; -} - -EXPORT_SYMBOL(lockdep_internal); - /* * Debugging switches: */ @@ -228,17 +221,15 @@ static int save_trace(struct stack_trace *trace) trace->skip = 3; trace->all_contexts = 0; - /* Make sure to not recurse in case the the unwinder needs to tak -e locks. */ - lockdep_off(); save_stack_trace(trace, NULL); - lockdep_on(); trace->max_entries = trace->nr_entries; nr_stack_trace_entries += trace->nr_entries; - if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) + if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) { + __raw_spin_unlock(&hash_lock); return 0; + } if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { __raw_spin_unlock(&hash_lock); @@ -357,7 +348,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4 static void print_lock_name(struct lock_class *class) { - char str[128], c1, c2, c3, c4; + char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; const char *name; get_usage_chars(class, &c1, &c2, &c3, &c4); @@ -379,7 +370,7 @@ static void print_lock_name(struct lock_class *class) static void print_lockdep_cache(struct lockdep_map *lock) { const char *name; - char str[128]; + char str[KSYM_NAME_LEN + 1]; name = lock->name; if (!name) @@ -449,7 +440,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth) print_lock_class_header(class, depth); list_for_each_entry(entry, &class->locks_after, entry) { - DEBUG_LOCKS_WARN_ON(!entry->class); + if (DEBUG_LOCKS_WARN_ON(!entry->class)) + return; + print_lock_dependencies(entry->class, depth + 1); printk("%*s ... acquired at:\n",depth,""); @@ -474,7 +467,8 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 0; entry->class = this; - save_trace(&entry->trace); + if (!save_trace(&entry->trace)) + return 0; /* * Since we never remove from the dependency list, the list can @@ -562,8 +556,12 @@ static noinline int print_circular_bug_tail(void) if (debug_locks_silent) return 0; + /* hash_lock unlocked by the header */ + __raw_spin_lock(&hash_lock); this.class = check_source->class; - save_trace(&this.trace); + if (!save_trace(&this.trace)) + return 0; + __raw_spin_unlock(&hash_lock); print_circular_bug_entry(&this, 0); printk("\nother info that might help us debug this:\n\n"); @@ -966,14 +964,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, &prev->class->locks_after, next->acquire_ip); if (!ret) return 0; - /* - * Return value of 2 signals 'dependency already added', - * in that case we dont have to add the backlink either. - */ - if (ret == 2) - return 2; + ret = add_lock_to_list(next->class, prev->class, &next->class->locks_before, next->acquire_ip); + if (!ret) + return 0; /* * Debugging printouts: @@ -1025,7 +1020,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) * added: */ if (hlock->read != 2) { - check_prev_add(curr, hlock, next); + if (!check_prev_add(curr, hlock, next)) + return 0; /* * Stop after the first non-trylock entry, * as non-trylock entries have added their @@ -1182,6 +1178,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct list_head *hash_head; struct lock_class *class; + unsigned long flags; class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -1203,6 +1200,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) key = lock->key->subkeys + subclass; hash_head = classhashentry(key); + raw_local_irq_save(flags); __raw_spin_lock(&hash_lock); /* * We have to do the hash-walk again, to avoid races @@ -1217,6 +1215,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); debug_locks_off(); printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); printk("turning off the locking correctness validator.\n"); @@ -1239,15 +1238,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) printk("#%d", class->name_version); printk("\n"); dump_stack(); + raw_local_irq_save(flags); __raw_spin_lock(&hash_lock); } out_unlock_set: __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); if (!subclass || force) lock->class_cache = class; @@ -1728,6 +1730,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, debug_atomic_dec(&nr_unused_locks); break; default: + __raw_spin_unlock(&hash_lock); debug_locks_off(); WARN_ON(1); return 0; @@ -2645,6 +2648,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); static void print_held_locks_bug(struct task_struct *curr) { diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index eab043c83bb..8ce09bc4613 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -20,7 +20,7 @@ #define MAX_LOCKDEP_KEYS_BITS 11 #define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) -#define MAX_LOCKDEP_CHAINS_BITS 13 +#define MAX_LOCKDEP_CHAINS_BITS 14 #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) /* diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index f6e72eaab3f..b554b40a4aa 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -113,7 +113,7 @@ static int l_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations lockdep_ops = { +static const struct seq_operations lockdep_ops = { .start = l_start, .next = l_next, .stop = l_stop, @@ -135,7 +135,7 @@ static int lockdep_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_lockdep_operations = { +static const struct file_operations proc_lockdep_operations = { .open = lockdep_open, .read = seq_read, .llseek = seq_lseek, @@ -319,7 +319,7 @@ static int lockdep_stats_open(struct inode *inode, struct file *file) return single_open(file, lockdep_stats_show, NULL); } -static struct file_operations proc_lockdep_stats_operations = { +static const struct file_operations proc_lockdep_stats_operations = { .open = lockdep_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/module.c b/kernel/module.c index e2d09d604ca..d9eae45d014 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2209,7 +2209,7 @@ static int m_show(struct seq_file *m, void *p) Where refcount is a number or -, and deps is a comma-separated list of depends or -. */ -struct seq_operations modules_op = { +const struct seq_operations modules_op = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 18651641a7b..841539d72c5 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -77,6 +77,9 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { + if (unlikely(!debug_locks)) + return; + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(lock->magic != lock); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); diff --git a/kernel/pid.c b/kernel/pid.c index b914392085f..a48879b0b92 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,7 +31,7 @@ #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash; static int pidhash_shift; -static kmem_cache_t *pid_cachep; +static struct kmem_cache *pid_cachep; int pid_max = PID_MAX_DEFAULT; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9cbb5d1be06..5fe87de10ff 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -70,7 +70,7 @@ /* * Lets keep our timers in a slab cache :-) */ -static kmem_cache_t *posix_timers_cache; +static struct kmem_cache *posix_timers_cache; static struct idr posix_timers_id; static DEFINE_SPINLOCK(idr_lock); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 825068ca347..710ed084e7c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED config SOFTWARE_SUSPEND bool "Software Suspend" - depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP)) + depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) ---help--- Enable the possibility of suspending the machine. It doesn't need ACPI or APM. diff --git a/kernel/power/disk.c b/kernel/power/disk.c index b1fb7866b0b..0b00f56c2ad 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -20,6 +20,7 @@ #include <linux/pm.h> #include <linux/console.h> #include <linux/cpu.h> +#include <linux/freezer.h> #include "power.h" @@ -27,6 +28,23 @@ static int noresume = 0; char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; +sector_t swsusp_resume_block; + +/** + * platform_prepare - prepare the machine for hibernation using the + * platform driver if so configured and return an error code if it fails + */ + +static inline int platform_prepare(void) +{ + int error = 0; + + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->prepare) + error = pm_ops->prepare(PM_SUSPEND_DISK); + } + return error; +} /** * power_down - Shut machine down for hibernate. @@ -40,12 +58,10 @@ dev_t swsusp_resume_device; static void power_down(suspend_disk_method_t mode) { - int error = 0; - switch(mode) { case PM_DISK_PLATFORM: kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - error = pm_ops->enter(PM_SUSPEND_DISK); + pm_ops->enter(PM_SUSPEND_DISK); break; case PM_DISK_SHUTDOWN: kernel_power_off(); @@ -90,12 +106,18 @@ static int prepare_processes(void) goto thaw; } + error = platform_prepare(); + if (error) + goto thaw; + /* Free memory before shutting down devices. */ if (!(error = swsusp_shrink_memory())) return 0; -thaw: + + platform_finish(); + thaw: thaw_processes(); -enable_cpus: + enable_cpus: enable_nonboot_cpus(); pm_restore_console(); return error; @@ -127,7 +149,7 @@ int pm_suspend_disk(void) return error; if (pm_disk_mode == PM_DISK_TESTPROC) - goto Thaw; + return 0; suspend_console(); error = device_suspend(PMSG_FREEZE); @@ -189,10 +211,10 @@ static int software_resume(void) { int error; - down(&pm_sem); + mutex_lock(&pm_mutex); if (!swsusp_resume_device) { if (!strlen(resume_file)) { - up(&pm_sem); + mutex_unlock(&pm_mutex); return -ENOENT; } swsusp_resume_device = name_to_dev_t(resume_file); @@ -207,7 +229,7 @@ static int software_resume(void) * FIXME: If noresume is specified, we need to find the partition * and reset it back to normal swap space. */ - up(&pm_sem); + mutex_unlock(&pm_mutex); return 0; } @@ -251,7 +273,7 @@ static int software_resume(void) unprepare_processes(); Done: /* For success case, the suspend path will release the lock */ - up(&pm_sem); + mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); return 0; } @@ -312,7 +334,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) p = memchr(buf, '\n', n); len = p ? p - buf : n; - down(&pm_sem); + mutex_lock(&pm_mutex); for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { if (!strncmp(buf, pm_disk_modes[i], len)) { mode = i; @@ -336,7 +358,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) pr_debug("PM: suspend-to-disk mode set to '%s'\n", pm_disk_modes[mode]); - up(&pm_sem); + mutex_unlock(&pm_mutex); return error ? error : n; } @@ -361,14 +383,14 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) if (maj != MAJOR(res) || min != MINOR(res)) goto out; - down(&pm_sem); + mutex_lock(&pm_mutex); swsusp_resume_device = res; - up(&pm_sem); + mutex_unlock(&pm_mutex); printk("Attempting manual resume\n"); noresume = 0; software_resume(); ret = n; -out: + out: return ret; } @@ -423,6 +445,19 @@ static int __init resume_setup(char *str) return 1; } +static int __init resume_offset_setup(char *str) +{ + unsigned long long offset; + + if (noresume) + return 1; + + if (sscanf(str, "%llu", &offset) == 1) + swsusp_resume_block = offset; + + return 1; +} + static int __init noresume_setup(char *str) { noresume = 1; @@ -430,4 +465,5 @@ static int __init noresume_setup(char *str) } __setup("noresume", noresume_setup); +__setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c index 873228c71da..500eb87f643 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,6 +8,7 @@ * */ +#include <linux/module.h> #include <linux/suspend.h> #include <linux/kobject.h> #include <linux/string.h> @@ -18,13 +19,14 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/resume-trace.h> +#include <linux/freezer.h> #include "power.h" /*This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) -DECLARE_MUTEX(pm_sem); +DEFINE_MUTEX(pm_mutex); struct pm_ops *pm_ops; suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; @@ -36,9 +38,9 @@ suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; void pm_set_ops(struct pm_ops * ops) { - down(&pm_sem); + mutex_lock(&pm_mutex); pm_ops = ops; - up(&pm_sem); + mutex_unlock(&pm_mutex); } @@ -182,7 +184,7 @@ static int enter_state(suspend_state_t state) if (!valid_state(state)) return -ENODEV; - if (down_trylock(&pm_sem)) + if (!mutex_trylock(&pm_mutex)) return -EBUSY; if (state == PM_SUSPEND_DISK) { @@ -200,7 +202,7 @@ static int enter_state(suspend_state_t state) pr_debug("PM: Finishing wakeup.\n"); suspend_finish(state); Unlock: - up(&pm_sem); + mutex_unlock(&pm_mutex); return error; } @@ -229,7 +231,7 @@ int pm_suspend(suspend_state_t state) return -EINVAL; } - +EXPORT_SYMBOL(pm_suspend); decl_subsys(power,NULL,NULL); diff --git a/kernel/power/power.h b/kernel/power/power.h index bfe999f7b27..eb461b816bf 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -22,7 +22,9 @@ static inline int pm_suspend_disk(void) return -EPERM; } #endif -extern struct semaphore pm_sem; + +extern struct mutex pm_mutex; + #define power_attr(_name) \ static struct subsys_attribute _name##_attr = { \ .attr = { \ @@ -42,6 +44,7 @@ extern const void __nosave_begin, __nosave_end; extern unsigned long image_size; extern int in_suspend; extern dev_t swsusp_resume_device; +extern sector_t swsusp_resume_block; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); @@ -102,8 +105,18 @@ struct snapshot_handle { extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); +extern void snapshot_write_finalize(struct snapshot_handle *handle); extern int snapshot_image_loaded(struct snapshot_handle *handle); -extern void snapshot_free_unused_memory(struct snapshot_handle *handle); + +/* + * This structure is used to pass the values needed for the identification + * of the resume swap area from a user space to the kernel via the + * SNAPSHOT_SET_SWAP_AREA ioctl + */ +struct resume_swap_area { + loff_t offset; + u_int32_t dev; +} __attribute__((packed)); #define SNAPSHOT_IOC_MAGIC '3' #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) @@ -117,7 +130,14 @@ extern void snapshot_free_unused_memory(struct snapshot_handle *handle); #define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9) #define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) #define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11) -#define SNAPSHOT_IOC_MAXNR 11 +#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) +#define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \ + struct resume_swap_area) +#define SNAPSHOT_IOC_MAXNR 13 + +#define PMOPS_PREPARE 1 +#define PMOPS_ENTER 2 +#define PMOPS_FINISH 3 /** * The bitmap is used for tracing allocated swap pages @@ -141,7 +161,7 @@ struct bitmap_page { extern void free_bitmap(struct bitmap_page *bitmap); extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); -extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); +extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); extern int swsusp_check(void); @@ -153,3 +173,7 @@ extern int swsusp_read(void); extern int swsusp_write(void); extern void swsusp_close(void); extern int suspend_enter(suspend_state_t state); + +struct timeval; +extern void swsusp_show_speed(struct timeval *, struct timeval *, + unsigned int, char *); diff --git a/kernel/power/process.c b/kernel/power/process.c index 72e72d2c61e..99eeb119b06 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,12 +13,15 @@ #include <linux/suspend.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/freezer.h> /* * Timeout for stopping processes */ #define TIMEOUT (20 * HZ) +#define FREEZER_KERNEL_THREADS 0 +#define FREEZER_USER_SPACE 1 static inline int freezeable(struct task_struct * p) { @@ -39,7 +42,6 @@ void refrigerator(void) long save; save = current->state; pr_debug("%s entered refrigerator\n", current->comm); - printk("="); frozen_process(current); spin_lock_irq(¤t->sighand->siglock); @@ -79,96 +81,136 @@ static void cancel_freezing(struct task_struct *p) } } -/* 0 = success, else # of processes that we failed to stop */ -int freeze_processes(void) +static inline int is_user_space(struct task_struct *p) +{ + return p->mm && !(p->flags & PF_BORROWED_MM); +} + +static unsigned int try_to_freeze_tasks(int freeze_user_space) { - int todo, nr_user, user_frozen; - unsigned long start_time; struct task_struct *g, *p; + unsigned long end_time; + unsigned int todo; - printk( "Stopping tasks: " ); - start_time = jiffies; - user_frozen = 0; + end_time = jiffies + TIMEOUT; do { - nr_user = todo = 0; + todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { if (!freezeable(p)) continue; + if (frozen(p)) continue; - if (p->state == TASK_TRACED && frozen(p->parent)) { + + if (p->state == TASK_TRACED && + (frozen(p->parent) || + p->parent->state == TASK_STOPPED)) { cancel_freezing(p); continue; } - if (p->mm && !(p->flags & PF_BORROWED_MM)) { - /* The task is a user-space one. - * Freeze it unless there's a vfork completion - * pending + if (is_user_space(p)) { + if (!freeze_user_space) + continue; + + /* Freeze the task unless there is a vfork + * completion pending */ if (!p->vfork_done) freeze_process(p); - nr_user++; } else { - /* Freeze only if the user space is frozen */ - if (user_frozen) - freeze_process(p); - todo++; + if (freeze_user_space) + continue; + + freeze_process(p); } + todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); - todo += nr_user; - if (!user_frozen && !nr_user) { - sys_sync(); - start_time = jiffies; - } - user_frozen = !nr_user; yield(); /* Yield is okay here */ - if (todo && time_after(jiffies, start_time + TIMEOUT)) + if (todo && time_after(jiffies, end_time)) break; - } while(todo); + } while (todo); - /* This does not unfreeze processes that are already frozen - * (we have slightly ugly calling convention in that respect, - * and caller must call thaw_processes() if something fails), - * but it cleans up leftover PF_FREEZE requests. - */ if (todo) { - printk( "\n" ); - printk(KERN_ERR " stopping tasks timed out " - "after %d seconds (%d tasks remaining):\n", - TIMEOUT / HZ, todo); + /* This does not unfreeze processes that are already frozen + * (we have slightly ugly calling convention in that respect, + * and caller must call thaw_processes() if something fails), + * but it cleans up leftover PF_FREEZE requests. + */ + printk("\n"); + printk(KERN_ERR "Stopping %s timed out after %d seconds " + "(%d tasks refusing to freeze):\n", + freeze_user_space ? "user space processes" : + "kernel threads", + TIMEOUT / HZ, todo); read_lock(&tasklist_lock); do_each_thread(g, p) { + if (is_user_space(p) == !freeze_user_space) + continue; + if (freezeable(p) && !frozen(p)) - printk(KERN_ERR " %s\n", p->comm); + printk(KERN_ERR " %s\n", p->comm); + cancel_freezing(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); - return todo; } - printk( "|\n" ); + return todo; +} + +/** + * freeze_processes - tell processes to enter the refrigerator + * + * Returns 0 on success, or the number of processes that didn't freeze, + * although they were told to. + */ +int freeze_processes(void) +{ + unsigned int nr_unfrozen; + + printk("Stopping tasks ... "); + nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); + if (nr_unfrozen) + return nr_unfrozen; + + sys_sync(); + nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + if (nr_unfrozen) + return nr_unfrozen; + + printk("done.\n"); BUG_ON(in_atomic()); return 0; } -void thaw_processes(void) +static void thaw_tasks(int thaw_user_space) { struct task_struct *g, *p; - printk( "Restarting tasks..." ); read_lock(&tasklist_lock); do_each_thread(g, p) { if (!freezeable(p)) continue; + + if (is_user_space(p) == !thaw_user_space) + continue; + if (!thaw_process(p)) - printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); + printk(KERN_WARNING " Strange, %s not stopped\n", + p->comm ); } while_each_thread(g, p); - read_unlock(&tasklist_lock); +} + +void thaw_processes(void) +{ + printk("Restarting tasks ... "); + thaw_tasks(FREEZER_KERNEL_THREADS); + thaw_tasks(FREEZER_USER_SPACE); schedule(); - printk( " done\n" ); + printk("done.\n"); } EXPORT_SYMBOL(refrigerator); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 99f9b7d177d..c024606221c 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1,15 +1,15 @@ /* * linux/kernel/power/snapshot.c * - * This file provide system snapshot/restore functionality. + * This file provides system snapshot/restore functionality for swsusp. * * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> * - * This file is released under the GPLv2, and is based on swsusp.c. + * This file is released under the GPLv2. * */ - #include <linux/version.h> #include <linux/module.h> #include <linux/mm.h> @@ -34,137 +34,24 @@ #include "power.h" -/* List of PBEs used for creating and restoring the suspend image */ +/* List of PBEs needed for restoring the pages that were allocated before + * the suspend and included in the suspend image, but have also been + * allocated by the "resume" kernel, so their contents cannot be written + * directly to their "original" page frames. + */ struct pbe *restore_pblist; -static unsigned int nr_copy_pages; -static unsigned int nr_meta_pages; +/* Pointer to an auxiliary buffer (1 page) */ static void *buffer; -#ifdef CONFIG_HIGHMEM -unsigned int count_highmem_pages(void) -{ - struct zone *zone; - unsigned long zone_pfn; - unsigned int n = 0; - - for_each_zone (zone) - if (is_highmem(zone)) { - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { - struct page *page; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - if (PageReserved(page)) - continue; - if (PageNosaveFree(page)) - continue; - n++; - } - } - return n; -} - -struct highmem_page { - char *data; - struct page *page; - struct highmem_page *next; -}; - -static struct highmem_page *highmem_copy; - -static int save_highmem_zone(struct zone *zone) -{ - unsigned long zone_pfn; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - struct highmem_page *save; - void *kaddr; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - - if (!(pfn%10000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - /* - * This condition results from rvmalloc() sans vmalloc_32() - * and architectural memory reservations. This should be - * corrected eventually when the cases giving rise to this - * are better understood. - */ - if (PageReserved(page)) - continue; - BUG_ON(PageNosave(page)); - if (PageNosaveFree(page)) - continue; - save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); - if (!save) - return -ENOMEM; - save->next = highmem_copy; - save->page = page; - save->data = (void *) get_zeroed_page(GFP_ATOMIC); - if (!save->data) { - kfree(save); - return -ENOMEM; - } - kaddr = kmap_atomic(page, KM_USER0); - memcpy(save->data, kaddr, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - highmem_copy = save; - } - return 0; -} - -int save_highmem(void) -{ - struct zone *zone; - int res = 0; - - pr_debug("swsusp: Saving Highmem"); - drain_local_pages(); - for_each_zone (zone) { - if (is_highmem(zone)) - res = save_highmem_zone(zone); - if (res) - return res; - } - printk("\n"); - return 0; -} - -int restore_highmem(void) -{ - printk("swsusp: Restoring Highmem\n"); - while (highmem_copy) { - struct highmem_page *save = highmem_copy; - void *kaddr; - highmem_copy = save->next; - - kaddr = kmap_atomic(save->page, KM_USER0); - memcpy(kaddr, save->data, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - free_page((long) save->data); - kfree(save); - } - return 0; -} -#else -static inline unsigned int count_highmem_pages(void) {return 0;} -static inline int save_highmem(void) {return 0;} -static inline int restore_highmem(void) {return 0;} -#endif - /** * @safe_needed - on resume, for storing the PBE list and the image, * we can only use memory pages that do not conflict with the pages - * used before suspend. + * used before suspend. The unsafe pages have PageNosaveFree set + * and we count them using unsafe_pages. * - * The unsafe pages are marked with the PG_nosave_free flag - * and we count them using unsafe_pages + * Each allocated image page is marked as PageNosave and PageNosaveFree + * so that swsusp_free() can release it. */ #define PG_ANY 0 @@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;} static unsigned int allocated_unsafe_pages; -static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) +static void *get_image_page(gfp_t gfp_mask, int safe_needed) { void *res; @@ -195,20 +82,39 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) unsigned long get_safe_page(gfp_t gfp_mask) { - return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); + return (unsigned long)get_image_page(gfp_mask, PG_SAFE); +} + +static struct page *alloc_image_page(gfp_t gfp_mask) +{ + struct page *page; + + page = alloc_page(gfp_mask); + if (page) { + SetPageNosave(page); + SetPageNosaveFree(page); + } + return page; } /** * free_image_page - free page represented by @addr, allocated with - * alloc_image_page (page flags set by it must be cleared) + * get_image_page (page flags set by it must be cleared) */ static inline void free_image_page(void *addr, int clear_nosave_free) { - ClearPageNosave(virt_to_page(addr)); + struct page *page; + + BUG_ON(!virt_addr_valid(addr)); + + page = virt_to_page(addr); + + ClearPageNosave(page); if (clear_nosave_free) - ClearPageNosaveFree(virt_to_page(addr)); - free_page((unsigned long)addr); + ClearPageNosaveFree(page); + + __free_page(page); } /* struct linked_page is used to build chains of pages */ @@ -269,7 +175,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { struct linked_page *lp; - lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); + lp = get_image_page(ca->gfp_mask, ca->safe_needed); if (!lp) return NULL; @@ -446,8 +352,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) /* Compute the number of zones */ nr = 0; - for_each_zone (zone) - if (populated_zone(zone) && !is_highmem(zone)) + for_each_zone(zone) + if (populated_zone(zone)) nr++; /* Allocate the list of zones bitmap objects */ @@ -459,10 +365,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) } /* Initialize the zone bitmap objects */ - for_each_zone (zone) { + for_each_zone(zone) { unsigned long pfn; - if (!populated_zone(zone) || is_highmem(zone)) + if (!populated_zone(zone)) continue; zone_bm->start_pfn = zone->zone_start_pfn; @@ -481,7 +387,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) while (bb) { unsigned long *ptr; - ptr = alloc_image_page(gfp_mask, safe_needed); + ptr = get_image_page(gfp_mask, safe_needed); bb->data = ptr; if (!ptr) goto Free; @@ -505,7 +411,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) memory_bm_position_reset(bm); return 0; -Free: + Free: bm->p_list = ca.chain; memory_bm_free(bm, PG_UNSAFE_CLEAR); return -ENOMEM; @@ -651,7 +557,7 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) memory_bm_position_reset(bm); return BM_END_OF_MAP; -Return_pfn: + Return_pfn: bm->cur.chunk = chunk; bm->cur.bit = bit; return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; @@ -669,10 +575,82 @@ unsigned int snapshot_additional_pages(struct zone *zone) res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); - return res; + return 2 * res; +} + +#ifdef CONFIG_HIGHMEM +/** + * count_free_highmem_pages - compute the total number of free highmem + * pages, system-wide. + */ + +static unsigned int count_free_highmem_pages(void) +{ + struct zone *zone; + unsigned int cnt = 0; + + for_each_zone(zone) + if (populated_zone(zone) && is_highmem(zone)) + cnt += zone->free_pages; + + return cnt; +} + +/** + * saveable_highmem_page - Determine whether a highmem page should be + * included in the suspend image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't a part of a free chunk of pages. + */ + +static struct page *saveable_highmem_page(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + + BUG_ON(!PageHighMem(page)); + + if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) + return NULL; + + return page; } /** + * count_highmem_pages - compute the total number of saveable highmem + * pages. + */ + +unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned int n = 0; + + for_each_zone(zone) { + unsigned long pfn, max_zone_pfn; + + if (!is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_highmem_page(pfn)) + n++; + } + return n; +} +#else +static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } +static inline unsigned int count_highmem_pages(void) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** * pfn_is_nosave - check if given pfn is in the 'nosave' section */ @@ -684,12 +662,12 @@ static inline int pfn_is_nosave(unsigned long pfn) } /** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page + * saveable - Determine whether a non-highmem page should be included in + * the suspend image. * - * We save a page if it isn't Nosave, and is not in the range of pages - * statically defined as 'unsaveable', and it - * isn't a part of a free chunk of pages. + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't a part of + * a free chunk of pages. */ static struct page *saveable_page(unsigned long pfn) @@ -701,76 +679,130 @@ static struct page *saveable_page(unsigned long pfn) page = pfn_to_page(pfn); - if (PageNosave(page)) + BUG_ON(PageHighMem(page)); + + if (PageNosave(page) || PageNosaveFree(page)) return NULL; + if (PageReserved(page) && pfn_is_nosave(pfn)) return NULL; - if (PageNosaveFree(page)) - return NULL; return page; } +/** + * count_data_pages - compute the total number of saveable non-highmem + * pages. + */ + unsigned int count_data_pages(void) { struct zone *zone; unsigned long pfn, max_zone_pfn; unsigned int n = 0; - for_each_zone (zone) { + for_each_zone(zone) { if (is_highmem(zone)) continue; + mark_free_pages(zone); max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - n += !!saveable_page(pfn); + if(saveable_page(pfn)) + n++; } return n; } -static inline void copy_data_page(long *dst, long *src) +/* This is needed, because copy_page and memcpy are not usable for copying + * task structs. + */ +static inline void do_copy_page(long *dst, long *src) { int n; - /* copy_page and memcpy are not usable for copying task structs. */ for (n = PAGE_SIZE / sizeof(long); n; n--) *dst++ = *src++; } +#ifdef CONFIG_HIGHMEM +static inline struct page * +page_is_saveable(struct zone *zone, unsigned long pfn) +{ + return is_highmem(zone) ? + saveable_highmem_page(pfn) : saveable_page(pfn); +} + +static inline void +copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + struct page *s_page, *d_page; + void *src, *dst; + + s_page = pfn_to_page(src_pfn); + d_page = pfn_to_page(dst_pfn); + if (PageHighMem(s_page)) { + src = kmap_atomic(s_page, KM_USER0); + dst = kmap_atomic(d_page, KM_USER1); + do_copy_page(dst, src); + kunmap_atomic(src, KM_USER0); + kunmap_atomic(dst, KM_USER1); + } else { + src = page_address(s_page); + if (PageHighMem(d_page)) { + /* Page pointed to by src may contain some kernel + * data modified by kmap_atomic() + */ + do_copy_page(buffer, src); + dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); + memcpy(dst, buffer, PAGE_SIZE); + kunmap_atomic(dst, KM_USER0); + } else { + dst = page_address(d_page); + do_copy_page(dst, src); + } + } +} +#else +#define page_is_saveable(zone, pfn) saveable_page(pfn) + +static inline void +copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + do_copy_page(page_address(pfn_to_page(dst_pfn)), + page_address(pfn_to_page(src_pfn))); +} +#endif /* CONFIG_HIGHMEM */ + static void copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) { struct zone *zone; unsigned long pfn; - for_each_zone (zone) { + for_each_zone(zone) { unsigned long max_zone_pfn; - if (is_highmem(zone)) - continue; - mark_free_pages(zone); max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (saveable_page(pfn)) + if (page_is_saveable(zone, pfn)) memory_bm_set_bit(orig_bm, pfn); } memory_bm_position_reset(orig_bm); memory_bm_position_reset(copy_bm); do { pfn = memory_bm_next_pfn(orig_bm); - if (likely(pfn != BM_END_OF_MAP)) { - struct page *page; - void *src; - - page = pfn_to_page(pfn); - src = page_address(page); - page = pfn_to_page(memory_bm_next_pfn(copy_bm)); - copy_data_page(page_address(page), src); - } + if (likely(pfn != BM_END_OF_MAP)) + copy_data_page(memory_bm_next_pfn(copy_bm), pfn); } while (pfn != BM_END_OF_MAP); } +/* Total number of image pages */ +static unsigned int nr_copy_pages; +/* Number of pages needed for saving the original pfns of the image pages */ +static unsigned int nr_meta_pages; + /** * swsusp_free - free pages allocated for the suspend. * @@ -792,7 +824,7 @@ void swsusp_free(void) if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); ClearPageNosaveFree(page); - free_page((long) page_address(page)); + __free_page(page); } } } @@ -802,34 +834,108 @@ void swsusp_free(void) buffer = NULL; } +#ifdef CONFIG_HIGHMEM +/** + * count_pages_for_highmem - compute the number of non-highmem pages + * that will be necessary for creating copies of highmem pages. + */ + +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) +{ + unsigned int free_highmem = count_free_highmem_pages(); + + if (free_highmem >= nr_highmem) + nr_highmem = 0; + else + nr_highmem -= free_highmem; + + return nr_highmem; +} +#else +static unsigned int +count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +#endif /* CONFIG_HIGHMEM */ /** - * enough_free_mem - Make sure we enough free memory to snapshot. - * - * Returns TRUE or FALSE after checking the number of available - * free pages. + * enough_free_mem - Make sure we have enough free memory for the + * snapshot image. */ -static int enough_free_mem(unsigned int nr_pages) +static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; unsigned int free = 0, meta = 0; - for_each_zone (zone) - if (!is_highmem(zone)) { + for_each_zone(zone) { + meta += snapshot_additional_pages(zone); + if (!is_highmem(zone)) free += zone->free_pages; - meta += snapshot_additional_pages(zone); - } + } - pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", + nr_pages += count_pages_for_highmem(nr_highmem); + pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n", nr_pages, PAGES_FOR_IO, meta, free); return free > nr_pages + PAGES_FOR_IO + meta; } +#ifdef CONFIG_HIGHMEM +/** + * get_highmem_buffer - if there are some highmem pages in the suspend + * image, we may need the buffer to copy them and/or load their data. + */ + +static inline int get_highmem_buffer(int safe_needed) +{ + buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); + return buffer ? 0 : -ENOMEM; +} + +/** + * alloc_highmem_image_pages - allocate some highmem pages for the image. + * Try to allocate as many pages as needed, but if the number of free + * highmem pages is lesser than that, allocate them all. + */ + +static inline unsigned int +alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +{ + unsigned int to_alloc = count_free_highmem_pages(); + + if (to_alloc > nr_highmem) + to_alloc = nr_highmem; + + nr_highmem -= to_alloc; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_image_page(__GFP_HIGHMEM); + memory_bm_set_bit(bm, page_to_pfn(page)); + } + return nr_highmem; +} +#else +static inline int get_highmem_buffer(int safe_needed) { return 0; } + +static inline unsigned int +alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * swsusp_alloc - allocate memory for the suspend image + * + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. + * + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. + */ + static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, - unsigned int nr_pages) + unsigned int nr_pages, unsigned int nr_highmem) { int error; @@ -841,46 +947,61 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, if (error) goto Free; + if (nr_highmem > 0) { + error = get_highmem_buffer(PG_ANY); + if (error) + goto Free; + + nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); + } while (nr_pages-- > 0) { - struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); + struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); + if (!page) goto Free; - SetPageNosave(page); - SetPageNosaveFree(page); memory_bm_set_bit(copy_bm, page_to_pfn(page)); } return 0; -Free: + Free: swsusp_free(); return -ENOMEM; } -/* Memory bitmap used for marking saveable pages */ +/* Memory bitmap used for marking saveable pages (during suspend) or the + * suspend image pages (during resume) + */ static struct memory_bitmap orig_bm; -/* Memory bitmap used for marking allocated pages that will contain the copies - * of saveable pages +/* Memory bitmap used on suspend for marking allocated pages that will contain + * the copies of saveable pages. During resume it is initially used for + * marking the suspend image pages, but then its set bits are duplicated in + * @orig_bm and it is released. Next, on systems with high memory, it may be + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. */ static struct memory_bitmap copy_bm; asmlinkage int swsusp_save(void) { - unsigned int nr_pages; + unsigned int nr_pages, nr_highmem; - pr_debug("swsusp: critical section: \n"); + printk("swsusp: critical section: \n"); drain_local_pages(); nr_pages = count_data_pages(); - printk("swsusp: Need to copy %u pages\n", nr_pages); + nr_highmem = count_highmem_pages(); + printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem); - if (!enough_free_mem(nr_pages)) { + if (!enough_free_mem(nr_pages, nr_highmem)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; } - if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) + if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { + printk(KERN_ERR "swsusp: Memory allocation failed\n"); return -ENOMEM; + } /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. @@ -894,10 +1015,12 @@ asmlinkage int swsusp_save(void) * touch swap space! Except we must write out our image of course. */ + nr_pages += nr_highmem; nr_copy_pages = nr_pages; - nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT; + nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); + return 0; } @@ -960,7 +1083,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); + buffer = get_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } @@ -975,9 +1098,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) memset(buffer, 0, PAGE_SIZE); pack_pfns(buffer, &orig_bm); } else { - unsigned long pfn = memory_bm_next_pfn(©_bm); + struct page *page; - handle->buffer = page_address(pfn_to_page(pfn)); + page = pfn_to_page(memory_bm_next_pfn(©_bm)); + if (PageHighMem(page)) { + /* Highmem pages are copied to the buffer, + * because we can't return with a kmapped + * highmem page (we may not be called again). + */ + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(buffer, kaddr, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + handle->buffer = buffer; + } else { + handle->buffer = page_address(page); + } } handle->prev = handle->cur; } @@ -1005,7 +1142,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) unsigned long pfn, max_zone_pfn; /* Clear page flags */ - for_each_zone (zone) { + for_each_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) @@ -1101,6 +1238,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) } } +/* List of "safe" pages that may be used to store data loaded from the suspend + * image + */ +static struct linked_page *safe_pages_list; + +#ifdef CONFIG_HIGHMEM +/* struct highmem_pbe is used for creating the list of highmem pages that + * should be restored atomically during the resume from disk, because the page + * frames they have occupied before the suspend are in use. + */ +struct highmem_pbe { + struct page *copy_page; /* data is here now */ + struct page *orig_page; /* data was here before the suspend */ + struct highmem_pbe *next; +}; + +/* List of highmem PBEs needed for restoring the highmem pages that were + * allocated before the suspend and included in the suspend image, but have + * also been allocated by the "resume" kernel, so their contents cannot be + * written directly to their "original" page frames. + */ +static struct highmem_pbe *highmem_pblist; + +/** + * count_highmem_image_pages - compute the number of highmem pages in the + * suspend image. The bits in the memory bitmap @bm that correspond to the + * image pages are assumed to be set. + */ + +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) +{ + unsigned long pfn; + unsigned int cnt = 0; + + memory_bm_position_reset(bm); + pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (PageHighMem(pfn_to_page(pfn))) + cnt++; + + pfn = memory_bm_next_pfn(bm); + } + return cnt; +} + +/** + * prepare_highmem_image - try to allocate as many highmem pages as + * there are highmem image pages (@nr_highmem_p points to the variable + * containing the number of highmem image pages). The pages that are + * "safe" (ie. will not be overwritten when the suspend image is + * restored) have the corresponding bits set in @bm (it must be + * unitialized). + * + * NOTE: This function should not be called if there are no highmem + * image pages. + */ + +static unsigned int safe_highmem_pages; + +static struct memory_bitmap *safe_highmem_bm; + +static int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + unsigned int to_alloc; + + if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) + return -ENOMEM; + + if (get_highmem_buffer(PG_SAFE)) + return -ENOMEM; + + to_alloc = count_free_highmem_pages(); + if (to_alloc > *nr_highmem_p) + to_alloc = *nr_highmem_p; + else + *nr_highmem_p = to_alloc; + + safe_highmem_pages = 0; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_page(__GFP_HIGHMEM); + if (!PageNosaveFree(page)) { + /* The page is "safe", set its bit the bitmap */ + memory_bm_set_bit(bm, page_to_pfn(page)); + safe_highmem_pages++; + } + /* Mark the page as allocated */ + SetPageNosave(page); + SetPageNosaveFree(page); + } + memory_bm_position_reset(bm); + safe_highmem_bm = bm; + return 0; +} + +/** + * get_highmem_page_buffer - for given highmem image page find the buffer + * that suspend_write_next() should set for its caller to write to. + * + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem page is set to the page to which + * the data will have to be copied from @buffer. + */ + +static struct page *last_highmem_page; + +static void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + struct highmem_pbe *pbe; + void *kaddr; + + if (PageNosave(page) && PageNosaveFree(page)) { + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + last_highmem_page = page; + return buffer; + } + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. + */ + pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); + if (!pbe) { + swsusp_free(); + return NULL; + } + pbe->orig_page = page; + if (safe_highmem_pages > 0) { + struct page *tmp; + + /* Copy of the page will be stored in high memory */ + kaddr = buffer; + tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); + safe_highmem_pages--; + last_highmem_page = tmp; + pbe->copy_page = tmp; + } else { + /* Copy of the page will be stored in normal memory */ + kaddr = safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->copy_page = virt_to_page(kaddr); + } + pbe->next = highmem_pblist; + highmem_pblist = pbe; + return kaddr; +} + +/** + * copy_last_highmem_page - copy the contents of a highmem image from + * @buffer, where the caller of snapshot_write_next() has place them, + * to the right location represented by @last_highmem_page . + */ + +static void copy_last_highmem_page(void) +{ + if (last_highmem_page) { + void *dst; + + dst = kmap_atomic(last_highmem_page, KM_USER0); + memcpy(dst, buffer, PAGE_SIZE); + kunmap_atomic(dst, KM_USER0); + last_highmem_page = NULL; + } +} + +static inline int last_highmem_page_copied(void) +{ + return !last_highmem_page; +} + +static inline void free_highmem_data(void) +{ + if (safe_highmem_bm) + memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); + + if (buffer) + free_image_page(buffer, PG_UNSAFE_CLEAR); +} +#else +static inline int get_safe_write_buffer(void) { return 0; } + +static unsigned int +count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } + +static inline int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + return 0; +} + +static inline void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + return NULL; +} + +static inline void copy_last_highmem_page(void) {} +static inline int last_highmem_page_copied(void) { return 1; } +static inline void free_highmem_data(void) {} +#endif /* CONFIG_HIGHMEM */ + /** * prepare_image - use the memory bitmap @bm to mark the pages that will * be overwritten in the process of restoring the system memory state @@ -1110,20 +1459,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) * The idea is to allocate a new memory bitmap first and then allocate * as many pages as needed for the image data, but not to assign these * pages to specific tasks initially. Instead, we just mark them as - * allocated and create a list of "safe" pages that will be used later. + * allocated and create a lists of "safe" pages that will be used + * later. On systems with high memory a list of "safe" highmem pages is + * also created. */ #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) -static struct linked_page *safe_pages_list; - static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { - unsigned int nr_pages; + unsigned int nr_pages, nr_highmem; struct linked_page *sp_list, *lp; int error; + /* If there is no highmem, the buffer will not be necessary */ + free_image_page(buffer, PG_UNSAFE_CLEAR); + buffer = NULL; + + nr_highmem = count_highmem_image_pages(bm); error = mark_unsafe_pages(bm); if (error) goto Free; @@ -1134,6 +1488,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) duplicate_memory_bitmap(new_bm, bm); memory_bm_free(bm, PG_UNSAFE_KEEP); + if (nr_highmem > 0) { + error = prepare_highmem_image(bm, &nr_highmem); + if (error) + goto Free; + } /* Reserve some safe pages for potential later use. * * NOTE: This way we make sure there will be enough safe pages for the @@ -1142,10 +1501,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) */ sp_list = NULL; /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ - nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { - lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); + lp = get_image_page(GFP_ATOMIC, PG_SAFE); if (!lp) { error = -ENOMEM; goto Free; @@ -1156,7 +1515,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) } /* Preallocate memory for the image */ safe_pages_list = NULL; - nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); if (!lp) { @@ -1181,7 +1540,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) } return 0; -Free: + Free: swsusp_free(); return error; } @@ -1196,6 +1555,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) struct pbe *pbe; struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); + if (PageHighMem(page)) + return get_highmem_page_buffer(page, ca); + if (PageNosave(page) && PageNosaveFree(page)) /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. @@ -1210,12 +1572,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) swsusp_free(); return NULL; } - pbe->orig_address = (unsigned long)page_address(page); - pbe->address = (unsigned long)safe_pages_list; + pbe->orig_address = page_address(page); + pbe->address = safe_pages_list; safe_pages_list = safe_pages_list->next; pbe->next = restore_pblist; restore_pblist = pbe; - return (void *)pbe->address; + return pbe->address; } /** @@ -1249,14 +1611,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) return 0; - if (!buffer) { - /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); + if (handle->offset == 0) { + if (!buffer) + /* This makes the buffer be freed by swsusp_free() */ + buffer = get_image_page(GFP_ATOMIC, PG_ANY); + if (!buffer) return -ENOMEM; - } - if (!handle->offset) + handle->buffer = buffer; + } handle->sync_read = 1; if (handle->prev < handle->cur) { if (handle->prev == 0) { @@ -1284,8 +1648,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) return -ENOMEM; } } else { + copy_last_highmem_page(); handle->buffer = get_buffer(&orig_bm, &ca); - handle->sync_read = 0; + if (handle->buffer != buffer) + handle->sync_read = 0; } handle->prev = handle->cur; } @@ -1301,15 +1667,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) return count; } +/** + * snapshot_write_finalize - must be called after the last call to + * snapshot_write_next() in case the last page in the image happens + * to be a highmem page and its contents should be stored in the + * highmem. Additionally, it releases the memory that will not be + * used any more. + */ + +void snapshot_write_finalize(struct snapshot_handle *handle) +{ + copy_last_highmem_page(); + /* Free only if we have loaded the image entirely */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + free_highmem_data(); + } +} + int snapshot_image_loaded(struct snapshot_handle *handle) { - return !(!nr_copy_pages || + return !(!nr_copy_pages || !last_highmem_page_copied() || handle->cur <= nr_meta_pages + nr_copy_pages); } -void snapshot_free_unused_memory(struct snapshot_handle *handle) +#ifdef CONFIG_HIGHMEM +/* Assumes that @buf is ready and points to a "safe" page */ +static inline void +swap_two_pages_data(struct page *p1, struct page *p2, void *buf) { - /* Free only if we have loaded the image entirely */ - if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) - memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + void *kaddr1, *kaddr2; + + kaddr1 = kmap_atomic(p1, KM_USER0); + kaddr2 = kmap_atomic(p2, KM_USER1); + memcpy(buf, kaddr1, PAGE_SIZE); + memcpy(kaddr1, kaddr2, PAGE_SIZE); + memcpy(kaddr2, buf, PAGE_SIZE); + kunmap_atomic(kaddr1, KM_USER0); + kunmap_atomic(kaddr2, KM_USER1); +} + +/** + * restore_highmem - for each highmem page that was allocated before + * the suspend and included in the suspend image, and also has been + * allocated by the "resume" kernel swap its current (ie. "before + * resume") contents with the previous (ie. "before suspend") one. + * + * If the resume eventually fails, we can call this function once + * again and restore the "before resume" highmem state. + */ + +int restore_highmem(void) +{ + struct highmem_pbe *pbe = highmem_pblist; + void *buf; + + if (!pbe) + return 0; + + buf = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!buf) + return -ENOMEM; + + while (pbe) { + swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); + pbe = pbe->next; + } + free_image_page(buf, PG_UNSAFE_CLEAR); + return 0; } +#endif /* CONFIG_HIGHMEM */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 1a3b0dd2c3f..f133d4a6d81 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -34,34 +34,123 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; - swp_entry_t image; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; + sector_t image; char orig_sig[10]; char sig[10]; } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; /* - * Saving part... + * General things */ static unsigned short root_swap = 0xffff; +static struct block_device *resume_bdev; + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * @bio_chain: list of pending biod (for async reading) + * + * Straight from the textbook - allocate and initialize the bio. + * If we're reading, make sure the page is marked as dirty. + * Then submit it and, if @bio_chain == NULL, wait. + */ +static int submit(int rw, pgoff_t page_off, struct page *page, + struct bio **bio_chain) +{ + struct bio *bio; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_swap_bio_read; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); + bio_put(bio); + return -EFAULT; + } + + lock_page(page); + bio_get(bio); + + if (bio_chain == NULL) { + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + wait_on_page_locked(page); + if (rw == READ) + bio_set_pages_dirty(bio); + bio_put(bio); + } else { + if (rw == READ) + get_page(page); /* These pages are freed later */ + bio->bi_private = *bio_chain; + *bio_chain = bio; + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + } + return 0; +} + +static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(READ, page_off, virt_to_page(addr), bio_chain); +} + +static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(WRITE, page_off, virt_to_page(addr), bio_chain); +} + +static int wait_on_bio_chain(struct bio **bio_chain) +{ + struct bio *bio; + struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + if (bio == NULL) + return 0; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + bio = next_bio; + } + *bio_chain = NULL; + return ret; +} + +/* + * Saving part + */ -static int mark_swapfiles(swp_entry_t start) +static int mark_swapfiles(sector_t start) { int error; - rw_swap_page_sync(READ, swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header), NULL); + bio_read_page(swsusp_resume_block, &swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.image = start; - error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header), - NULL); + error = bio_write_page(swsusp_resume_block, + &swsusp_header, NULL); } else { - pr_debug("swsusp: Partition is not swap space.\n"); + printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; } return error; @@ -74,12 +163,21 @@ static int mark_swapfiles(swp_entry_t start) static int swsusp_swap_check(void) /* This is called before saving image */ { - int res = swap_type_of(swsusp_resume_device); + int res; + + res = swap_type_of(swsusp_resume_device, swsusp_resume_block); + if (res < 0) + return res; + + root_swap = res; + resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE); + if (IS_ERR(resume_bdev)) + return PTR_ERR(resume_bdev); + + res = set_blocksize(resume_bdev, PAGE_SIZE); + if (res < 0) + blkdev_put(resume_bdev); - if (res >= 0) { - root_swap = res; - return 0; - } return res; } @@ -90,36 +188,26 @@ static int swsusp_swap_check(void) /* This is called before saving image */ * @bio_chain: Link the next write BIO here */ -static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) +static int write_page(void *buf, sector_t offset, struct bio **bio_chain) { - swp_entry_t entry; - int error = -ENOSPC; - - if (offset) { - struct page *page = virt_to_page(buf); - - if (bio_chain) { - /* - * Whether or not we successfully allocated a copy page, - * we take a ref on the page here. It gets undone in - * wait_on_bio_chain(). - */ - struct page *page_copy; - page_copy = alloc_page(GFP_ATOMIC); - if (page_copy == NULL) { - WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ - get_page(page); - } else { - memcpy(page_address(page_copy), - page_address(page), PAGE_SIZE); - page = page_copy; - } + void *src; + + if (!offset) + return -ENOSPC; + + if (bio_chain) { + src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (src) { + memcpy(src, buf, PAGE_SIZE); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; } - entry = swp_entry(root_swap, offset); - error = rw_swap_page_sync(WRITE, entry, page, bio_chain); + } else { + src = buf; } - return error; + return bio_write_page(offset, src, bio_chain); } /* @@ -137,11 +225,11 @@ static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) * at a time. */ -#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1) +#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) struct swap_map_page { - unsigned long entries[MAP_PAGE_ENTRIES]; - unsigned long next_swap; + sector_t entries[MAP_PAGE_ENTRIES]; + sector_t next_swap; }; /** @@ -151,7 +239,7 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; - unsigned long cur_swap; + sector_t cur_swap; struct bitmap_page *bitmap; unsigned int k; }; @@ -166,26 +254,6 @@ static void release_swap_writer(struct swap_map_handle *handle) handle->bitmap = NULL; } -static void show_speed(struct timeval *start, struct timeval *stop, - unsigned nr_pages, char *msg) -{ - s64 elapsed_centisecs64; - int centisecs; - int k; - int kps; - - elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); - do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); - centisecs = elapsed_centisecs64; - if (centisecs == 0) - centisecs = 1; /* avoid div-by-zero */ - k = nr_pages * (PAGE_SIZE / 1024); - kps = (k * 100) / centisecs; - printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, - centisecs / 100, centisecs % 100, - kps / 1000, (kps % 1000) / 10); -} - static int get_swap_writer(struct swap_map_handle *handle) { handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); @@ -196,7 +264,7 @@ static int get_swap_writer(struct swap_map_handle *handle) release_swap_writer(handle); return -ENOMEM; } - handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap); + handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); if (!handle->cur_swap) { release_swap_writer(handle); return -ENOSPC; @@ -205,43 +273,15 @@ static int get_swap_writer(struct swap_map_handle *handle) return 0; } -static int wait_on_bio_chain(struct bio **bio_chain) -{ - struct bio *bio; - struct bio *next_bio; - int ret = 0; - - if (bio_chain == NULL) - return 0; - - bio = *bio_chain; - if (bio == NULL) - return 0; - while (bio) { - struct page *page; - - next_bio = bio->bi_private; - page = bio->bi_io_vec[0].bv_page; - wait_on_page_locked(page); - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - put_page(page); - bio_put(bio); - bio = next_bio; - } - *bio_chain = NULL; - return ret; -} - static int swap_write_page(struct swap_map_handle *handle, void *buf, struct bio **bio_chain) { int error = 0; - unsigned long offset; + sector_t offset; if (!handle->cur) return -EINVAL; - offset = alloc_swap_page(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap, handle->bitmap); error = write_page(buf, offset, bio_chain); if (error) return error; @@ -250,7 +290,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, error = wait_on_bio_chain(bio_chain); if (error) goto out; - offset = alloc_swap_page(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap, handle->bitmap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; @@ -261,7 +301,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, handle->cur_swap = offset; handle->k = 0; } -out: + out: return error; } @@ -315,7 +355,7 @@ static int save_image(struct swap_map_handle *handle, error = err2; if (!error) printk("\b\b\b\bdone\n"); - show_speed(&start, &stop, nr_to_write, "Wrote"); + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); return error; } @@ -350,100 +390,50 @@ int swsusp_write(void) struct swsusp_info *header; int error; - if ((error = swsusp_swap_check())) { + error = swsusp_swap_check(); + if (error) { printk(KERN_ERR "swsusp: Cannot find swap device, try " "swapon -a.\n"); return error; } memset(&snapshot, 0, sizeof(struct snapshot_handle)); error = snapshot_read_next(&snapshot, PAGE_SIZE); - if (error < PAGE_SIZE) - return error < 0 ? error : -EFAULT; + if (error < PAGE_SIZE) { + if (error >= 0) + error = -EFAULT; + + goto out; + } header = (struct swsusp_info *)data_of(snapshot); if (!enough_swap(header->pages)) { printk(KERN_ERR "swsusp: Not enough free swap\n"); - return -ENOSPC; + error = -ENOSPC; + goto out; } error = get_swap_writer(&handle); if (!error) { - unsigned long start = handle.cur_swap; + sector_t start = handle.cur_swap; + error = swap_write_page(&handle, header, NULL); if (!error) error = save_image(&handle, &snapshot, header->pages - 1); + if (!error) { flush_swap_writer(&handle); printk("S"); - error = mark_swapfiles(swp_entry(root_swap, start)); + error = mark_swapfiles(start); printk("|\n"); } } if (error) free_all_swap_pages(root_swap, handle.bitmap); release_swap_writer(&handle); + out: + swsusp_close(); return error; } -static struct block_device *resume_bdev; - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * @bio_chain: list of pending biod (for async reading) - * - * Straight from the textbook - allocate and initialize the bio. - * If we're reading, make sure the page is marked as dirty. - * Then submit it and, if @bio_chain == NULL, wait. - */ -static int submit(int rw, pgoff_t page_off, struct page *page, - struct bio **bio_chain) -{ - struct bio *bio; - - bio = bio_alloc(GFP_ATOMIC, 1); - if (!bio) - return -ENOMEM; - bio->bi_sector = page_off * (PAGE_SIZE >> 9); - bio->bi_bdev = resume_bdev; - bio->bi_end_io = end_swap_bio_read; - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); - bio_put(bio); - return -EFAULT; - } - - lock_page(page); - bio_get(bio); - - if (bio_chain == NULL) { - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - wait_on_page_locked(page); - if (rw == READ) - bio_set_pages_dirty(bio); - bio_put(bio); - } else { - if (rw == READ) - get_page(page); /* These pages are freed later */ - bio->bi_private = *bio_chain; - *bio_chain = bio; - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - } - return 0; -} - -static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(READ, page_off, virt_to_page(addr), bio_chain); -} - -static int bio_write_page(pgoff_t page_off, void *addr) -{ - return submit(WRITE, page_off, virt_to_page(addr), NULL); -} - /** * The following functions allow us to read data using a swap map * in a file-alike way @@ -456,17 +446,18 @@ static void release_swap_reader(struct swap_map_handle *handle) handle->cur = NULL; } -static int get_swap_reader(struct swap_map_handle *handle, - swp_entry_t start) +static int get_swap_reader(struct swap_map_handle *handle, sector_t start) { int error; - if (!swp_offset(start)) + if (!start) return -EINVAL; - handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + + handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); if (!handle->cur) return -ENOMEM; - error = bio_read_page(swp_offset(start), handle->cur, NULL); + + error = bio_read_page(start, handle->cur, NULL); if (error) { release_swap_reader(handle); return error; @@ -478,7 +469,7 @@ static int get_swap_reader(struct swap_map_handle *handle, static int swap_read_page(struct swap_map_handle *handle, void *buf, struct bio **bio_chain) { - unsigned long offset; + sector_t offset; int error; if (!handle->cur) @@ -547,11 +538,11 @@ static int load_image(struct swap_map_handle *handle, error = err2; if (!error) { printk("\b\b\b\bdone\n"); - snapshot_free_unused_memory(snapshot); + snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } - show_speed(&start, &stop, nr_to_read, "Read"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); return error; } @@ -600,12 +591,16 @@ int swsusp_check(void) if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); memset(&swsusp_header, 0, sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header, NULL))) + error = bio_read_page(swsusp_resume_block, + &swsusp_header, NULL); + if (error) return error; + if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); /* Reset swap signature now */ - error = bio_write_page(0, &swsusp_header); + error = bio_write_page(swsusp_resume_block, + &swsusp_header, NULL); } else { return -EINVAL; } diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 0b66659dc51..31aa0390c77 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -49,6 +49,7 @@ #include <linux/bootmem.h> #include <linux/syscalls.h> #include <linux/highmem.h> +#include <linux/time.h> #include "power.h" @@ -64,10 +65,8 @@ int in_suspend __nosavedata = 0; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void); -int save_highmem(void); int restore_highmem(void); #else -static inline int save_highmem(void) { return 0; } static inline int restore_highmem(void) { return 0; } static inline unsigned int count_highmem_pages(void) { return 0; } #endif @@ -134,18 +133,18 @@ static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) return 0; } -unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap) +sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) { unsigned long offset; offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { - if (bitmap_set(bitmap, offset)) { + if (bitmap_set(bitmap, offset)) swap_free(swp_entry(swap, offset)); - offset = 0; - } + else + return swapdev_block(swap, offset); } - return offset; + return 0; } void free_all_swap_pages(int swap, struct bitmap_page *bitmap) @@ -166,6 +165,34 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap) } /** + * swsusp_show_speed - print the time elapsed between two events represented by + * @start and @stop + * + * @nr_pages - number of pages processed between @start and @stop + * @msg - introductory message to print + */ + +void swsusp_show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) +{ + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; + + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); +} + +/** * swsusp_shrink_memory - Try to free as much memory as needed * * ... but do not OOM-kill anyone @@ -184,23 +211,37 @@ static inline unsigned long __shrink_memory(long tmp) int swsusp_shrink_memory(void) { - long size, tmp; + long tmp; struct zone *zone; unsigned long pages = 0; unsigned int i = 0; char *p = "-\\|/"; + struct timeval start, stop; printk("Shrinking memory... "); + do_gettimeofday(&start); do { - size = 2 * count_highmem_pages(); - size += size / 50 + count_data_pages() + PAGES_FOR_IO; + long size, highmem_size; + + highmem_size = count_highmem_pages(); + size = count_data_pages() + PAGES_FOR_IO; tmp = size; + size += highmem_size; for_each_zone (zone) - if (!is_highmem(zone) && populated_zone(zone)) { - tmp -= zone->free_pages; - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - tmp += snapshot_additional_pages(zone); + if (populated_zone(zone)) { + if (is_highmem(zone)) { + highmem_size -= zone->free_pages; + } else { + tmp -= zone->free_pages; + tmp += zone->lowmem_reserve[ZONE_NORMAL]; + tmp += snapshot_additional_pages(zone); + } } + + if (highmem_size < 0) + highmem_size = 0; + + tmp += highmem_size; if (tmp > 0) { tmp = __shrink_memory(tmp); if (!tmp) @@ -212,7 +253,9 @@ int swsusp_shrink_memory(void) } printk("\b%c", p[i++%4]); } while (tmp > 0); + do_gettimeofday(&stop); printk("\bdone (%lu pages freed)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Freed"); return 0; } @@ -223,6 +266,7 @@ int swsusp_suspend(void) if ((error = arch_prepare_suspend())) return error; + local_irq_disable(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* device_power_down() now. @@ -235,23 +279,16 @@ int swsusp_suspend(void) goto Enable_irqs; } - if ((error = save_highmem())) { - printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); - goto Restore_highmem; - } - save_processor_state(); if ((error = swsusp_arch_suspend())) printk(KERN_ERR "Error %d suspending\n", error); /* Restore control flow magically appears here */ restore_processor_state(); -Restore_highmem: - restore_highmem(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ device_power_up(); -Enable_irqs: + Enable_irqs: local_irq_enable(); return error; } @@ -268,18 +305,23 @@ int swsusp_resume(void) printk(KERN_ERR "Some devices failed to power down, very bad\n"); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); - error = swsusp_arch_resume(); - /* Code below is only ever reached in case of failure. Otherwise - * execution continues at place where swsusp_arch_suspend was called - */ - BUG_ON(!error); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* The code below is only ever reached in case of a failure. + * Otherwise execution continues at place where + * swsusp_arch_suspend() was called + */ + BUG_ON(!error); + /* This call to restore_highmem() undos the previous one */ + restore_highmem(); + } /* The only reason why swsusp_arch_resume() can fail is memory being * very tight, so we have to free it as soon as we can to avoid * subsequent failures */ swsusp_free(); restore_processor_state(); - restore_highmem(); touch_softlockup_watchdog(); device_power_up(); local_irq_enable(); diff --git a/kernel/power/user.c b/kernel/power/user.c index d991d3b0e5a..89443b85163 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -11,6 +11,7 @@ #include <linux/suspend.h> #include <linux/syscalls.h> +#include <linux/reboot.h> #include <linux/string.h> #include <linux/device.h> #include <linux/miscdevice.h> @@ -21,6 +22,7 @@ #include <linux/fs.h> #include <linux/console.h> #include <linux/cpu.h> +#include <linux/freezer.h> #include <asm/uaccess.h> @@ -54,7 +56,8 @@ static int snapshot_open(struct inode *inode, struct file *filp) filp->private_data = data; memset(&data->handle, 0, sizeof(struct snapshot_handle)); if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { - data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1; + data->swap = swsusp_resume_device ? + swap_type_of(swsusp_resume_device, 0) : -1; data->mode = O_RDONLY; } else { data->swap = -1; @@ -76,10 +79,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) free_all_swap_pages(data->swap, data->bitmap); free_bitmap(data->bitmap); if (data->frozen) { - down(&pm_sem); + mutex_lock(&pm_mutex); thaw_processes(); enable_nonboot_cpus(); - up(&pm_sem); + mutex_unlock(&pm_mutex); } atomic_inc(&device_available); return 0; @@ -124,7 +127,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, { int error = 0; struct snapshot_data *data; - loff_t offset, avail; + loff_t avail; + sector_t offset; if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) return -ENOTTY; @@ -140,7 +144,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case SNAPSHOT_FREEZE: if (data->frozen) break; - down(&pm_sem); + mutex_lock(&pm_mutex); error = disable_nonboot_cpus(); if (!error) { error = freeze_processes(); @@ -150,7 +154,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EBUSY; } } - up(&pm_sem); + mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; break; @@ -158,10 +162,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case SNAPSHOT_UNFREEZE: if (!data->frozen) break; - down(&pm_sem); + mutex_lock(&pm_mutex); thaw_processes(); enable_nonboot_cpus(); - up(&pm_sem); + mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -170,7 +174,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - down(&pm_sem); + mutex_lock(&pm_mutex); /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (!error) { @@ -183,7 +187,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } resume_console(); } - up(&pm_sem); + mutex_unlock(&pm_mutex); if (!error) error = put_user(in_suspend, (unsigned int __user *)arg); if (!error) @@ -191,13 +195,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_ATOMIC_RESTORE: + snapshot_write_finalize(&data->handle); if (data->mode != O_WRONLY || !data->frozen || !snapshot_image_loaded(&data->handle)) { error = -EPERM; break; } - snapshot_free_unused_memory(&data->handle); - down(&pm_sem); + mutex_lock(&pm_mutex); pm_prepare_console(); suspend_console(); error = device_suspend(PMSG_PRETHAW); @@ -207,7 +211,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } resume_console(); pm_restore_console(); - up(&pm_sem); + mutex_unlock(&pm_mutex); break; case SNAPSHOT_FREE: @@ -238,10 +242,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; } } - offset = alloc_swap_page(data->swap, data->bitmap); + offset = alloc_swapdev_block(data->swap, data->bitmap); if (offset) { offset <<= PAGE_SHIFT; - error = put_user(offset, (loff_t __user *)arg); + error = put_user(offset, (sector_t __user *)arg); } else { error = -ENOSPC; } @@ -264,7 +268,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, * so we need to recode them */ if (old_decode_dev(arg)) { - data->swap = swap_type_of(old_decode_dev(arg)); + data->swap = swap_type_of(old_decode_dev(arg), 0); if (data->swap < 0) error = -ENODEV; } else { @@ -282,7 +286,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; } - if (down_trylock(&pm_sem)) { + if (!mutex_trylock(&pm_mutex)) { error = -EBUSY; break; } @@ -309,8 +313,66 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (pm_ops->finish) pm_ops->finish(PM_SUSPEND_MEM); -OutS3: - up(&pm_sem); + OutS3: + mutex_unlock(&pm_mutex); + break; + + case SNAPSHOT_PMOPS: + switch (arg) { + + case PMOPS_PREPARE: + if (pm_ops->prepare) { + error = pm_ops->prepare(PM_SUSPEND_DISK); + } + break; + + case PMOPS_ENTER: + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + error = pm_ops->enter(PM_SUSPEND_DISK); + break; + + case PMOPS_FINISH: + if (pm_ops && pm_ops->finish) { + pm_ops->finish(PM_SUSPEND_DISK); + } + break; + + default: + printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); + error = -EINVAL; + + } + break; + + case SNAPSHOT_SET_SWAP_AREA: + if (data->bitmap) { + error = -EPERM; + } else { + struct resume_swap_area swap_area; + dev_t swdev; + + error = copy_from_user(&swap_area, (void __user *)arg, + sizeof(struct resume_swap_area)); + if (error) { + error = -EFAULT; + break; + } + + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + swdev = old_decode_dev(swap_area.dev); + if (swdev) { + offset = swap_area.offset; + data->swap = swap_type_of(swdev, offset); + if (data->swap < 0) + error = -ENODEV; + } else { + data->swap = -1; + error = -EINVAL; + } + } break; default: @@ -321,7 +383,7 @@ OutS3: return error; } -static struct file_operations snapshot_fops = { +static const struct file_operations snapshot_fops = { .open = snapshot_open, .release = snapshot_release, .read = snapshot_read, diff --git a/kernel/printk.c b/kernel/printk.c index 66426552fbf..185bb45eacf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -53,8 +53,6 @@ int console_printk[4] = { DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ }; -EXPORT_UNUSED_SYMBOL(console_printk); /* June 2006 */ - /* * Low lever drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. @@ -335,13 +333,25 @@ static void __call_console_drivers(unsigned long start, unsigned long end) } } +static int __read_mostly ignore_loglevel; + +int __init ignore_loglevel_setup(char *str) +{ + ignore_loglevel = 1; + printk(KERN_INFO "debug: ignoring loglevel setting.\n"); + + return 1; +} + +__setup("ignore_loglevel", ignore_loglevel_setup); + /* * Write out chars from start to end - 1 inclusive */ static void _call_console_drivers(unsigned long start, unsigned long end, int msg_log_level) { - if (msg_log_level < console_loglevel && + if ((msg_log_level < console_loglevel || ignore_loglevel) && console_drivers && start != end) { if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { /* wrapped write */ @@ -631,12 +641,7 @@ EXPORT_SYMBOL(vprintk); asmlinkage long sys_syslog(int type, char __user *buf, int len) { - return 0; -} - -int do_syslog(int type, char __user *buf, int len) -{ - return 0; + return -ENOSYS; } static void call_console_drivers(unsigned long start, unsigned long end) @@ -777,7 +782,6 @@ int is_console_locked(void) { return console_locked; } -EXPORT_UNUSED_SYMBOL(is_console_locked); /* June 2006 */ /** * release_console_sem - unlock the console system diff --git a/kernel/profile.c b/kernel/profile.c index f940b462eec..fb5e03d57e9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -40,7 +40,7 @@ int (*timer_hook)(struct pt_regs *) __read_mostly; static atomic_t *prof_buffer; static unsigned long prof_len, prof_shift; -static int prof_on __read_mostly; +int prof_on __read_mostly; static cpumask_t prof_cpu_mask = CPU_MASK_ALL; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); @@ -51,9 +51,19 @@ static DEFINE_MUTEX(profile_flip_mutex); static int __init profile_setup(char * str) { static char __initdata schedstr[] = "schedule"; + static char __initdata sleepstr[] = "sleep"; int par; - if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, sleepstr, strlen(sleepstr))) { + prof_on = SLEEP_PROFILING; + if (str[strlen(sleepstr)] == ',') + str += strlen(sleepstr) + 1; + if (get_option(&str, &par)) + prof_shift = par; + printk(KERN_INFO + "kernel sleep profiling enabled (shift: %ld)\n", + prof_shift); + } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { prof_on = SCHED_PROFILING; if (str[strlen(schedstr)] == ',') str += strlen(schedstr) + 1; @@ -204,7 +214,8 @@ EXPORT_SYMBOL_GPL(profile_event_unregister); * positions to which hits are accounted during short intervals (e.g. * several seconds) is usually very small. Exclusion from buffer * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING profile_hit() may be called from process context). + * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from + * process context). * The hash function is meant to be lightweight as opposed to strong, * and was vaguely inspired by ppc64 firmware-supported inverted * pagetable hash functions, but uses a full hashtable full of finite @@ -257,7 +268,7 @@ static void profile_discard_flip_buffers(void) mutex_unlock(&profile_flip_mutex); } -void profile_hit(int type, void *__pc) +void profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long primary, secondary, flags, pc = (unsigned long)__pc; int i, j, cpu; @@ -274,21 +285,31 @@ void profile_hit(int type, void *__pc) put_cpu(); return; } + /* + * We buffer the global profiler buffer into a per-CPU + * queue and thus reduce the number of global (and possibly + * NUMA-alien) accesses. The write-queue is self-coalescing: + */ local_irq_save(flags); do { for (j = 0; j < PROFILE_GRPSZ; ++j) { if (hits[i + j].pc == pc) { - hits[i + j].hits++; + hits[i + j].hits += nr_hits; goto out; } else if (!hits[i + j].hits) { hits[i + j].pc = pc; - hits[i + j].hits = 1; + hits[i + j].hits = nr_hits; goto out; } } i = (i + secondary) & (NR_PROFILE_HIT - 1); } while (i != primary); - atomic_inc(&prof_buffer[pc]); + + /* + * Add the current hit(s) and flush the write-queue out + * to the global buffer: + */ + atomic_add(nr_hits, &prof_buffer[pc]); for (i = 0; i < NR_PROFILE_HIT; ++i) { atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].pc = hits[i].hits = 0; @@ -298,7 +319,6 @@ out: put_cpu(); } -#ifdef CONFIG_HOTPLUG_CPU static int __devinit profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { @@ -351,19 +371,19 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #else /* !CONFIG_SMP */ #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) +#define profile_cpu_callback NULL -void profile_hit(int type, void *__pc) +void profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; if (prof_on != type || !prof_buffer) return; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_inc(&prof_buffer[min(pc, prof_len - 1)]); + atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); } #endif /* !CONFIG_SMP */ @@ -442,7 +462,8 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) read = 0; while (p < sizeof(unsigned int) && count > 0) { - put_user(*((char *)(&sample_step)+p),buf); + if (put_user(*((char *)(&sample_step)+p),buf)) + return -EFAULT; buf++; p++; count--; read++; } pnt = (char *)prof_buffer + p - sizeof(atomic_t); @@ -480,7 +501,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return count; } -static struct file_operations proc_profile_operations = { +static const struct file_operations proc_profile_operations = { .read = read_profile, .write = write_profile, }; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 26bb5ffe1ef..3554b76da84 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -235,12 +235,14 @@ static void rcu_do_batch(struct rcu_data *rdp) list = rdp->donelist; while (list) { - next = rdp->donelist = list->next; + next = list->next; + prefetch(next); list->func(list); list = next; if (++count >= rdp->blimit) break; } + rdp->donelist = list; local_irq_disable(); rdp->qlen -= count; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e2bda18f6f4..c52f981ea00 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -401,7 +401,7 @@ static void srcu_torture_cleanup(void) cleanup_srcu_struct(&srcu_ctl); } -static int srcu_torture_read_lock(void) +static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) { return srcu_read_lock(&srcu_ctl); } @@ -419,7 +419,7 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) schedule_timeout_interruptible(longdelay); } -static void srcu_torture_read_unlock(int idx) +static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) { srcu_read_unlock(&srcu_ctl, idx); } diff --git a/kernel/relay.c b/kernel/relay.c index 2b92e8ece85..75a3a9a7efc 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1013,7 +1013,7 @@ static ssize_t relay_file_sendfile(struct file *filp, actor, &desc); } -struct file_operations relay_file_operations = { +const struct file_operations relay_file_operations = { .open = relay_file_open, .poll = relay_file_poll, .mmap = relay_file_mmap, diff --git a/kernel/resource.c b/kernel/resource.c index 6de60c12143..7b9a497419d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -88,7 +88,7 @@ static int r_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations resource_op = { +static const struct seq_operations resource_op = { .start = r_start, .next = r_next, .stop = r_stop, @@ -115,14 +115,14 @@ static int iomem_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_ioports_operations = { +static const struct file_operations proc_ioports_operations = { .open = ioports_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -static struct file_operations proc_iomem_operations = { +static const struct file_operations proc_iomem_operations = { .open = iomem_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 6dcea9dd8c9..015fc633c96 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/sysdev.h> #include <linux/timer.h> +#include <linux/freezer.h> #include "rtmutex.h" diff --git a/kernel/sched.c b/kernel/sched.c index 3399701c680..f385eff4682 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -34,7 +34,7 @@ #include <linux/security.h> #include <linux/notifier.h> #include <linux/profile.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/vmalloc.h> #include <linux/blkdev.h> #include <linux/delay.h> @@ -505,7 +505,7 @@ static int schedstat_open(struct inode *inode, struct file *file) return res; } -struct file_operations proc_schedstat_operations = { +const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, @@ -948,6 +948,17 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) } #endif + /* + * Sleep time is in units of nanosecs, so shift by 20 to get a + * milliseconds-range estimation of the amount of time that the task + * spent sleeping: + */ + if (unlikely(prof_on == SLEEP_PROFILING)) { + if (p->state == TASK_UNINTERRUPTIBLE) + profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), + (now - p->timestamp) >> 20); + } + if (!rt_task(p)) p->prio = recalc_task_prio(p, now); @@ -3333,6 +3344,7 @@ asmlinkage void __sched schedule(void) printk(KERN_ERR "BUG: scheduling while atomic: " "%s/0x%08x/%d\n", current->comm, preempt_count(), current->pid); + debug_show_held_locks(current); dump_stack(); } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -4804,18 +4816,18 @@ static void show_task(struct task_struct *p) show_stack(p, NULL); } -void show_state(void) +void show_state_filter(unsigned long state_filter) { struct task_struct *g, *p; #if (BITS_PER_LONG == 32) printk("\n" - " sibling\n"); - printk(" task PC pid father child younger older\n"); + " free sibling\n"); + printk(" task PC stack pid father child younger older\n"); #else printk("\n" - " sibling\n"); - printk(" task PC pid father child younger older\n"); + " free sibling\n"); + printk(" task PC stack pid father child younger older\n"); #endif read_lock(&tasklist_lock); do_each_thread(g, p) { @@ -4824,11 +4836,16 @@ void show_state(void) * console might take alot of time: */ touch_nmi_watchdog(); - show_task(p); + if (p->state & state_filter) + show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); - debug_show_all_locks(); + /* + * Only show locks if all tasks are dumped: + */ + if (state_filter == -1) + debug_show_all_locks(); } /** @@ -6723,8 +6740,6 @@ SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, sched_smt_power_savings_store); #endif - -#ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. The domains * and groups cannot be updated in place without racing with the balancing @@ -6757,7 +6772,6 @@ static int update_sched_domains(struct notifier_block *nfb, return NOTIFY_OK; } -#endif void __init sched_init_smp(void) { @@ -6867,6 +6881,7 @@ void __might_sleep(char *file, int line) " context at %s:%d\n", file, line); printk("in_atomic():%d, irqs_disabled():%d\n", in_atomic(), irqs_disabled()); + debug_show_held_locks(current); dump_stack(); } #endif diff --git a/kernel/signal.c b/kernel/signal.c index df18c167a2a..ec81defde33 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -23,6 +23,7 @@ #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/capability.h> +#include <linux/freezer.h> #include <asm/param.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -33,7 +34,7 @@ * SLAB caches for signal bits. */ -static kmem_cache_t *sigqueue_cachep; +static struct kmem_cache *sigqueue_cachep; /* * In POSIX a signal is sent either to a specific thread (Linux task) @@ -1133,8 +1134,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) return error; } -int -kill_proc_info(int sig, struct siginfo *info, pid_t pid) +static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; rcu_read_lock(); diff --git a/kernel/softirq.c b/kernel/softirq.c index bf25015dce1..918e52df090 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -574,8 +574,6 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: - BUG_ON(per_cpu(tasklet_vec, hotcpu).list); - BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list); p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); if (IS_ERR(p)) { printk("ksoftirqd for %i failed\n", hotcpu); diff --git a/kernel/sys.c b/kernel/sys.c index c87b461de38..a0c1a29a507 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1102,14 +1102,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) asmlinkage long sys_setuid(uid_t uid) { int old_euid = current->euid; - int old_ruid, old_suid, new_ruid, new_suid; + int old_ruid, old_suid, new_suid; int retval; retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); if (retval) return retval; - old_ruid = new_ruid = current->uid; + old_ruid = current->uid; old_suid = current->suid; new_suid = old_suid; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792..8e9f00fd6d1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_X86 #include <asm/nmi.h> +#include <asm/stacktrace.h> #endif #if defined(CONFIG_SYSCTL) @@ -170,7 +171,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); static int proc_opensys(struct inode *, struct file *); -struct file_operations proc_sys_file_operations = { +const struct file_operations proc_sys_file_operations = { .open = proc_opensys, .read = proc_readsys, .write = proc_writesys, @@ -707,6 +708,14 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { @@ -977,17 +986,6 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif -#ifdef CONFIG_SWAP - { - .ctl_name = VM_SWAP_TOKEN_TIMEOUT, - .procname = "swap_token_timeout", - .data = &swap_token_default_timeout, - .maxlen = sizeof(swap_token_default_timeout), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#endif #ifdef CONFIG_NUMA { .ctl_name = VM_ZONE_RECLAIM_MODE, @@ -1886,7 +1884,7 @@ static int __do_proc_dointvec(void *tbl_data, ctl_table *table, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; @@ -2137,7 +2135,7 @@ static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d3d28919d4b..4c3476fa058 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -34,7 +34,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; static int family_registered; -kmem_cache_t *taskstats_cache; +struct kmem_cache *taskstats_cache; static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -69,7 +69,7 @@ enum actions { }; static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, - void **replyp, size_t size) + size_t size) { struct sk_buff *skb; void *reply; @@ -94,7 +94,6 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, } *skbp = skb; - *replyp = reply; return 0; } @@ -119,10 +118,10 @@ static int send_reply(struct sk_buff *skb, pid_t pid) /* * Send taskstats data in @skb to listeners registered for @cpu's exit data */ -static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) +static void send_cpu_listeners(struct sk_buff *skb, + struct listener_list *listeners) { struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); - struct listener_list *listeners; struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); @@ -135,7 +134,6 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) } rc = 0; - listeners = &per_cpu(listener_array, cpu); down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { skb_next = NULL; @@ -186,6 +184,7 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, } else get_task_struct(tsk); + memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to * fill in relevant parts of struct taskstsats as follows @@ -228,6 +227,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); + else + memset(stats, 0, sizeof(*stats)); tsk = first; do { @@ -344,14 +345,36 @@ static int parse(struct nlattr *na, cpumask_t *mask) return ret; } +static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) +{ + struct nlattr *na, *ret; + int aggr; + + aggr = (type == TASKSTATS_TYPE_PID) + ? TASKSTATS_TYPE_AGGR_PID + : TASKSTATS_TYPE_AGGR_TGID; + + na = nla_nest_start(skb, aggr); + if (!na) + goto err; + if (nla_put(skb, type, sizeof(pid), &pid) < 0) + goto err; + ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); + if (!ret) + goto err; + nla_nest_end(skb, na); + + return nla_data(ret); +err: + return NULL; +} + static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; - struct taskstats stats; - void *reply; + struct taskstats *stats; size_t size; - struct nlattr *na; cpumask_t mask; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); @@ -372,83 +395,71 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); - memset(&stats, 0, sizeof(stats)); - rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); + rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; + rc = -EINVAL; if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); - rc = fill_pid(pid, NULL, &stats); - if (rc < 0) + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); + if (!stats) goto err; - na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); - NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); - NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, - stats); + rc = fill_pid(pid, NULL, stats); + if (rc < 0) + goto err; } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); - rc = fill_tgid(tgid, NULL, &stats); - if (rc < 0) + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); + if (!stats) goto err; - na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); - NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); - NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, - stats); - } else { - rc = -EINVAL; + rc = fill_tgid(tgid, NULL, stats); + if (rc < 0) + goto err; + } else goto err; - } - - nla_nest_end(rep_skb, na); return send_reply(rep_skb, info->snd_pid); - -nla_put_failure: - rc = genlmsg_cancel(rep_skb, reply); err: nlmsg_free(rep_skb); return rc; } -void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) +static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { - struct listener_list *listeners; - struct taskstats *tmp; - /* - * This is the cpu on which the task is exiting currently and will - * be the one for which the exit event is sent, even if the cpu - * on which this function is running changes later. - */ - *mycpu = raw_smp_processor_id(); + struct signal_struct *sig = tsk->signal; + struct taskstats *stats; - *ptidstats = NULL; - tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); - if (!tmp) - return; + if (sig->stats || thread_group_empty(tsk)) + goto ret; - listeners = &per_cpu(listener_array, *mycpu); - down_read(&listeners->sem); - if (!list_empty(&listeners->list)) { - *ptidstats = tmp; - tmp = NULL; + /* No problem if kmem_cache_zalloc() fails */ + stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + + spin_lock_irq(&tsk->sighand->siglock); + if (!sig->stats) { + sig->stats = stats; + stats = NULL; } - up_read(&listeners->sem); - kfree(tmp); + spin_unlock_irq(&tsk->sighand->siglock); + + if (stats) + kmem_cache_free(taskstats_cache, stats); +ret: + return sig->stats; } /* Send pid data out on exit */ -void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, - int group_dead, unsigned int mycpu) +void taskstats_exit(struct task_struct *tsk, int group_dead) { int rc; + struct listener_list *listeners; + struct taskstats *stats; struct sk_buff *rep_skb; - void *reply; size_t size; int is_thread_group; - struct nlattr *na; if (!family_registered) return; @@ -459,7 +470,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); - is_thread_group = (tsk->signal->stats != NULL); + is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { /* PID + STATS + TGID + STATS */ size = 2 * size; @@ -467,49 +478,39 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, fill_tgid_exit(tsk); } - if (!tidstats) + listeners = &__raw_get_cpu_var(listener_array); + if (list_empty(&listeners->list)) return; - rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); - if (rc < 0) - goto ret; - - rc = fill_pid(tsk->pid, tsk, tidstats); + rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) - goto err_skb; + return; - na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); - NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); - NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, - *tidstats); - nla_nest_end(rep_skb, na); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); + if (!stats) + goto err; - if (!is_thread_group) - goto send; + rc = fill_pid(tsk->pid, tsk, stats); + if (rc < 0) + goto err; /* * Doesn't matter if tsk is the leader or the last group member leaving */ - if (!group_dead) + if (!is_thread_group || !group_dead) goto send; - na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); - NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); - /* No locking needed for tsk->signal->stats since group is dead */ - NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, - *tsk->signal->stats); - nla_nest_end(rep_skb, na); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); + if (!stats) + goto err; + + memcpy(stats, tsk->signal->stats, sizeof(*stats)); send: - send_cpu_listeners(rep_skb, mycpu); + send_cpu_listeners(rep_skb, listeners); return; - -nla_put_failure: - genlmsg_cancel(rep_skb, reply); -err_skb: +err: nlmsg_free(rep_skb); -ret: - return; } static struct genl_ops taskstats_ops = { diff --git a/kernel/unwind.c b/kernel/unwind.c index ed0a21d4a90..09c26132924 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -14,11 +14,12 @@ #include <linux/bootmem.h> #include <linux/sort.h> #include <linux/stop_machine.h> +#include <linux/uaccess.h> #include <asm/sections.h> #include <asm/uaccess.h> #include <asm/unaligned.h> -extern char __start_unwind[], __end_unwind[]; +extern const char __start_unwind[], __end_unwind[]; extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; #define MAX_STACK_DEPTH 8 @@ -94,6 +95,7 @@ static const struct { typedef unsigned long uleb128_t; typedef signed long sleb128_t; +#define sleb128abs __builtin_labs static struct unwind_table { struct { @@ -135,6 +137,17 @@ struct unwind_state { static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; +static unsigned unwind_debug; +static int __init unwind_debug_setup(char *s) +{ + unwind_debug = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("unwind_debug=", unwind_debug_setup); +#define dprintk(lvl, fmt, args...) \ + ((void)(lvl > unwind_debug \ + || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) + static struct unwind_table *find_table(unsigned long pc) { struct unwind_table *table; @@ -151,7 +164,9 @@ static struct unwind_table *find_table(unsigned long pc) static unsigned long read_pointer(const u8 **pLoc, const void *end, - signed ptrType); + signed ptrType, + unsigned long text_base, + unsigned long data_base); static void init_unwind_table(struct unwind_table *table, const char *name, @@ -176,10 +191,13 @@ static void init_unwind_table(struct unwind_table *table, /* See if the linker provided table looks valid. */ if (header_size <= 4 || header_start[0] != 1 - || (void *)read_pointer(&ptr, end, header_start[1]) != table_start - || header_start[2] == DW_EH_PE_omit - || read_pointer(&ptr, end, header_start[2]) <= 0 - || header_start[3] == DW_EH_PE_omit) + || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) + != table_start + || !read_pointer(&ptr, end, header_start[2], 0, 0) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start)) header_start = NULL; table->hdrsz = header_size; smp_wmb(); @@ -269,7 +287,7 @@ static void __init setup_unwind_table(struct unwind_table *table, ptr = (const u8 *)(fde + 2); if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - ptrType)) + ptrType, 0, 0)) return; ++n; } @@ -279,6 +297,7 @@ static void __init setup_unwind_table(struct unwind_table *table, hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long); + dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); header = alloc(hdrSize); if (!header) return; @@ -303,7 +322,7 @@ static void __init setup_unwind_table(struct unwind_table *table, ptr = (const u8 *)(fde + 2); header->table[n].start = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - fde_pointer_type(cie)); + fde_pointer_type(cie), 0, 0); header->table[n].fde = (unsigned long)fde; ++n; } @@ -486,7 +505,9 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) static unsigned long read_pointer(const u8 **pLoc, const void *end, - signed ptrType) + signed ptrType, + unsigned long text_base, + unsigned long data_base) { unsigned long value = 0; union { @@ -498,13 +519,17 @@ static unsigned long read_pointer(const u8 **pLoc, const unsigned long *pul; } ptr; - if (ptrType < 0 || ptrType == DW_EH_PE_omit) + if (ptrType < 0 || ptrType == DW_EH_PE_omit) { + dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); return 0; + } ptr.p8 = *pLoc; switch(ptrType & DW_EH_PE_FORM) { case DW_EH_PE_data2: - if (end < (const void *)(ptr.p16u + 1)) + if (end < (const void *)(ptr.p16u + 1)) { + dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); return 0; + } if(ptrType & DW_EH_PE_signed) value = get_unaligned(ptr.p16s++); else @@ -512,8 +537,10 @@ static unsigned long read_pointer(const u8 **pLoc, break; case DW_EH_PE_data4: #ifdef CONFIG_64BIT - if (end < (const void *)(ptr.p32u + 1)) + if (end < (const void *)(ptr.p32u + 1)) { + dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); return 0; + } if(ptrType & DW_EH_PE_signed) value = get_unaligned(ptr.p32s++); else @@ -525,8 +552,10 @@ static unsigned long read_pointer(const u8 **pLoc, BUILD_BUG_ON(sizeof(u32) != sizeof(value)); #endif case DW_EH_PE_native: - if (end < (const void *)(ptr.pul + 1)) + if (end < (const void *)(ptr.pul + 1)) { + dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); return 0; + } value = get_unaligned(ptr.pul++); break; case DW_EH_PE_leb128: @@ -534,10 +563,14 @@ static unsigned long read_pointer(const u8 **pLoc, value = ptrType & DW_EH_PE_signed ? get_sleb128(&ptr.p8, end) : get_uleb128(&ptr.p8, end); - if ((const void *)ptr.p8 > end) + if ((const void *)ptr.p8 > end) { + dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); return 0; + } break; default: + dprintk(2, "Cannot decode pointer type %02X (%p,%p).", + ptrType, ptr.p8, end); return 0; } switch(ptrType & DW_EH_PE_ADJUST) { @@ -546,12 +579,33 @@ static unsigned long read_pointer(const u8 **pLoc, case DW_EH_PE_pcrel: value += (unsigned long)*pLoc; break; + case DW_EH_PE_textrel: + if (likely(text_base)) { + value += text_base; + break; + } + dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", + ptrType, *pLoc, end); + return 0; + case DW_EH_PE_datarel: + if (likely(data_base)) { + value += data_base; + break; + } + dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", + ptrType, *pLoc, end); + return 0; default: + dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", + ptrType, *pLoc, end); return 0; } if ((ptrType & DW_EH_PE_indirect) - && __get_user(value, (unsigned long *)value)) + && probe_kernel_address((unsigned long *)value, value)) { + dprintk(1, "Cannot read indirect value %lx (%p,%p).", + value, *pLoc, end); return 0; + } *pLoc = ptr.p8; return value; @@ -594,7 +648,8 @@ static signed fde_pointer_type(const u32 *cie) case 'P': { signed ptrType = *ptr++; - if (!read_pointer(&ptr, end, ptrType) || ptr > end) + if (!read_pointer(&ptr, end, ptrType, 0, 0) + || ptr > end) return -1; } break; @@ -654,7 +709,8 @@ static int processCFI(const u8 *start, case DW_CFA_nop: break; case DW_CFA_set_loc: - if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) + state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); + if (state->loc == 0) result = 0; break; case DW_CFA_advance_loc1: @@ -700,8 +756,10 @@ static int processCFI(const u8 *start, state->label = NULL; return 1; } - if (state->stackDepth >= MAX_STACK_DEPTH) + if (state->stackDepth >= MAX_STACK_DEPTH) { + dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); return 0; + } state->stack[state->stackDepth++] = ptr.p8; break; case DW_CFA_restore_state: @@ -716,8 +774,10 @@ static int processCFI(const u8 *start, result = processCFI(start, end, 0, ptrType, state); state->loc = loc; state->label = label; - } else + } else { + dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); return 0; + } break; case DW_CFA_def_cfa: state->cfa.reg = get_uleb128(&ptr.p8, end); @@ -749,6 +809,7 @@ static int processCFI(const u8 *start, break; case DW_CFA_GNU_window_save: default: + dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); result = 0; break; } @@ -764,12 +825,17 @@ static int processCFI(const u8 *start, set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); break; } - if (ptr.p8 > end) + if (ptr.p8 > end) { + dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); result = 0; + } if (result && targetLoc != 0 && targetLoc < state->loc) return 1; } + if (result && ptr.p8 < end) + dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); + return result && ptr.p8 == end && (targetLoc == 0 @@ -786,7 +852,7 @@ int unwind(struct unwind_frame_info *frame) #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde = NULL, *cie = NULL; const u8 *ptr = NULL, *end = NULL; - unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; unsigned long startLoc = 0, endLoc = 0, cfa; unsigned i; signed ptrType = -1; @@ -813,9 +879,9 @@ int unwind(struct unwind_frame_info *frame) ptr = hdr + 4; end = hdr + table->hdrsz; if (tableSize - && read_pointer(&ptr, end, hdr[1]) + && read_pointer(&ptr, end, hdr[1], 0, 0) == (unsigned long)table->address - && (i = read_pointer(&ptr, end, hdr[2])) > 0 + && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 && i == (end - ptr) / (2 * tableSize) && !((end - ptr) % (2 * tableSize))) { do { @@ -823,7 +889,8 @@ int unwind(struct unwind_frame_info *frame) startLoc = read_pointer(&cur, cur + tableSize, - hdr[3]); + hdr[3], 0, + (unsigned long)hdr); if (pc < startLoc) i /= 2; else { @@ -834,13 +901,17 @@ int unwind(struct unwind_frame_info *frame) if (i == 1 && (startLoc = read_pointer(&ptr, ptr + tableSize, - hdr[3])) != 0 + hdr[3], 0, + (unsigned long)hdr)) != 0 && pc >= startLoc) fde = (void *)read_pointer(&ptr, ptr + tableSize, - hdr[3]); + hdr[3], 0, + (unsigned long)hdr); } } + if(hdr && !fde) + dprintk(3, "Binary lookup for %lx failed.", pc); if (fde != NULL) { cie = cie_for_fde(fde, table); @@ -851,17 +922,19 @@ int unwind(struct unwind_frame_info *frame) && (ptrType = fde_pointer_type(cie)) >= 0 && read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - ptrType) == startLoc) { + ptrType, 0, 0) == startLoc) { if (!(ptrType & DW_EH_PE_indirect)) ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - ptrType); + ptrType, 0, 0); if(pc >= endLoc) fde = NULL; } else fde = NULL; + if(!fde) + dprintk(1, "Binary lookup result for %lx discarded.", pc); } if (fde == NULL) { for (fde = table->address, tableSize = table->size; @@ -881,7 +954,7 @@ int unwind(struct unwind_frame_info *frame) ptr = (const u8 *)(fde + 2); startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - ptrType); + ptrType, 0, 0); if (!startLoc) continue; if (!(ptrType & DW_EH_PE_indirect)) @@ -889,10 +962,12 @@ int unwind(struct unwind_frame_info *frame) endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, - ptrType); + ptrType, 0, 0); if (pc >= startLoc && pc < endLoc) break; } + if(!fde) + dprintk(3, "Linear lookup for %lx failed.", pc); } } if (cie != NULL) { @@ -926,6 +1001,8 @@ int unwind(struct unwind_frame_info *frame) if (ptr >= end || *ptr) cie = NULL; } + if(!cie) + dprintk(1, "CIE unusable (%p,%p).", ptr, end); ++ptr; } if (cie != NULL) { @@ -935,7 +1012,12 @@ int unwind(struct unwind_frame_info *frame) state.dataAlign = get_sleb128(&ptr, end); if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) cie = NULL; - else { + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EPERM; + } else { retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ if (((const char *)(cie + 2))[1] == 'z') { @@ -949,6 +1031,8 @@ int unwind(struct unwind_frame_info *frame) || reg_info[retAddrReg].width != sizeof(unsigned long)) cie = NULL; } + if(!cie) + dprintk(1, "CIE validation failed (%p,%p).", ptr, end); } if (cie != NULL) { state.cieStart = ptr; @@ -962,11 +1046,15 @@ int unwind(struct unwind_frame_info *frame) if ((ptr += augSize) > end) fde = NULL; } + if(!fde) + dprintk(1, "FDE validation failed (%p,%p).", ptr, end); } if (cie == NULL || fde == NULL) { #ifdef CONFIG_FRAME_POINTER unsigned long top, bottom; + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; top = STACK_TOP(frame->task); bottom = STACK_BOTTOM(frame->task); # if FRAME_RETADDR_OFFSET < 0 @@ -982,18 +1070,19 @@ int unwind(struct unwind_frame_info *frame) & (sizeof(unsigned long) - 1))) { unsigned long link; - if (!__get_user(link, + if (!probe_kernel_address( (unsigned long *)(UNW_FP(frame) - + FRAME_LINK_OFFSET)) + + FRAME_LINK_OFFSET), + link) # if FRAME_RETADDR_OFFSET < 0 && link > bottom && link < UNW_FP(frame) # else && link > UNW_FP(frame) && link < bottom # endif && !(link & (sizeof(link) - 1)) - && !__get_user(UNW_PC(frame), + && !probe_kernel_address( (unsigned long *)(UNW_FP(frame) - + FRAME_RETADDR_OFFSET))) { + + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET # if FRAME_RETADDR_OFFSET < 0 - @@ -1016,8 +1105,11 @@ int unwind(struct unwind_frame_info *frame) || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) || reg_info[state.cfa.reg].width != sizeof(unsigned long) - || state.cfa.offs % sizeof(unsigned long)) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) { + dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); return -EIO; + } /* update frame */ #ifndef CONFIG_AS_CFI_SIGNAL_FRAME if(frame->call_frame @@ -1036,10 +1128,14 @@ int unwind(struct unwind_frame_info *frame) #else # define CASES CASE(8); CASE(16); CASE(32); CASE(64) #endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { if (REG_INVALID(i)) { if (state.regs[i].where == Nowhere) continue; + dprintk(1, "Cannot restore register %u (%d).", + i, state.regs[i].where); return -EIO; } switch(state.regs[i].where) { @@ -1048,8 +1144,11 @@ int unwind(struct unwind_frame_info *frame) case Register: if (state.regs[i].value >= ARRAY_SIZE(reg_info) || REG_INVALID(state.regs[i].value) - || reg_info[i].width > reg_info[state.regs[i].value].width) + || reg_info[i].width > reg_info[state.regs[i].value].width) { + dprintk(1, "Cannot restore register %u from register %lu.", + i, state.regs[i].value); return -EIO; + } switch(reg_info[state.regs[i].value].width) { #define CASE(n) \ case sizeof(u##n): \ @@ -1059,6 +1158,9 @@ int unwind(struct unwind_frame_info *frame) CASES; #undef CASE default: + dprintk(1, "Unsupported register size %u (%lu).", + reg_info[state.regs[i].value].width, + state.regs[i].value); return -EIO; } break; @@ -1083,12 +1185,17 @@ int unwind(struct unwind_frame_info *frame) CASES; #undef CASE default: + dprintk(1, "Unsupported register size %u (%u).", + reg_info[i].width, i); return -EIO; } break; case Value: - if (reg_info[i].width != sizeof(unsigned long)) + if (reg_info[i].width != sizeof(unsigned long)) { + dprintk(1, "Unsupported value size %u (%u).", + reg_info[i].width, i); return -EIO; + } FRAME_REG(i, unsigned long) = cfa + state.regs[i].value * state.dataAlign; break; @@ -1100,15 +1207,20 @@ int unwind(struct unwind_frame_info *frame) % sizeof(unsigned long) || addr < startLoc || addr + sizeof(unsigned long) < addr - || addr + sizeof(unsigned long) > endLoc) + || addr + sizeof(unsigned long) > endLoc) { + dprintk(1, "Bad memory location %lx (%lx).", + addr, state.regs[i].value); return -EIO; + } switch(reg_info[i].width) { #define CASE(n) case sizeof(u##n): \ - __get_user(FRAME_REG(i, u##n), (u##n *)addr); \ + probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ break CASES; #undef CASE default: + dprintk(1, "Unsupported memory size %u (%u).", + reg_info[i].width, i); return -EIO; } } @@ -1116,6 +1228,17 @@ int unwind(struct unwind_frame_info *frame) } } + if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EIO; + } + if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { + dprintk(1, "No progress (%lx,%lx).", pc, sp); + return -EIO; + } + return 0; #undef CASES #undef FRAME_REG diff --git a/kernel/user.c b/kernel/user.c index 220e586127a..4869563080e 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -26,7 +26,7 @@ #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) -static kmem_cache_t *uid_cachep; +static struct kmem_cache *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; /* @@ -132,7 +132,7 @@ struct user_struct * alloc_uid(uid_t uid) if (!up) { struct user_struct *new; - new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL); + new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); if (!new) return NULL; new->uid = uid; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8d1e7cb8a51..6b186750e9b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -29,6 +29,9 @@ #include <linux/kthread.h> #include <linux/hardirq.h> #include <linux/mempolicy.h> +#include <linux/freezer.h> +#include <linux/kallsyms.h> +#include <linux/debug_locks.h> /* * The per-CPU workqueue (if single thread, we always use the first @@ -55,6 +58,8 @@ struct cpu_workqueue_struct { struct task_struct *thread; int run_depth; /* Detect run_workqueue() recursion depth */ + + int freezeable; /* Freeze the thread during suspend */ } ____cacheline_aligned; /* @@ -103,6 +108,79 @@ static inline void *get_wq_data(struct work_struct *work) return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); } +static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&cwq->lock, flags); + /* + * We need to re-validate the work info after we've gotten + * the cpu_workqueue lock. We can run the work now iff: + * + * - the wq_data still matches the cpu_workqueue_struct + * - AND the work is still marked pending + * - AND the work is still on a list (which will be this + * workqueue_struct list) + * + * All these conditions are important, because we + * need to protect against the work being run right + * now on another CPU (all but the last one might be + * true if it's currently running and has not been + * released yet, for example). + */ + if (get_wq_data(work) == cwq + && work_pending(work) + && !list_empty(&work->entry)) { + work_func_t f = work->func; + list_del_init(&work->entry); + spin_unlock_irqrestore(&cwq->lock, flags); + + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + work_release(work); + f(work); + + spin_lock_irqsave(&cwq->lock, flags); + cwq->remove_sequence++; + wake_up(&cwq->work_done); + ret = 1; + } + spin_unlock_irqrestore(&cwq->lock, flags); + return ret; +} + +/** + * run_scheduled_work - run scheduled work synchronously + * @work: work to run + * + * This checks if the work was pending, and runs it + * synchronously if so. It returns a boolean to indicate + * whether it had any scheduled work to run or not. + * + * NOTE! This _only_ works for normal work_structs. You + * CANNOT use this for delayed work, because the wq data + * for delayed work will not point properly to the per- + * CPU workqueue struct, but will change! + */ +int fastcall run_scheduled_work(struct work_struct *work) +{ + for (;;) { + struct cpu_workqueue_struct *cwq; + + if (!work_pending(work)) + return 0; + if (list_empty(&work->entry)) + return 0; + /* NOTE! This depends intimately on __queue_work! */ + cwq = get_wq_data(work); + if (!cwq) + return 0; + if (__run_work(cwq, work)) + return 1; + } +} +EXPORT_SYMBOL(run_scheduled_work); + /* Preempt must be disabled. */ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -250,6 +328,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) work_release(work); f(work); + if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { + printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " + "%s/0x%08x/%d\n", + current->comm, preempt_count(), + current->pid); + printk(KERN_ERR " last function: "); + print_symbol("%s\n", (unsigned long)f); + debug_show_held_locks(current); + dump_stack(); + } + spin_lock_irqsave(&cwq->lock, flags); cwq->remove_sequence++; wake_up(&cwq->work_done); @@ -265,7 +354,8 @@ static int worker_thread(void *__cwq) struct k_sigaction sa; sigset_t blocked; - current->flags |= PF_NOFREEZE; + if (!cwq->freezeable) + current->flags |= PF_NOFREEZE; set_user_nice(current, -5); @@ -288,6 +378,9 @@ static int worker_thread(void *__cwq) set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { + if (cwq->freezeable) + try_to_freeze(); + add_wait_queue(&cwq->more_work, &wait); if (list_empty(&cwq->worklist)) schedule(); @@ -364,7 +457,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) EXPORT_SYMBOL_GPL(flush_workqueue); static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, - int cpu) + int cpu, int freezeable) { struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); struct task_struct *p; @@ -374,6 +467,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, cwq->thread = NULL; cwq->insert_sequence = 0; cwq->remove_sequence = 0; + cwq->freezeable = freezeable; INIT_LIST_HEAD(&cwq->worklist); init_waitqueue_head(&cwq->more_work); init_waitqueue_head(&cwq->work_done); @@ -389,7 +483,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, } struct workqueue_struct *__create_workqueue(const char *name, - int singlethread) + int singlethread, int freezeable) { int cpu, destroy = 0; struct workqueue_struct *wq; @@ -409,7 +503,7 @@ struct workqueue_struct *__create_workqueue(const char *name, mutex_lock(&workqueue_mutex); if (singlethread) { INIT_LIST_HEAD(&wq->list); - p = create_workqueue_thread(wq, singlethread_cpu); + p = create_workqueue_thread(wq, singlethread_cpu, freezeable); if (!p) destroy = 1; else @@ -417,7 +511,7 @@ struct workqueue_struct *__create_workqueue(const char *name, } else { list_add(&wq->list, &workqueues); for_each_online_cpu(cpu) { - p = create_workqueue_thread(wq, cpu); + p = create_workqueue_thread(wq, cpu, freezeable); if (p) { kthread_bind(p, cpu); wake_up_process(p); @@ -634,7 +728,6 @@ int current_is_keventd(void) } -#ifdef CONFIG_HOTPLUG_CPU /* Take the work from this (downed) CPU. */ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) { @@ -667,7 +760,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, mutex_lock(&workqueue_mutex); /* Create a new workqueue thread for it. */ list_for_each_entry(wq, &workqueues, list) { - if (!create_workqueue_thread(wq, hotcpu)) { + if (!create_workqueue_thread(wq, hotcpu, 0)) { printk("workqueue for %i failed\n", hotcpu); return NOTIFY_BAD; } @@ -717,7 +810,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif void init_workqueues(void) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d3679103a8e..b75fed737f2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1,6 +1,7 @@ config PRINTK_TIME bool "Show timing information on printks" + depends on PRINTK help Selecting this option causes timing information to be included in printk output. This allows you to measure diff --git a/lib/Makefile b/lib/Makefile index cf98fabaa54..fea8f9035f0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,7 +25,7 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o -lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o +obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o diff --git a/lib/cmdline.c b/lib/cmdline.c index 0331ed825ea..8a5b5303bd4 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -16,6 +16,23 @@ #include <linux/kernel.h> #include <linux/string.h> +/* + * If a hyphen was found in get_option, this will handle the + * range of numbers, M-N. This will expand the range and insert + * the values[M, M+1, ..., N] into the ints array in get_options. + */ + +static int get_range(char **str, int *pint) +{ + int x, inc_counter, upper_range; + + (*str)++; + upper_range = simple_strtol((*str), NULL, 0); + inc_counter = upper_range - *pint; + for (x = *pint; x < upper_range; x++) + *pint++ = x; + return inc_counter; +} /** * get_option - Parse integer from an option string @@ -29,6 +46,7 @@ * 0 : no int in string * 1 : int found, no subsequent comma * 2 : int found including a subsequent comma + * 3 : hyphen found to denote a range */ int get_option (char **str, int *pint) @@ -44,6 +62,8 @@ int get_option (char **str, int *pint) (*str)++; return 2; } + if (**str == '-') + return 3; return 1; } @@ -55,7 +75,8 @@ int get_option (char **str, int *pint) * @ints: integer array * * This function parses a string containing a comma-separated - * list of integers. The parse halts when the array is + * list of integers, a hyphen-separated range of _positive_ integers, + * or a combination of both. The parse halts when the array is * full, or when no more numbers can be retrieved from the * string. * @@ -72,6 +93,18 @@ char *get_options(const char *str, int nints, int *ints) res = get_option ((char **)&str, ints + i); if (res == 0) break; + if (res == 3) { + int range_nums; + range_nums = get_range((char **)&str, ints + i); + if (range_nums < 0) + break; + /* + * Decrement the result by one to leave out the + * last number in the range. The next iteration + * will handle the upper number in the range + */ + i += (range_nums - 1); + } i++; if (res == 1) break; diff --git a/lib/idr.c b/lib/idr.c index 16d2143fea4..71853531d3b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/idr.h> -static kmem_cache_t *idr_layer_cache; +static struct kmem_cache *idr_layer_cache; static struct idr_layer *alloc_layer(struct idr *idp) { @@ -445,7 +445,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache, +static void idr_cache_ctor(void * idr_layer, struct kmem_cache *idr_layer_cache, unsigned long flags) { memset(idr_layer, 0, sizeof(struct idr_layer)); diff --git a/lib/kobject.c b/lib/kobject.c index 744a4b102c7..7ce6dc138e9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -111,10 +111,9 @@ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) len = get_kobj_path_length(kobj); if (len == 0) return NULL; - path = kmalloc(len, gfp_mask); + path = kzalloc(len, gfp_mask); if (!path) return NULL; - memset(path, 0x00, len); fill_kobj_path(kobj, path, len); return path; diff --git a/lib/list_debug.c b/lib/list_debug.c index 7ba9d823d38..4350ba9655b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -21,13 +21,15 @@ void __list_add(struct list_head *new, struct list_head *next) { if (unlikely(next->prev != prev)) { - printk(KERN_ERR "list_add corruption. next->prev should be %p, but was %p\n", - prev, next->prev); + printk(KERN_ERR "list_add corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); BUG(); } if (unlikely(prev->next != next)) { - printk(KERN_ERR "list_add corruption. prev->next should be %p, but was %p\n", - next, prev->next); + printk(KERN_ERR "list_add corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); BUG(); } next->prev = new; diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7945787f439..280332c1827 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -963,7 +963,9 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) printk("failed|"); } else { unexpected_testcase_failures++; + printk("FAILED|"); + dump_stack(); } } else { testcase_successes++; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index aa9bfd0bdbd..d69ddbe4386 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2,6 +2,7 @@ * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -30,6 +31,7 @@ #include <linux/gfp.h> #include <linux/string.h> #include <linux/bitops.h> +#include <linux/rcupdate.h> #ifdef __KERNEL__ @@ -45,7 +47,9 @@ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) struct radix_tree_node { + unsigned int height; /* Height from the bottom */ unsigned int count; + struct rcu_head rcu_head; void *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; @@ -63,7 +67,7 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* * Radix tree node cache. */ -static kmem_cache_t *radix_tree_node_cachep; +static struct kmem_cache *radix_tree_node_cachep; /* * Per-cpu pool of preloaded nodes @@ -100,13 +104,21 @@ radix_tree_node_alloc(struct radix_tree_root *root) rtp->nr--; } } + BUG_ON(radix_tree_is_direct_ptr(ret)); return ret; } +static void radix_tree_node_rcu_free(struct rcu_head *head) +{ + struct radix_tree_node *node = + container_of(head, struct radix_tree_node, rcu_head); + kmem_cache_free(radix_tree_node_cachep, node); +} + static inline void radix_tree_node_free(struct radix_tree_node *node) { - kmem_cache_free(radix_tree_node_cachep, node); + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* @@ -222,11 +234,12 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) } do { + unsigned int newheight; if (!(node = radix_tree_node_alloc(root))) return -ENOMEM; /* Increase the height. */ - node->slots[0] = root->rnode; + node->slots[0] = radix_tree_direct_to_ptr(root->rnode); /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { @@ -234,9 +247,11 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) tag_set(node, tag, 0); } + newheight = root->height+1; + node->height = newheight; node->count = 1; - root->rnode = node; - root->height++; + rcu_assign_pointer(root->rnode, node); + root->height = newheight; } while (height > root->height); out: return 0; @@ -258,6 +273,8 @@ int radix_tree_insert(struct radix_tree_root *root, int offset; int error; + BUG_ON(radix_tree_is_direct_ptr(item)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -275,11 +292,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; + slot->height = height; if (node) { - node->slots[offset] = slot; + rcu_assign_pointer(node->slots[offset], slot); node->count++; } else - root->rnode = slot; + rcu_assign_pointer(root->rnode, slot); } /* Go a level down */ @@ -295,11 +313,11 @@ int radix_tree_insert(struct radix_tree_root *root, if (node) { node->count++; - node->slots[offset] = item; + rcu_assign_pointer(node->slots[offset], item); BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); } else { - root->rnode = item; + rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -308,49 +326,54 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) +/** + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. + * + * This function cannot be called under rcu_read_lock, it must be + * excluded from writers, as must the returned slot for subsequent + * use by radix_tree_deref_slot() and radix_tree_replace slot. + * Caller must hold tree write locked across slot lookup and + * replace. + */ +void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; + struct radix_tree_node *node, **slot; - if (index > radix_tree_maxindex(height)) + node = root->rnode; + if (node == NULL) return NULL; - if (height == 0 && root->rnode) + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; return (void **)&root->rnode; + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - while (height > 0) { - if (*slot == NULL) + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = *slot; + if (node == NULL) return NULL; - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); shift -= RADIX_TREE_MAP_SHIFT; height--; - } + } while (height > 0); return (void **)slot; } - -/** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. - */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) -{ - return __lookup_slot(root, index); -} EXPORT_SYMBOL(radix_tree_lookup_slot); /** @@ -359,13 +382,45 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); * @index: index key * * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - void **slot; + unsigned int height, shift; + struct radix_tree_node *node, **slot; + + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; + + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; + return radix_tree_direct_to_ptr(node); + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference(*slot); + if (node == NULL) + return NULL; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return node; } EXPORT_SYMBOL(radix_tree_lookup); @@ -495,27 +550,30 @@ int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) { unsigned int height, shift; - struct radix_tree_node *slot; + struct radix_tree_node *node; int saw_unset_tag = 0; - height = root->height; - if (index > radix_tree_maxindex(height)) - return 0; - /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; - if (height == 0) - return 1; + node = rcu_dereference(root->rnode); + if (node == NULL) + return 0; + + if (radix_tree_is_direct_ptr(node)) + return (index == 0); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return 0; shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; ; ) { int offset; - if (slot == NULL) + if (node == NULL) return 0; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -524,15 +582,15 @@ int radix_tree_tag_get(struct radix_tree_root *root, * This is just a debug check. Later, we can bale as soon as * we see an unset tag. */ - if (!tag_get(slot, tag, offset)) + if (!tag_get(node, tag, offset)) saw_unset_tag = 1; if (height == 1) { - int ret = tag_get(slot, tag, offset); + int ret = tag_get(node, tag, offset); BUG_ON(ret && saw_unset_tag); return !!ret; } - slot = slot->slots[offset]; + node = rcu_dereference(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -541,47 +599,45 @@ EXPORT_SYMBOL(radix_tree_tag_get); #endif static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, +__lookup(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; - struct radix_tree_node *slot; unsigned long i; - height = root->height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { if (slot->slots[i] != NULL) break; index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + goto out; } /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + struct radix_tree_node *node; index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; + node = slot->slots[i]; + if (node) { + results[nr_found++] = rcu_dereference(node); if (nr_found == max_items) goto out; } @@ -603,28 +659,51 @@ out: * *@results. * * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { - const unsigned long max_index = radix_tree_maxindex(root->height); + unsigned long max_index; + struct radix_tree_node *node; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup(root, results + ret, cur_index, + nr_found = __lookup(node, results + ret, cur_index, max_items - ret, &next_index); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); @@ -634,55 +713,64 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * open-coding the search. */ static unsigned int -__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, +__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index, unsigned int tag) { unsigned int nr_found = 0; - unsigned int shift; - unsigned int height = root->height; - struct radix_tree_node *slot; + unsigned int shift, height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - do { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; + while (height > 0) { + unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; - for ( ; i < RADIX_TREE_MAP_SIZE; i++) { - if (tag_get(slot, tag, i)) { - BUG_ON(slot->slots[i] == NULL); + for (;;) { + if (tag_get(slot, tag, i)) break; - } index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; height--; if (height == 0) { /* Bottom level: grab some items */ unsigned long j = index & RADIX_TREE_MAP_MASK; for ( ; j < RADIX_TREE_MAP_SIZE; j++) { + struct radix_tree_node *node; index++; - if (tag_get(slot, tag, j)) { - BUG_ON(slot->slots[j] == NULL); - results[nr_found++] = slot->slots[j]; + if (!tag_get(slot, tag, j)) + continue; + node = slot->slots[j]; + /* + * Even though the tag was found set, we need to + * recheck that we have a non-NULL node, because + * if this lookup is lockless, it may have been + * subsequently deleted. + * + * Similar care must be taken in any place that + * lookup ->slots[x] without a lock (ie. can't + * rely on its value remaining the same). + */ + if (node) { + node = rcu_dereference(node); + results[nr_found++] = node; if (nr_found == max_items) goto out; } } } shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } while (height > 0); + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + break; + } out: *next_index = index; return nr_found; @@ -706,27 +794,44 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { - const unsigned long max_index = radix_tree_maxindex(root->height); + struct radix_tree_node *node; + unsigned long max_index; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup_tag(root, results + ret, cur_index, + nr_found = __lookup_tag(node, results + ret, cur_index, max_items - ret, &next_index, tag); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); @@ -742,8 +847,19 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) root->rnode->count == 1 && root->rnode->slots[0]) { struct radix_tree_node *to_free = root->rnode; + void *newptr; - root->rnode = to_free->slots[0]; + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another. If + * it was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode). + */ + newptr = to_free->slots[0]; + if (root->height == 1) + newptr = radix_tree_ptr_to_direct(newptr); + root->rnode = newptr; root->height--; /* must only free zeroed nodes into the slab */ tag_clear(to_free, 0, 0); @@ -767,6 +883,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; unsigned int height, shift; int tag; int offset; @@ -777,6 +894,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) slot = root->rnode; if (height == 0 && root->rnode) { + slot = radix_tree_direct_to_ptr(slot); root_tag_clear_all(root); root->rnode = NULL; goto out; @@ -809,10 +927,17 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } + to_free = NULL; /* Now free the nodes we do not need anymore */ while (pathp->node) { pathp->node->slots[pathp->offset] = NULL; pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(to_free); if (pathp->node->count) { if (pathp->node == root->rnode) @@ -821,13 +946,15 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } /* Node with zero slots in use so free it */ - radix_tree_node_free(pathp->node); - + to_free = pathp->node; pathp--; + } root_tag_clear_all(root); root->height = 0; root->rnode = NULL; + if (to_free) + radix_tree_node_free(to_free); out: return slot; @@ -846,7 +973,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags) +radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags) { memset(node, 0, sizeof(struct radix_tree_node)); } @@ -869,7 +996,6 @@ static __init void radix_tree_init_maxindex(void) height_to_maxindex[i] = __maxindex(i); } -#ifdef CONFIG_HOTPLUG_CPU static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -889,7 +1015,6 @@ static int radix_tree_callback(struct notifier_block *nfb, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init radix_tree_init(void) { diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197..479fd462eaa 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -7,6 +7,7 @@ */ #include <linux/spinlock.h> +#include <linux/nmi.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/delay.h> @@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) raw_smp_processor_id(), current->comm, current->pid, lock); dump_stack(); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif } } } diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index eaa9abeea53..b2486cf887a 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -17,10 +17,9 @@ void percpu_depopulate(void *__pdata, int cpu) { struct percpu_data *pdata = __percpu_disguise(__pdata); - if (pdata->ptrs[cpu]) { - kfree(pdata->ptrs[cpu]); - pdata->ptrs[cpu] = NULL; - } + + kfree(pdata->ptrs[cpu]); + pdata->ptrs[cpu] = NULL; } EXPORT_SYMBOL_GPL(percpu_depopulate); @@ -123,6 +122,8 @@ EXPORT_SYMBOL_GPL(__percpu_alloc_mask); */ void percpu_free(void *__pdata) { + if (unlikely(!__pdata)) + return; __percpu_depopulate_mask(__pdata, &cpu_possible_map); kfree(__percpu_disguise(__pdata)); } diff --git a/mm/bootmem.c b/mm/bootmem.c index d53112fcb40..00a96970b23 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -27,8 +27,6 @@ unsigned long max_low_pfn; unsigned long min_low_pfn; unsigned long max_pfn; -EXPORT_UNUSED_SYMBOL(max_pfn); /* June 2006 */ - static LIST_HEAD(bdata_list); #ifdef CONFIG_CRASH_DUMP /* @@ -196,6 +194,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, if (limit && bdata->node_boot_start >= limit) return NULL; + /* on nodes without memory - bootmem_map is NULL */ + if (!bdata->node_bootmem_map) + return NULL; + end_pfn = bdata->node_low_pfn; limit = PFN_DOWN(limit); if (limit && end_pfn > limit) diff --git a/mm/filemap.c b/mm/filemap.c index 13df01c5047..af7e2f5caea 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1445,7 +1445,6 @@ no_cached_page: * effect. */ error = page_cache_read(file, pgoff); - grab_swap_token(); /* * The page we want has now been added to the page cache. diff --git a/mm/fremap.c b/mm/fremap.c index 7a9d0f5d246..b77a002c335 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -101,7 +101,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, { int err = -ENOMEM; pte_t *pte; - pte_t pte_val; spinlock_t *ptl; pte = get_locked_pte(mm, addr, &ptl); @@ -114,7 +113,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, } set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); - pte_val = *pte; /* * We don't need to run update_mmu_cache() here because the "file pte" * being installed by install_file_pte() is not a real pte - it's a diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a088f593a80..0ccc7f23025 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -109,7 +109,7 @@ static int alloc_fresh_huge_page(void) if (nid == MAX_NUMNODES) nid = first_node(node_online_map); if (page) { - page[1].lru.next = (void *)free_huge_page; /* dtor */ + set_compound_page_dtor(page, free_huge_page); spin_lock(&hugetlb_lock); nr_huge_pages++; nr_huge_pages_node[page_to_nid(page)]++; @@ -344,7 +344,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, entry = *src_pte; ptepage = pte_page(entry); get_page(ptepage); - add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE); set_huge_pte_at(dst, addr, dst_pte, entry); } spin_unlock(&src->page_table_lock); @@ -365,6 +364,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, pte_t pte; struct page *page; struct page *tmp; + /* + * A page gathering list, protected by per file i_mmap_lock. The + * lock is used to avoid list corruption from multiple unmapping + * of the same page since we are using page->lru. + */ LIST_HEAD(page_list); WARN_ON(!is_vm_hugetlb_page(vma)); @@ -372,24 +376,21 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(end & ~HPAGE_MASK); spin_lock(&mm->page_table_lock); - - /* Update high watermark before we lower rss */ - update_hiwater_rss(mm); - for (address = start; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; + if (huge_pmd_unshare(mm, &address, ptep)) + continue; + pte = huge_ptep_get_and_clear(mm, address, ptep); if (pte_none(pte)) continue; page = pte_page(pte); list_add(&page->lru, &page_list); - add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE)); } - spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { @@ -515,7 +516,6 @@ retry: if (!pte_none(*ptep)) goto backout; - add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE); new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); set_huge_pte_at(mm, address, ptep, new_pte); @@ -653,11 +653,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); + spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); spin_lock(&mm->page_table_lock); for (; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; + if (huge_pmd_unshare(mm, &address, ptep)) + continue; if (!pte_none(*ptep)) { pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(pte_modify(pte, newprot)); @@ -666,6 +669,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, } } spin_unlock(&mm->page_table_lock); + spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); flush_tlb_range(vma, start, end); } diff --git a/mm/memory.c b/mm/memory.c index 156861fcac4..4198df0dff1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1902,7 +1902,6 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) return 0; } -EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */ /** * swapin_readahead - swap in pages in hope we need them soon @@ -1991,6 +1990,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { + grab_swap_token(); /* Contend for token _before_ read-in */ swapin_readahead(entry, address, vma); page = read_swap_cache_async(entry, vma, address); if (!page) { @@ -2008,7 +2008,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - grab_swap_token(); } delayacct_clear_flag(DELAYACCT_PF_SWAPIN); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fd678a662ea..0c055a090f4 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -72,7 +72,6 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) return ret; } memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); - zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages); return 0; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 617fb31086e..b917d6fdc1b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -141,9 +141,11 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); + max++; /* space for zlcache_ptr (see mmzone.h) */ zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); if (!zl) return NULL; + zl->zlcache_ptr = NULL; num = 0; /* First put in the highest zones from all nodes, then all the next lower zones etc. Avoid empty zones because the memory allocator @@ -219,7 +221,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { struct page *page; - unsigned int nid; + int nid; if (!pte_present(*pte)) continue; @@ -1324,7 +1326,7 @@ struct mempolicy *__mpol_copy(struct mempolicy *old) atomic_set(&new->refcnt, 1); if (new->policy == MPOL_BIND) { int sz = ksize(old->v.zonelist); - new->v.zonelist = kmemdup(old->v.zonelist, sz, SLAB_KERNEL); + new->v.zonelist = kmemdup(old->v.zonelist, sz, GFP_KERNEL); if (!new->v.zonelist) { kmem_cache_free(policy_cache, new); return ERR_PTR(-ENOMEM); @@ -1705,8 +1707,8 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) * Display pages allocated per node and memory policy via /proc. */ -static const char *policy_types[] = { "default", "prefer", "bind", - "interleave" }; +static const char * const policy_types[] = + { "default", "prefer", "bind", "interleave" }; /* * Convert a mempolicy into a string. diff --git a/mm/migrate.c b/mm/migrate.c index b4979d423d2..e9b161bde95 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -294,7 +294,7 @@ out: static int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) { - struct page **radix_pointer; + void **pslot; if (!mapping) { /* Anonymous page */ @@ -305,12 +305,11 @@ static int migrate_page_move_mapping(struct address_space *mapping, write_lock_irq(&mapping->tree_lock); - radix_pointer = (struct page **)radix_tree_lookup_slot( - &mapping->page_tree, - page_index(page)); + pslot = radix_tree_lookup_slot(&mapping->page_tree, + page_index(page)); if (page_count(page) != 2 + !!PagePrivate(page) || - *radix_pointer != page) { + (struct page *)radix_tree_deref_slot(pslot) != page) { write_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -318,7 +317,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, /* * Now we know that no one else is looking at the page. */ - get_page(newpage); + get_page(newpage); /* add cache reference */ #ifdef CONFIG_SWAP if (PageSwapCache(page)) { SetPageSwapCache(newpage); @@ -326,8 +325,14 @@ static int migrate_page_move_mapping(struct address_space *mapping, } #endif - *radix_pointer = newpage; + radix_tree_replace_slot(pslot, newpage); + + /* + * Drop cache reference from old page. + * We know this isn't the last reference. + */ __put_page(page); + write_unlock_irq(&mapping->tree_lock); return 0; diff --git a/mm/mlock.c b/mm/mlock.c index b90c59573ab..3446b7ef731 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -65,7 +65,7 @@ success: ret = make_pages_present(start, end); } - vma->vm_mm->locked_vm -= pages; + mm->locked_vm -= pages; out: if (ret == -ENOMEM) ret = -EAGAIN; diff --git a/mm/mmap.c b/mm/mmap.c index 7b40abd7cba..7be110e98d4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1736,7 +1736,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; - new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) return -ENOMEM; @@ -2057,7 +2057,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, vma_start < new_vma->vm_end) *vmap = new_vma; } else { - new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (new_vma) { *new_vma = *vma; pol = mpol_copy(vma_policy(vma)); diff --git a/mm/mmzone.c b/mm/mmzone.c index febea1c9816..eb5838634f1 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -14,8 +14,6 @@ struct pglist_data *first_online_pgdat(void) return NODE_DATA(first_online_node); } -EXPORT_UNUSED_SYMBOL(first_online_pgdat); /* June 2006 */ - struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) { int nid = next_online_node(pgdat->node_id); @@ -24,8 +22,6 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) return NULL; return NODE_DATA(nid); } -EXPORT_UNUSED_SYMBOL(next_online_pgdat); /* June 2006 */ - /* * next_zone - helper magic for for_each_zone() @@ -45,5 +41,4 @@ struct zone *next_zone(struct zone *zone) } return zone; } -EXPORT_UNUSED_SYMBOL(next_zone); /* June 2006 */ diff --git a/mm/nommu.c b/mm/nommu.c index 6a2a8aada40..af874569d0f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -808,10 +808,9 @@ unsigned long do_mmap_pgoff(struct file *file, vm_flags = determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping if it works */ - vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL); + vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); if (!vml) goto error_getting_vml; - memset(vml, 0, sizeof(*vml)); down_write(&nommu_vma_sem); @@ -887,11 +886,10 @@ unsigned long do_mmap_pgoff(struct file *file, } /* we're going to need a VMA struct as well */ - vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!vma) goto error_getting_vma; - memset(vma, 0, sizeof(*vma)); INIT_LIST_HEAD(&vma->anon_vma_node); atomic_set(&vma->vm_usage, 1); if (file) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2e3ce3a928b..223d9ccb7d6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -264,7 +264,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO * set. */ -static void __oom_kill_task(struct task_struct *p, const char *message) +static void __oom_kill_task(struct task_struct *p, int verbose) { if (is_init(p)) { WARN_ON(1); @@ -278,10 +278,8 @@ static void __oom_kill_task(struct task_struct *p, const char *message) return; } - if (message) { - printk(KERN_ERR "%s: Killed process %d (%s).\n", - message, p->pid, p->comm); - } + if (verbose) + printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm); /* * We give our sacrificial lamb high priority and access to @@ -294,7 +292,7 @@ static void __oom_kill_task(struct task_struct *p, const char *message) force_sig(SIGKILL, p); } -static int oom_kill_task(struct task_struct *p, const char *message) +static int oom_kill_task(struct task_struct *p) { struct mm_struct *mm; struct task_struct *g, *q; @@ -313,15 +311,25 @@ static int oom_kill_task(struct task_struct *p, const char *message) if (mm == NULL) return 1; - __oom_kill_task(p, message); + /* + * Don't kill the process if any threads are set to OOM_DISABLE + */ + do_each_thread(g, q) { + if (q->mm == mm && p->oomkilladj == OOM_DISABLE) + return 1; + } while_each_thread(g, q); + + __oom_kill_task(p, 1); + /* * kill all processes that share the ->mm (i.e. all threads), - * but are in a different thread group + * but are in a different thread group. Don't let them have access + * to memory reserves though, otherwise we might deplete all memory. */ - do_each_thread(g, q) + do_each_thread(g, q) { if (q->mm == mm && q->tgid != p->tgid) - __oom_kill_task(q, message); - while_each_thread(g, q); + force_sig(SIGKILL, p); + } while_each_thread(g, q); return 0; } @@ -337,21 +345,22 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, * its children or threads, just set TIF_MEMDIE so it can die quickly */ if (p->flags & PF_EXITING) { - __oom_kill_task(p, NULL); + __oom_kill_task(p, 0); return 0; } - printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" - " and children.\n", p->pid, p->comm, points); + printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n", + message, p->pid, p->comm, points); + /* Try to kill a child first */ list_for_each(tsk, &p->children) { c = list_entry(tsk, struct task_struct, sibling); if (c->mm == p->mm) continue; - if (!oom_kill_task(c, message)) + if (!oom_kill_task(c)) return 0; } - return oom_kill_task(p, message); + return oom_kill_task(p); } static BLOCKING_NOTIFIER_HEAD(oom_notify_list); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index aa6fcc7ca66..cace22b3ac2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -83,14 +83,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); -/* - * Used by page_zone() to look up the address of the struct zone whose - * id is encoded in the upper bits of page->flags - */ -struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; -EXPORT_SYMBOL(zone_table); - -static char *zone_names[MAX_NR_ZONES] = { +static char * const zone_names[MAX_NR_ZONES] = { "DMA", #ifdef CONFIG_ZONE_DMA32 "DMA32", @@ -237,7 +230,7 @@ static void prep_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - page[1].lru.next = (void *)free_compound_page; /* set dtor */ + set_compound_page_dtor(page, free_compound_page); page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; @@ -486,7 +479,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order) spin_lock(&zone->lock); zone->all_unreclaimable = 0; zone->pages_scanned = 0; - __free_one_page(page, zone ,order); + __free_one_page(page, zone, order); spin_unlock(&zone->lock); } @@ -605,6 +598,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) 1 << PG_checked | 1 << PG_mappedtodisk); set_page_private(page, 0); set_page_refcounted(page); + + arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); if (gfp_flags & __GFP_ZERO) @@ -690,9 +685,15 @@ void drain_node_pages(int nodeid) pcp = &pset->pcp[i]; if (pcp->count) { + int to_drain; + local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); - pcp->count = 0; + if (pcp->count >= pcp->batch) + to_drain = pcp->batch; + else + to_drain = pcp->count; + free_pages_bulk(zone, to_drain, &pcp->list, 0); + pcp->count -= to_drain; local_irq_restore(flags); } } @@ -700,7 +701,6 @@ void drain_node_pages(int nodeid) } #endif -#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) static void __drain_pages(unsigned int cpu) { unsigned long flags; @@ -722,7 +722,6 @@ static void __drain_pages(unsigned int cpu) } } } -#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM @@ -925,31 +924,160 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, return 1; } +#ifdef CONFIG_NUMA +/* + * zlc_setup - Setup for "zonelist cache". Uses cached zone data to + * skip over zones that are not allowed by the cpuset, or that have + * been recently (in last second) found to be nearly full. See further + * comments in mmzone.h. Reduces cache footprint of zonelist scans + * that have to skip over alot of full or unallowed zones. + * + * If the zonelist cache is present in the passed in zonelist, then + * returns a pointer to the allowed node mask (either the current + * tasks mems_allowed, or node_online_map.) + * + * If the zonelist cache is not available for this zonelist, does + * nothing and returns NULL. + * + * If the fullzones BITMAP in the zonelist cache is stale (more than + * a second since last zap'd) then we zap it out (clear its bits.) + * + * We hold off even calling zlc_setup, until after we've checked the + * first zone in the zonelist, on the theory that most allocations will + * be satisfied from that first zone, so best to examine that zone as + * quickly as we can. + */ +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + nodemask_t *allowednodes; /* zonelist_cache approximation */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return NULL; + + if (jiffies - zlc->last_full_zap > 1 * HZ) { + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + zlc->last_full_zap = jiffies; + } + + allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? + &cpuset_current_mems_allowed : + &node_online_map; + return allowednodes; +} + +/* + * Given 'z' scanning a zonelist, run a couple of quick checks to see + * if it is worth looking at further for free memory: + * 1) Check that the zone isn't thought to be full (doesn't have its + * bit set in the zonelist_cache fullzones BITMAP). + * 2) Check that the zones node (obtained from the zonelist_cache + * z_to_n[] mapping) is allowed in the passed in allowednodes mask. + * Return true (non-zero) if zone is worth looking at further, or + * else return false (zero) if it is not. + * + * This check -ignores- the distinction between various watermarks, + * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is + * found to be full for any variation of these watermarks, it will + * be considered full for up to one second by all requests, unless + * we are so low on memory on all allowed nodes that we are forced + * into the second scan of the zonelist. + * + * In the second scan we ignore this zonelist cache and exactly + * apply the watermarks to all zones, even it is slower to do so. + * We are low on memory in the second scan, and should leave no stone + * unturned looking for a free page. + */ +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + int n; /* node that zone *z is on */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return 1; + + i = z - zonelist->zones; + n = zlc->z_to_n[i]; + + /* This zone is worth trying if it is allowed but not full */ + return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones); +} + /* - * get_page_from_freeliest goes through the zonelist trying to allocate + * Given 'z' scanning a zonelist, set the corresponding bit in + * zlc->fullzones, so that subsequent attempts to allocate a page + * from that zone don't waste time re-examining it. + */ +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return; + + i = z - zonelist->zones; + + set_bit(i, zlc->fullzones); +} + +#else /* CONFIG_NUMA */ + +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + return NULL; +} + +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + return 1; +} + +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ +} +#endif /* CONFIG_NUMA */ + +/* + * get_page_from_freelist goes through the zonelist trying to allocate * a page. */ static struct page * get_page_from_freelist(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, int alloc_flags) { - struct zone **z = zonelist->zones; + struct zone **z; struct page *page = NULL; - int classzone_idx = zone_idx(*z); + int classzone_idx = zone_idx(zonelist->zones[0]); struct zone *zone; + nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ + int zlc_active = 0; /* set if using zonelist_cache */ + int did_zlc_setup = 0; /* just call zlc_setup() one time */ +zonelist_scan: /* - * Go through the zonelist once, looking for a zone with enough free. + * Scan zonelist, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ + z = zonelist->zones; + do { + if (NUMA_BUILD && zlc_active && + !zlc_zone_worth_trying(zonelist, z, allowednodes)) + continue; zone = *z; if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) break; if ((alloc_flags & ALLOC_CPUSET) && - !cpuset_zone_allowed(zone, gfp_mask)) - continue; + !cpuset_zone_allowed(zone, gfp_mask)) + goto try_next_zone; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; @@ -959,18 +1087,34 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, mark = zone->pages_low; else mark = zone->pages_high; - if (!zone_watermark_ok(zone , order, mark, - classzone_idx, alloc_flags)) + if (!zone_watermark_ok(zone, order, mark, + classzone_idx, alloc_flags)) { if (!zone_reclaim_mode || !zone_reclaim(zone, gfp_mask, order)) - continue; + goto this_zone_full; + } } page = buffered_rmqueue(zonelist, zone, order, gfp_mask); - if (page) { + if (page) break; +this_zone_full: + if (NUMA_BUILD) + zlc_mark_zone_full(zonelist, z); +try_next_zone: + if (NUMA_BUILD && !did_zlc_setup) { + /* we do zlc_setup after the first zone is tried */ + allowednodes = zlc_setup(zonelist, alloc_flags); + zlc_active = 1; + did_zlc_setup = 1; } } while (*(++z) != NULL); + + if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + goto zonelist_scan; + } return page; } @@ -1005,9 +1149,19 @@ restart: if (page) goto got_pg; - do { + /* + * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and + * __GFP_NOWARN set) should not cause reclaim since the subsystem + * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim + * using a larger set of nodes after it has established that the + * allowed per node queues are empty and that nodes are + * over allocated. + */ + if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) + goto nopage; + + for (z = zonelist->zones; *z; z++) wakeup_kswapd(*z, order); - } while (*(++z)); /* * OK, we're below the kswapd watermark and have kicked background @@ -1041,6 +1195,7 @@ restart: /* This allocation should allow future memory freeing. */ +rebalance: if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { if (!(gfp_mask & __GFP_NOMEMALLOC)) { @@ -1062,7 +1217,6 @@ nofail_alloc: if (!wait) goto nopage; -rebalance: cond_resched(); /* We now go into synchronous reclaim */ @@ -1262,7 +1416,7 @@ unsigned int nr_free_pagecache_pages(void) static inline void show_node(struct zone *zone) { if (NUMA_BUILD) - printk("Node %ld ", zone_to_nid(zone)); + printk("Node %d ", zone_to_nid(zone)); } void si_meminfo(struct sysinfo *val) @@ -1542,6 +1696,24 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* Construct the zonelist performance cache - see further mmzone.h */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zonelist *zonelist; + struct zonelist_cache *zlc; + struct zone **z; + + zonelist = pgdat->node_zonelists + i; + zonelist->zlcache_ptr = zlc = &zonelist->zlcache; + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + for (z = zonelist->zones; *z; z++) + zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); + } +} + #else /* CONFIG_NUMA */ static void __meminit build_zonelists(pg_data_t *pgdat) @@ -1579,14 +1751,26 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) + pgdat->node_zonelists[i].zlcache_ptr = NULL; +} + #endif /* CONFIG_NUMA */ /* return values int ....just for stop_machine_run() */ static int __meminit __build_all_zonelists(void *dummy) { int nid; - for_each_online_node(nid) + + for_each_online_node(nid) { build_zonelists(NODE_DATA(nid)); + build_zonelist_cache(NODE_DATA(nid)); + } return 0; } @@ -1715,20 +1899,6 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } } -#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, enum zone_type zid, - unsigned long pfn, unsigned long size) -{ - unsigned long snum = pfn_to_section_nr(pfn); - unsigned long end = pfn_to_section_nr(pfn + size); - - if (FLAGS_HAS_NODE) - zone_table[ZONETABLE_INDEX(nid, zid)] = zone; - else - for (; snum <= end; snum++) - zone_table[ZONETABLE_INDEX(snum, zid)] = zone; -} - #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ memmap_init_zone((size), (nid), (zone), (start_pfn)) @@ -1881,16 +2051,16 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, int ret = NOTIFY_OK; switch (action) { - case CPU_UP_PREPARE: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_DEAD: - free_zone_pagesets(cpu); - break; - default: - break; + case CPU_UP_PREPARE: + if (process_zones(cpu)) + ret = NOTIFY_BAD; + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + free_zone_pagesets(cpu); + break; + default: + break; } return ret; } @@ -2421,7 +2591,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, if (!size) continue; - zonetable_add(zone, nid, j, zone_start_pfn, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size); BUG_ON(ret); zone_start_pfn += size; @@ -2736,7 +2905,6 @@ void __init free_area_init(unsigned long *zones_size) __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } -#ifdef CONFIG_HOTPLUG_CPU static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -2751,7 +2919,6 @@ static int page_alloc_cpu_notify(struct notifier_block *self, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init page_alloc_init(void) { @@ -3055,7 +3222,7 @@ void *__init alloc_large_system_hash(const char *tablename, /* allow the kernel cmdline to have a say */ if (!numentries) { /* round applicable memory size up to nearest megabyte */ - numentries = (flags & HASH_HIGHMEM) ? nr_all_pages : nr_kernel_pages; + numentries = nr_kernel_pages; numentries += (1UL << (20 - PAGE_SHIFT)) - 1; numentries >>= 20 - PAGE_SHIFT; numentries <<= 20 - PAGE_SHIFT; diff --git a/mm/page_io.c b/mm/page_io.c index d4840ecbf8f..dbffec0d78c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -147,48 +147,3 @@ int swap_readpage(struct file *file, struct page *page) out: return ret; } - -#ifdef CONFIG_SOFTWARE_SUSPEND -/* - * A scruffy utility function to read or write an arbitrary swap page - * and wait on the I/O. The caller must have a ref on the page. - * - * We use end_swap_bio_read() even for writes, because it happens to do what - * we want. - */ -int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, - struct bio **bio_chain) -{ - struct bio *bio; - int ret = 0; - int bio_rw; - - lock_page(page); - - bio = get_swap_bio(GFP_KERNEL, entry.val, page, end_swap_bio_read); - if (bio == NULL) { - unlock_page(page); - ret = -ENOMEM; - goto out; - } - - bio_rw = rw; - if (!bio_chain) - bio_rw |= (1 << BIO_RW_SYNC); - if (bio_chain) - bio_get(bio); - submit_bio(bio_rw, bio); - if (bio_chain == NULL) { - wait_on_page_locked(page); - - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - } - if (bio_chain) { - bio->bi_private = *bio_chain; - *bio_chain = bio; - } -out: - return ret; -} -#endif diff --git a/mm/pdflush.c b/mm/pdflush.c index b02102feeb4..8ce0900dc95 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -21,6 +21,7 @@ #include <linux/writeback.h> // Prototypes pdflush_operation() #include <linux/kthread.h> #include <linux/cpuset.h> +#include <linux/freezer.h> /* diff --git a/mm/readahead.c b/mm/readahead.c index 23cb61a01c6..a386f2b6b33 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -148,13 +148,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); if (ret) { - while (!list_empty(pages)) { - struct page *victim; - - victim = list_to_page(pages); - list_del(&victim->lru); - page_cache_release(victim); - } + put_pages_list(pages); break; } } diff --git a/mm/shmem.c b/mm/shmem.c index 4959535fc14..c820b4f77b8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -177,7 +177,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) static struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; -static struct file_operations shmem_file_operations; +static const struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; static struct inode_operations shmem_special_inode_operations; @@ -1943,7 +1943,7 @@ static int shmem_xattr_security_set(struct inode *inode, const char *name, return security_inode_setsecurity(inode, name, value, size, flags); } -struct xattr_handler shmem_xattr_security_handler = { +static struct xattr_handler shmem_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = shmem_xattr_security_list, .get = shmem_xattr_security_get, @@ -2263,7 +2263,7 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL); + p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); if (!p) return NULL; return &p->vfs_inode; @@ -2319,7 +2319,7 @@ static const struct address_space_operations shmem_aops = { .migratepage = migrate_page, }; -static struct file_operations shmem_file_operations = { +static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, #ifdef CONFIG_TMPFS .llseek = generic_file_llseek, diff --git a/mm/slab.c b/mm/slab.c index 5de81473df3..068cb4503c1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -103,12 +103,12 @@ #include <linux/module.h> #include <linux/rcupdate.h> #include <linux/string.h> +#include <linux/uaccess.h> #include <linux/nodemask.h> #include <linux/mempolicy.h> #include <linux/mutex.h> #include <linux/rtmutex.h> -#include <asm/uaccess.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/page.h> @@ -730,7 +730,10 @@ static inline void init_lock_keys(void) } #endif -/* Guard access to the cache-chain. */ +/* + * 1. Guard access to the cache-chain. + * 2. Protect sanity of cpu_online_map against cpu hotplug events + */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; @@ -866,6 +869,22 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, dump_stack(); } +/* + * By default on NUMA we use alien caches to stage the freeing of + * objects allocated from other nodes. This causes massive memory + * inefficiencies when using fake NUMA setup to split memory into a + * large number of small nodes, so it can be disabled on the command + * line + */ + +static int use_alien_caches __read_mostly = 1; +static int __init noaliencache_setup(char *s) +{ + use_alien_caches = 0; + return 1; +} +__setup("noaliencache", noaliencache_setup); + #ifdef CONFIG_NUMA /* * Special reaping functions for NUMA systems called from cache_reap(). @@ -996,7 +1015,7 @@ static inline void *alternate_node_alloc(struct kmem_cache *cachep, return NULL; } -static inline void *__cache_alloc_node(struct kmem_cache *cachep, +static inline void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { return NULL; @@ -1004,7 +1023,7 @@ static inline void *__cache_alloc_node(struct kmem_cache *cachep, #else /* CONFIG_NUMA */ -static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); +static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); static struct array_cache **alloc_alien_cache(int node, int limit) @@ -1114,7 +1133,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) * Make sure we are not freeing a object from another node to the array * cache on this cpu. */ - if (likely(slabp->nodeid == node)) + if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches)) return 0; l3 = cachep->nodelists[node]; @@ -1192,7 +1211,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; - struct array_cache **alien; + struct array_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, cachep->batchcount); @@ -1204,9 +1223,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, if (!shared) goto bad; - alien = alloc_alien_cache(node, cachep->limit); - if (!alien) - goto bad; + if (use_alien_caches) { + alien = alloc_alien_cache(node, cachep->limit); + if (!alien) + goto bad; + } cachep->array[cpu] = nc; l3 = cachep->nodelists[node]; BUG_ON(!l3); @@ -1230,12 +1251,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, kfree(shared); free_alien_cache(alien); } - mutex_unlock(&cache_chain_mutex); break; case CPU_ONLINE: + mutex_unlock(&cache_chain_mutex); start_cpu_timer(cpu); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DOWN_PREPARE: + mutex_lock(&cache_chain_mutex); + break; + case CPU_DOWN_FAILED: + mutex_unlock(&cache_chain_mutex); + break; case CPU_DEAD: /* * Even if all the cpus of a node are down, we don't free the @@ -1246,8 +1273,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, * gets destroyed at kmem_cache_destroy(). */ /* fall thru */ +#endif case CPU_UP_CANCELED: - mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; @@ -1308,11 +1335,9 @@ free_array_cache: } mutex_unlock(&cache_chain_mutex); break; -#endif } return NOTIFY_OK; bad: - mutex_unlock(&cache_chain_mutex); return NOTIFY_BAD; } @@ -1580,12 +1605,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) flags |= __GFP_COMP; #endif - /* - * Under NUMA we want memory on the indicated node. We will handle - * the needed fallback ourselves since we want to serve from our - * per node object lists first for other nodes. - */ - flags |= cachep->gfpflags | GFP_THISNODE; + flags |= cachep->gfpflags; page = alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) @@ -2098,15 +2118,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, } /* - * Prevent CPUs from coming and going. - * lock_cpu_hotplug() nests outside cache_chain_mutex + * We use cache_chain_mutex to ensure a consistent view of + * cpu_online_map as well. Please see cpuup_callback */ - lock_cpu_hotplug(); - mutex_lock(&cache_chain_mutex); list_for_each_entry(pc, &cache_chain, next) { - mm_segment_t old_fs = get_fs(); char tmp; int res; @@ -2115,9 +2132,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, * destroy its slab cache and no-one else reuses the vmalloc * area of the module. Print a warning. */ - set_fs(KERNEL_DS); - res = __get_user(tmp, pc->name); - set_fs(old_fs); + res = probe_kernel_address(pc->name, tmp); if (res) { printk("SLAB: cache with size %d has lost its name\n", pc->buffer_size); @@ -2197,25 +2212,24 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) ralign = BYTES_PER_WORD; - /* 2) arch mandated alignment: disables debug if necessary */ + /* 2) arch mandated alignment */ if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; - if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } - /* 3) caller mandated alignment: disables debug if necessary */ + /* 3) caller mandated alignment */ if (ralign < align) { ralign = align; - if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } + /* disable debug if necessary */ + if (ralign > BYTES_PER_WORD) + flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); /* * 4) Store it. */ align = ralign; /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL); + cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); if (!cachep) goto oops; @@ -2326,7 +2340,6 @@ oops: panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2444,6 +2457,7 @@ out: return nr_freed; } +/* Called with cache_chain_mutex held to protect against cpu hotplug */ static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; @@ -2474,9 +2488,13 @@ static int __cache_shrink(struct kmem_cache *cachep) */ int kmem_cache_shrink(struct kmem_cache *cachep) { + int ret; BUG_ON(!cachep || in_interrupt()); - return __cache_shrink(cachep); + mutex_lock(&cache_chain_mutex); + ret = __cache_shrink(cachep); + mutex_unlock(&cache_chain_mutex); + return ret; } EXPORT_SYMBOL(kmem_cache_shrink); @@ -2500,23 +2518,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep) { BUG_ON(!cachep || in_interrupt()); - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); - /* Find the cache in the chain of caches. */ mutex_lock(&cache_chain_mutex); /* * the chain is never empty, cache_cache is never destroyed */ list_del(&cachep->next); - mutex_unlock(&cache_chain_mutex); - if (__cache_shrink(cachep)) { slab_error(cachep, "Can't free all objects"); - mutex_lock(&cache_chain_mutex); list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return; } @@ -2524,7 +2535,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) synchronize_rcu(); __kmem_cache_destroy(cachep); - unlock_cpu_hotplug(); + mutex_unlock(&cache_chain_mutex); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -2548,7 +2559,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ slabp = kmem_cache_alloc_node(cachep->slabp_cache, - local_flags, nodeid); + local_flags & ~GFP_THISNODE, nodeid); if (!slabp) return NULL; } else { @@ -2618,7 +2629,7 @@ static void cache_init_objs(struct kmem_cache *cachep, static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) { - if (flags & SLAB_DMA) + if (flags & GFP_DMA) BUG_ON(!(cachep->gfpflags & GFP_DMA)); else BUG_ON(cachep->gfpflags & GFP_DMA); @@ -2689,10 +2700,10 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) +static int cache_grow(struct kmem_cache *cachep, + gfp_t flags, int nodeid, void *objp) { struct slab *slabp; - void *objp; size_t offset; gfp_t local_flags; unsigned long ctor_flags; @@ -2702,12 +2713,12 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)); - if (flags & SLAB_NO_GROW) + BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW)); + if (flags & __GFP_NO_GROW) return 0; ctor_flags = SLAB_CTOR_CONSTRUCTOR; - local_flags = (flags & SLAB_LEVEL_MASK); + local_flags = (flags & GFP_LEVEL_MASK); if (!(local_flags & __GFP_WAIT)) /* * Not allowed to sleep. Need to tell a constructor about @@ -2744,12 +2755,14 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) * Get mem for the objs. Attempt to allocate a physical page from * 'nodeid'. */ - objp = kmem_getpages(cachep, flags, nodeid); + if (!objp) + objp = kmem_getpages(cachep, flags, nodeid); if (!objp) goto failed; /* Get slab management. */ - slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid); + slabp = alloc_slabmgmt(cachep, objp, offset, + local_flags & ~GFP_THISNODE, nodeid); if (!slabp) goto opps1; @@ -2987,7 +3000,7 @@ alloc_done: if (unlikely(!ac->avail)) { int x; - x = cache_grow(cachep, flags, node); + x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); @@ -3063,6 +3076,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, cachep->ctor(objp, cachep, ctor_flags); } +#if ARCH_SLAB_MINALIGN + if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { + printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", + objp, ARCH_SLAB_MINALIGN); + } +#endif return objp; } #else @@ -3105,10 +3124,10 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep, objp = ____cache_alloc(cachep, flags); /* * We may just have run out of memory on the local node. - * __cache_alloc_node() knows how to locate memory on other nodes + * ____cache_alloc_node() knows how to locate memory on other nodes */ if (NUMA_BUILD && !objp) - objp = __cache_alloc_node(cachep, flags, numa_node_id()); + objp = ____cache_alloc_node(cachep, flags, numa_node_id()); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); @@ -3135,15 +3154,17 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) else if (current->mempolicy) nid_alloc = slab_node(current->mempolicy); if (nid_alloc != nid_here) - return __cache_alloc_node(cachep, flags, nid_alloc); + return ____cache_alloc_node(cachep, flags, nid_alloc); return NULL; } /* * Fallback function if there was no memory available and no objects on a - * certain node and we are allowed to fall back. We mimick the behavior of - * the page allocator. We fall back according to a zonelist determined by - * the policy layer while obeying cpuset constraints. + * certain node and fall back is permitted. First we scan all the + * available nodelists for available objects. If that fails then we + * perform an allocation without specifying a node. This allows the page + * allocator to do its reclaim / fallback magic. We then insert the + * slab into the proper nodelist and then allocate from it. */ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) { @@ -3151,15 +3172,51 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ->node_zonelists[gfp_zone(flags)]; struct zone **z; void *obj = NULL; + int nid; +retry: + /* + * Look through allowed nodes for objects available + * from existing per node queues. + */ for (z = zonelist->zones; *z && !obj; z++) { - int nid = zone_to_nid(*z); + nid = zone_to_nid(*z); + + if (cpuset_zone_allowed(*z, flags) && + cache->nodelists[nid] && + cache->nodelists[nid]->free_objects) + obj = ____cache_alloc_node(cache, + flags | GFP_THISNODE, nid); + } - if (zone_idx(*z) <= ZONE_NORMAL && - cpuset_zone_allowed(*z, flags) && - cache->nodelists[nid]) - obj = __cache_alloc_node(cache, - flags | __GFP_THISNODE, nid); + if (!obj) { + /* + * This allocation will be performed within the constraints + * of the current cpuset / memory policy requirements. + * We may trigger various forms of reclaim on the allowed + * set and go into memory reserves if necessary. + */ + obj = kmem_getpages(cache, flags, -1); + if (obj) { + /* + * Insert into the appropriate per node queues + */ + nid = page_to_nid(virt_to_page(obj)); + if (cache_grow(cache, flags, nid, obj)) { + obj = ____cache_alloc_node(cache, + flags | GFP_THISNODE, nid); + if (!obj) + /* + * Another processor may allocate the + * objects in the slab since we are + * not holding any locks. + */ + goto retry; + } else { + kmem_freepages(cache, obj); + obj = NULL; + } + } } return obj; } @@ -3167,7 +3224,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) /* * A interface to enable slab creation on nodeid */ -static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, +static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; @@ -3216,7 +3273,7 @@ retry: must_grow: spin_unlock(&l3->list_lock); - x = cache_grow(cachep, flags, nodeid); + x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); if (x) goto retry; @@ -3434,35 +3491,59 @@ out: * @flags: See kmalloc(). * @nodeid: node number of the target node. * - * Identical to kmem_cache_alloc, except that this function is slow - * and can sleep. And it will allocate memory on the given node, which - * can improve the performance for cpu bound structures. - * New and improved: it will now make sure that the object gets - * put on the correct node list so that there is no false sharing. + * Identical to kmem_cache_alloc but it will allocate memory on the given + * node, which can improve the performance for cpu bound structures. + * + * Fallback to other node is possible if __GFP_THISNODE is not set. */ -void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) +static __always_inline void * +__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, + int nodeid, void *caller) { unsigned long save_flags; - void *ptr; + void *ptr = NULL; cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); - if (nodeid == -1 || nodeid == numa_node_id() || - !cachep->nodelists[nodeid]) - ptr = ____cache_alloc(cachep, flags); - else - ptr = __cache_alloc_node(cachep, flags, nodeid); - local_irq_restore(save_flags); + if (unlikely(nodeid == -1)) + nodeid = numa_node_id(); + + if (likely(cachep->nodelists[nodeid])) { + if (nodeid == numa_node_id()) { + /* + * Use the locally cached objects if possible. + * However ____cache_alloc does not allow fallback + * to other nodes. It may fail while we still have + * objects on other nodes available. + */ + ptr = ____cache_alloc(cachep, flags); + } + if (!ptr) { + /* ___cache_alloc_node can fall back to other nodes */ + ptr = ____cache_alloc_node(cachep, flags, nodeid); + } + } else { + /* Node not bootstrapped yet */ + if (!(flags & __GFP_THISNODE)) + ptr = fallback_alloc(cachep, flags); + } - ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, - __builtin_return_address(0)); + local_irq_restore(save_flags); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); return ptr; } + +void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) +{ + return __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); +} EXPORT_SYMBOL(kmem_cache_alloc_node); -void *__kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void * +__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) { struct kmem_cache *cachep; @@ -3471,8 +3552,29 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return NULL; return kmem_cache_alloc_node(cachep, flags, node); } + +#ifdef CONFIG_DEBUG_SLAB +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, + __builtin_return_address(0)); +} EXPORT_SYMBOL(__kmalloc_node); -#endif + +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, + int node, void *caller) +{ + return __do_kmalloc_node(size, flags, node, caller); +} +EXPORT_SYMBOL(__kmalloc_node_track_caller); +#else +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, NULL); +} +EXPORT_SYMBOL(__kmalloc_node); +#endif /* CONFIG_DEBUG_SLAB */ +#endif /* CONFIG_NUMA */ /** * __do_kmalloc - allocate memory @@ -3583,13 +3685,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep) int node; struct kmem_list3 *l3; struct array_cache *new_shared; - struct array_cache **new_alien; + struct array_cache **new_alien = NULL; for_each_online_node(node) { - new_alien = alloc_alien_cache(node, cachep->limit); - if (!new_alien) - goto fail; + if (use_alien_caches) { + new_alien = alloc_alien_cache(node, cachep->limit); + if (!new_alien) + goto fail; + } new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, @@ -4038,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p) * + further values on SMP and with statistics enabled */ -struct seq_operations slabinfo_op = { +const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -4236,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p) return 0; } -struct seq_operations slabstats_op = { +const struct seq_operations slabstats_op = { .start = leaks_start, .next = s_next, .stop = s_stop, diff --git a/mm/sparse.c b/mm/sparse.c index b3c82ba3001..ac26eb0d73c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -24,6 +24,25 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] #endif EXPORT_SYMBOL(mem_section); +#ifdef NODE_NOT_IN_PAGE_FLAGS +/* + * If we did not store the node number in the page then we have to + * do a lookup in the section_to_node_table in order to find which + * node the page belongs to. + */ +#if MAX_NUMNODES <= 256 +static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#else +static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#endif + +int page_to_nid(struct page *page) +{ + return section_to_node_table[page_to_section(page)]; +} +EXPORT_SYMBOL(page_to_nid); +#endif + #ifdef CONFIG_SPARSEMEM_EXTREME static struct mem_section *sparse_index_alloc(int nid) { @@ -49,6 +68,10 @@ static int sparse_index_init(unsigned long section_nr, int nid) struct mem_section *section; int ret = 0; +#ifdef NODE_NOT_IN_PAGE_FLAGS + section_to_node_table[section_nr] = nid; +#endif + if (mem_section[root]) return -EEXIST; diff --git a/mm/swap.c b/mm/swap.c index d9a3770d8f3..2ed7be39795 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -57,9 +57,9 @@ static void put_compound_page(struct page *page) { page = (struct page *)page_private(page); if (put_page_testzero(page)) { - void (*dtor)(struct page *page); + compound_page_dtor *dtor; - dtor = (void (*)(struct page *))page[1].lru.next; + dtor = get_compound_page_dtor(page); (*dtor)(page); } } @@ -514,5 +514,7 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ +#ifdef CONFIG_HOTPLUG_CPU hotcpu_notifier(cpu_swap_callback, 0); +#endif } diff --git a/mm/swapfile.c b/mm/swapfile.c index a15def63f28..c5431072f42 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -427,34 +427,48 @@ void free_swap_and_cache(swp_entry_t entry) #ifdef CONFIG_SOFTWARE_SUSPEND /* - * Find the swap type that corresponds to given device (if any) + * Find the swap type that corresponds to given device (if any). * - * This is needed for software suspend and is done in such a way that inode - * aliasing is allowed. + * @offset - number of the PAGE_SIZE-sized block of the device, starting + * from 0, in which the swap header is expected to be located. + * + * This is needed for the suspend to disk (aka swsusp). */ -int swap_type_of(dev_t device) +int swap_type_of(dev_t device, sector_t offset) { + struct block_device *bdev = NULL; int i; + if (device) + bdev = bdget(device); + spin_lock(&swap_lock); for (i = 0; i < nr_swapfiles; i++) { - struct inode *inode; + struct swap_info_struct *sis = swap_info + i; - if (!(swap_info[i].flags & SWP_WRITEOK)) + if (!(sis->flags & SWP_WRITEOK)) continue; - if (!device) { + if (!bdev) { spin_unlock(&swap_lock); return i; } - inode = swap_info[i].swap_file->f_dentry->d_inode; - if (S_ISBLK(inode->i_mode) && - device == MKDEV(imajor(inode), iminor(inode))) { - spin_unlock(&swap_lock); - return i; + if (bdev == sis->bdev) { + struct swap_extent *se; + + se = list_entry(sis->extent_list.next, + struct swap_extent, list); + if (se->start_block == offset) { + spin_unlock(&swap_lock); + bdput(bdev); + return i; + } } } spin_unlock(&swap_lock); + if (bdev) + bdput(bdev); + return -ENODEV; } @@ -931,6 +945,23 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset) } } +#ifdef CONFIG_SOFTWARE_SUSPEND +/* + * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev + * corresponding to given index in swap_info (swap type). + */ +sector_t swapdev_block(int swap_type, pgoff_t offset) +{ + struct swap_info_struct *sis; + + if (swap_type >= nr_swapfiles) + return 0; + + sis = swap_info + swap_type; + return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0; +} +#endif /* CONFIG_SOFTWARE_SUSPEND */ + /* * Free all of a swapdev's extent information */ @@ -1274,10 +1305,13 @@ static void *swap_start(struct seq_file *swap, loff_t *pos) mutex_lock(&swapon_mutex); + if (!l) + return SEQ_START_TOKEN; + for (i = 0; i < nr_swapfiles; i++, ptr++) { if (!(ptr->flags & SWP_USED) || !ptr->swap_map) continue; - if (!l--) + if (!--l) return ptr; } @@ -1286,10 +1320,17 @@ static void *swap_start(struct seq_file *swap, loff_t *pos) static void *swap_next(struct seq_file *swap, void *v, loff_t *pos) { - struct swap_info_struct *ptr = v; + struct swap_info_struct *ptr; struct swap_info_struct *endptr = swap_info + nr_swapfiles; - for (++ptr; ptr < endptr; ptr++) { + if (v == SEQ_START_TOKEN) + ptr = swap_info; + else { + ptr = v; + ptr++; + } + + for (; ptr < endptr; ptr++) { if (!(ptr->flags & SWP_USED) || !ptr->swap_map) continue; ++*pos; @@ -1310,8 +1351,10 @@ static int swap_show(struct seq_file *swap, void *v) struct file *file; int len; - if (v == swap_info) - seq_puts(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); + if (ptr == SEQ_START_TOKEN) { + seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); + return 0; + } file = ptr->swap_file; len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\"); @@ -1325,7 +1368,7 @@ static int swap_show(struct seq_file *swap, void *v) return 0; } -static struct seq_operations swaps_op = { +static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, @@ -1337,7 +1380,7 @@ static int swaps_open(struct inode *inode, struct file *file) return seq_open(file, &swaps_op); } -static struct file_operations proc_swaps_operations = { +static const struct file_operations proc_swaps_operations = { .open = swaps_open, .read = seq_read, .llseek = seq_lseek, @@ -1540,6 +1583,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) error = -EINVAL; if (!maxpages) goto bad_swap; + if (swapfilesize && maxpages > swapfilesize) { + printk(KERN_WARNING + "Swap area shorter than signature indicates\n"); + goto bad_swap; + } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) goto bad_swap; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) @@ -1567,12 +1615,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) goto bad_swap; } - if (swapfilesize && maxpages > swapfilesize) { - printk(KERN_WARNING - "Swap area shorter than signature indicates\n"); - error = -EINVAL; - goto bad_swap; - } if (nr_good_pages) { p->swap_map[0] = SWAP_MAP_BAD; p->max = maxpages; diff --git a/mm/thrash.c b/mm/thrash.c index f4c560b4a2b..9ef9071f99b 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -7,100 +7,74 @@ * * Simple token based thrashing protection, using the algorithm * described in: http://www.cs.wm.edu/~sjiang/token.pdf + * + * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com> + * Improved algorithm to pass token: + * Each task has a priority which is incremented if it contended + * for the token in an interval less than its previous attempt. + * If the token is acquired, that task's priority is boosted to prevent + * the token from bouncing around too often and to let the task make + * some progress in its execution. */ + #include <linux/jiffies.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/swap.h> static DEFINE_SPINLOCK(swap_token_lock); -static unsigned long swap_token_timeout; -static unsigned long swap_token_check; -struct mm_struct * swap_token_mm = &init_mm; - -#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) -#define SWAP_TOKEN_TIMEOUT (300 * HZ) -/* - * Currently disabled; Needs further code to work at HZ * 300. - */ -unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT; - -/* - * Take the token away if the process had no page faults - * in the last interval, or if it has held the token for - * too long. - */ -#define SWAP_TOKEN_ENOUGH_RSS 1 -#define SWAP_TOKEN_TIMED_OUT 2 -static int should_release_swap_token(struct mm_struct *mm) -{ - int ret = 0; - if (!mm->recent_pagein) - ret = SWAP_TOKEN_ENOUGH_RSS; - else if (time_after(jiffies, swap_token_timeout)) - ret = SWAP_TOKEN_TIMED_OUT; - mm->recent_pagein = 0; - return ret; -} +struct mm_struct *swap_token_mm; +static unsigned int global_faults; -/* - * Try to grab the swapout protection token. We only try to - * grab it once every TOKEN_CHECK_INTERVAL, both to prevent - * SMP lock contention and to check that the process that held - * the token before is no longer thrashing. - */ void grab_swap_token(void) { - struct mm_struct *mm; - int reason; + int current_interval; - /* We have the token. Let others know we still need it. */ - if (has_swap_token(current->mm)) { - current->mm->recent_pagein = 1; - if (unlikely(!swap_token_default_timeout)) - disable_swap_token(); - return; - } - - if (time_after(jiffies, swap_token_check)) { + global_faults++; - if (!swap_token_default_timeout) { - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - return; - } - - /* ... or if we recently held the token. */ - if (time_before(jiffies, current->mm->swap_token_time)) - return; + current_interval = global_faults - current->mm->faultstamp; - if (!spin_trylock(&swap_token_lock)) - return; + if (!spin_trylock(&swap_token_lock)) + return; - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; + /* First come first served */ + if (swap_token_mm == NULL) { + current->mm->token_priority = current->mm->token_priority + 2; + swap_token_mm = current->mm; + goto out; + } - mm = swap_token_mm; - if ((reason = should_release_swap_token(mm))) { - unsigned long eligible = jiffies; - if (reason == SWAP_TOKEN_TIMED_OUT) { - eligible += swap_token_default_timeout; - } - mm->swap_token_time = eligible; - swap_token_timeout = jiffies + swap_token_default_timeout; + if (current->mm != swap_token_mm) { + if (current_interval < current->mm->last_interval) + current->mm->token_priority++; + else { + current->mm->token_priority--; + if (unlikely(current->mm->token_priority < 0)) + current->mm->token_priority = 0; + } + /* Check if we deserve the token */ + if (current->mm->token_priority > + swap_token_mm->token_priority) { + current->mm->token_priority += 2; swap_token_mm = current->mm; } - spin_unlock(&swap_token_lock); + } else { + /* Token holder came in again! */ + current->mm->token_priority += 2; } - return; + +out: + current->mm->faultstamp = global_faults; + current->mm->last_interval = current_interval; + spin_unlock(&swap_token_lock); +return; } /* Called on process exit. */ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) { - mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - swap_token_mm = &init_mm; - swap_token_check = jiffies; - } + if (likely(mm == swap_token_mm)) + swap_token_mm = NULL; spin_unlock(&swap_token_lock); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 518540a4a2a..093f5fe6dd7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -36,6 +36,7 @@ #include <linux/rwsem.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/freezer.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -1172,11 +1173,12 @@ loop_again: if (!zone_watermark_ok(zone, order, zone->pages_high, 0, 0)) { end_zone = i; - goto scan; + break; } } - goto out; -scan: + if (i < 0) + goto out; + for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; @@ -1259,6 +1261,9 @@ out: } if (!all_zones_ok) { cond_resched(); + + try_to_freeze(); + goto loop_again; } @@ -1508,7 +1513,6 @@ out: } #endif -#ifdef CONFIG_HOTPLUG_CPU /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes away, we get changed to run anywhere: as the first one comes back, @@ -1529,7 +1533,6 @@ static int __devinit cpu_callback(struct notifier_block *nfb, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ /* * This kswapd start function will be called by init and node-hot-add. diff --git a/mm/vmstat.c b/mm/vmstat.c index 8614e8f6743..dc005a0c96a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -430,7 +430,7 @@ static int frag_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations fragmentation_op = { +const struct seq_operations fragmentation_op = { .start = frag_start, .next = frag_next, .stop = frag_stop, @@ -452,7 +452,7 @@ struct seq_operations fragmentation_op = { #define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ TEXT_FOR_HIGHMEM(xx) -static char *vmstat_text[] = { +static const char * const vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", "nr_mapped", @@ -597,7 +597,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations zoneinfo_op = { +const struct seq_operations zoneinfo_op = { .start = frag_start, /* iterate over all zones. The same as in * fragmentation. */ .next = frag_next, @@ -660,7 +660,7 @@ static void vmstat_stop(struct seq_file *m, void *arg) m->private = NULL; } -struct seq_operations vmstat_op = { +const struct seq_operations vmstat_op = { .start = vmstat_start, .next = vmstat_next, .stop = vmstat_stop, @@ -679,13 +679,13 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, void *hcpu) { switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_CANCELED: - case CPU_DEAD: - refresh_zone_stat_thresholds(); - break; - default: - break; + case CPU_UP_PREPARE: + case CPU_UP_CANCELED: + case CPU_DEAD: + refresh_zone_stat_thresholds(); + break; + default: + break; } return NOTIFY_OK; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9f04864d15..8ca448db7a0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,7 +23,7 @@ #include <asm/atomic.h> #include "br_private.h" -static kmem_cache_t *br_fdb_cache __read_mostly; +static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ac47ba2ba02..bd221ad52ea 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -34,6 +34,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> +#include <linux/inetdevice.h> #include <net/ip.h> #include <net/ipv6.h> @@ -221,10 +222,14 @@ static void __br_dnat_complain(void) * * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will - * later be passed up to the IP stack to be routed. + * later be passed up to the IP stack to be routed. For a redirected + * packet, ip_route_input() will give back the localhost as output device, + * which differs from the bridge device. * * Let us now consider the case that ip_route_input() fails: * + * This can be because the destination address is martian, in which case + * the packet will be dropped. * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() * will fail, while __ip_route_output_key() will return success. The source * address for __ip_route_output_key() is set to zero, so __ip_route_output_key @@ -237,7 +242,8 @@ static void __br_dnat_complain(void) * * --Lennert, 20020411 * --Bart, 20020416 (updated) - * --Bart, 20021007 (updated) */ + * --Bart, 20021007 (updated) + * --Bart, 20062711 (updated) */ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { if (skb->pkt_type == PACKET_OTHERHOST) { @@ -264,15 +270,15 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) struct net_device *dev = skb->dev; struct iphdr *iph = skb->nh.iph; struct nf_bridge_info *nf_bridge = skb->nf_bridge; + int err; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { - if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) { + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct rtable *rt; struct flowi fl = { .nl_u = { @@ -283,19 +289,33 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) }, .proto = 0, }; + struct in_device *in_dev = in_dev_get(dev); + + /* If err equals -EHOSTUNREACH the error is due to a + * martian destination or due to the fact that + * forwarding is disabled. For most martian packets, + * ip_route_output_key() will fail. It won't fail for 2 types of + * martian destinations: loopback destinations and destination + * 0.0.0.0. In both cases the packet will be dropped because the + * destination is the loopback device and not the bridge. */ + if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + goto free_skb; if (!ip_route_output_key(&rt, &fl)) { /* - Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. - * - Deal with redirected traffic. */ - if (((struct dst_entry *)rt)->dev == dev || - rt->rt_type == RTN_LOCAL) { + * require ip_forwarding. */ + if (((struct dst_entry *)rt)->dev == dev) { skb->dst = (struct dst_entry *)rt; goto bridged_dnat; } + /* we are sure that forwarding is disabled, so printing + * this message is no problem. Note that the packet could + * still have a martian destination address, in which case + * the packet could be dropped even if forwarding were enabled */ __br_dnat_complain(); dst_release((struct dst_entry *)rt); } +free_skb: kfree_skb(skb); return 0; } else { diff --git a/net/core/dev.c b/net/core/dev.c index 59d058a3b50..e660cb57e42 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3340,7 +3340,6 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); -#ifdef CONFIG_HOTPLUG_CPU static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3384,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_NET_DMA /** diff --git a/net/core/dst.c b/net/core/dst.c index 1a5e49da0e7..836ec660692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -125,7 +125,7 @@ void * dst_alloc(struct dst_ops * ops) if (ops->gc()) return NULL; } - dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; memset(dst, 0, ops->entry_size); diff --git a/net/core/flow.c b/net/core/flow.c index b16d31ae5e5..d137f971f97 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep __read_mostly; +static struct kmem_cache *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; @@ -211,7 +211,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, if (flow_count(cpu) > flow_hwm) flow_cache_shrink(cpu); - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->next = *head; *head = fle; @@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu) tasklet_init(tasklet, flow_cache_flush_tasklet, 0); } -#ifdef CONFIG_HOTPLUG_CPU static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb, __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ static int __init flow_cache_init(void) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ba509a4a8e9..0ab1987b934 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC); if (!n) goto out_entries; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e1c385e5ba..de7801d589e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,8 +68,8 @@ #include "kmap_skb.h" -static kmem_cache_t *skbuff_head_cache __read_mostly; -static kmem_cache_t *skbuff_fclone_cache __read_mostly; +static struct kmem_cache *skbuff_head_cache __read_mostly; +static struct kmem_cache *skbuff_fclone_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -132,6 +132,7 @@ EXPORT_SYMBOL(skb_truesize_bug); * @gfp_mask: allocation mask * @fclone: allocate from fclone cache instead of head cache * and allocate a cloned (child) skb + * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -141,9 +142,9 @@ EXPORT_SYMBOL(skb_truesize_bug); * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone) + int fclone, int node) { - kmem_cache_t *cache; + struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; @@ -151,14 +152,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; /* Get the HEAD */ - skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask); + data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), + gfp_mask, node); if (!data) goto nodata; @@ -210,7 +211,7 @@ nodata: * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t gfp_mask) { @@ -267,9 +268,10 @@ nodata: struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask) { + int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1; struct sk_buff *skb; - skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; diff --git a/net/core/sock.c b/net/core/sock.c index 419c7d3289c..0ed5b4f0bc4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -810,24 +810,11 @@ lenout: */ static void inline sock_lock_init(struct sock *sk) { - spin_lock_init(&sk->sk_lock.slock); - sk->sk_lock.owner = NULL; - init_waitqueue_head(&sk->sk_lock.wq); - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); - - /* - * Mark both the sk_lock and the sk_lock.slock as a - * per-address-family lock class: - */ - lockdep_set_class_and_name(&sk->sk_lock.slock, - af_family_slock_keys + sk->sk_family, - af_family_slock_key_strings[sk->sk_family]); - lockdep_init_map(&sk->sk_lock.dep_map, - af_family_key_strings[sk->sk_family], - af_family_keys + sk->sk_family, 0); + sock_lock_init_class_and_name(sk, + af_family_slock_key_strings[sk->sk_family], + af_family_slock_keys + sk->sk_family, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); } /** @@ -841,7 +828,7 @@ struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; - kmem_cache_t *slab = prot->slab; + struct kmem_cache *slab = prot->slab; if (slab != NULL) sk = kmem_cache_alloc(slab, priority); diff --git a/net/core/wireless.c b/net/core/wireless.c index cb1b8728d7e..f69ab7b4408 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -2130,7 +2130,7 @@ int iw_handler_set_spy(struct net_device * dev, * The rtnl_lock() make sure we don't race with the other iw_handlers. * This make sure wireless_spy_update() "see" that the spy list * is temporarily disabled. */ - wmb(); + smp_wmb(); /* Are there are addresses to copy? */ if(wrqu->data.length > 0) { @@ -2159,7 +2159,7 @@ int iw_handler_set_spy(struct net_device * dev, } /* Make sure above is updated before re-enabling */ - wmb(); + smp_wmb(); /* Enable addresses */ spydata->spy_number = wrqu->data.length; diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bdf1bb7a82c..1f4727ddbdb 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -21,8 +21,8 @@ #include <net/sock.h> -static kmem_cache_t *dccp_ackvec_slab; -static kmem_cache_t *dccp_ackvec_record_slab; +static struct kmem_cache *dccp_ackvec_slab; +static struct kmem_cache *dccp_ackvec_record_slab; static struct dccp_ackvec_record *dccp_ackvec_record_new(void) { diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index ff05e59043c..d8cf92f09e6 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void) #define ccids_read_unlock() do { } while(0) #endif -static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) +static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { - kmem_cache_t *slab; + struct kmem_cache *slab; char slab_name_fmt[32], *slab_name; va_list args; @@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) return slab; } -static void ccid_kmem_cache_destroy(kmem_cache_t *slab) +static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { if (slab != NULL) { const char *name = kmem_cache_name(slab); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c7c29514dce..bcc2d12ae81 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -27,9 +27,9 @@ struct ccid_operations { unsigned char ccid_id; const char *ccid_name; struct module *ccid_owner; - kmem_cache_t *ccid_hc_rx_slab; + struct kmem_cache *ccid_hc_rx_slab; __u32 ccid_hc_rx_obj_size; - kmem_cache_t *ccid_hc_tx_slab; + struct kmem_cache *ccid_hc_tx_slab; __u32 ccid_hc_tx_obj_size; int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cf8c07b2704..66a27b9688c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -295,7 +295,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (new_packet == NULL || new_packet->dccphtx_sent) { new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, - SLAB_ATOMIC); + GFP_ATOMIC); if (unlikely(new_packet == NULL)) { DCCP_WARN("%s, sk=%p, not enough mem to add to history," @@ -889,7 +889,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) /* new loss event detected */ /* calculate last interval length */ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); if (entry == NULL) { DCCP_BUG("out of memory - can not allocate entry"); @@ -1011,7 +1011,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, - skb, SLAB_ATOMIC); + skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet " "to history, consider it lost!\n", dccp_role(sk), sk); diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 48b9b93f8ac..0a0baef16b3 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -125,7 +125,7 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); DCCP_BUG("loss interval list entry is NULL"); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 0ae85f0340b..eb257014dd7 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -20,7 +20,7 @@ #define DCCP_LI_HIST_IVAL_F_LENGTH 8 struct dccp_li_hist { - kmem_cache_t *dccplih_slab; + struct kmem_cache *dccplih_slab; }; extern struct dccp_li_hist *dccp_li_hist_new(const char *name); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 067cf1c85a3..9a8bcf224aa 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -68,14 +68,14 @@ struct dccp_rx_hist_entry { }; struct dccp_tx_hist { - kmem_cache_t *dccptxh_slab; + struct kmem_cache *dccptxh_slab; }; extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); struct dccp_rx_hist { - kmem_cache_t *dccprxh_slab; + struct kmem_cache *dccprxh_slab; }; extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index bdbc3f43166..13b2421991b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -static kmem_cache_t *dn_hash_kmem __read_mostly; +static struct kmem_cache *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) @@ -590,7 +590,7 @@ create: replace: err = -ENOBUFS; - new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out; diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index 08386c10295..eec1a1dd91d 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -431,6 +431,17 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, return 0; } +void +ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) +{ + unsigned long flags; + + spin_lock_irqsave(&mac->lock, flags); + mac->associnfo.associating = 1; + schedule_work(&mac->associnfo.work); + spin_unlock_irqrestore(&mac->lock, flags); +} + int ieee80211softmac_handle_disassoc(struct net_device * dev, struct ieee80211_disassoc *disassoc) @@ -449,8 +460,7 @@ ieee80211softmac_handle_disassoc(struct net_device * dev, dprintk(KERN_INFO PFX "got disassoc frame\n"); ieee80211softmac_disassoc(mac); - /* try to reassociate */ - schedule_delayed_work(&mac->associnfo.work, 0); + ieee80211softmac_try_reassoc(mac); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 6012705aa4f..8ed3e59b802 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -337,6 +337,8 @@ ieee80211softmac_deauth_from_net(struct ieee80211softmac_device *mac, /* can't transmit data right now... */ netif_carrier_off(mac->dev); spin_unlock_irqrestore(&mac->lock, flags); + + ieee80211softmac_try_reassoc(mac); } /* diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h index c0dbe070e54..4c2bba34d32 100644 --- a/net/ieee80211/softmac/ieee80211softmac_priv.h +++ b/net/ieee80211/softmac/ieee80211softmac_priv.h @@ -239,4 +239,6 @@ void ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, in int ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac, int event, void *event_context, notify_function_ptr fun, void *context, gfp_t gfp_mask); +void ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac); + #endif /* IEEE80211SOFTMAC_PRIV_H_ */ diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 2ffaebd21c5..480d72c7a42 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -495,7 +495,8 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev, printk(KERN_DEBUG PFX "wx_set_mlme: we should know the net here...\n"); goto out; } - return ieee80211softmac_deauth_req(mac, net, reason); + err = ieee80211softmac_deauth_req(mac, net, reason); + goto out; case IW_MLME_DISASSOC: ieee80211softmac_send_disassoc_req(mac, reason); mac->associnfo.associated = 0; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 107bb6cbb0b..648f47c1c39 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -45,8 +45,8 @@ #include "fib_lookup.h" -static kmem_cache_t *fn_hash_kmem __read_mostly; -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_hash_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; @@ -485,13 +485,13 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; new_f = NULL; if (!f) { - new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out_free_new_fa; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d17990ec724..cfb249cc0a5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -172,7 +172,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -1187,7 +1187,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) u8 state; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; @@ -1232,7 +1232,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 244c4f445c7..8c79c8a4ea5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -27,11 +27,11 @@ * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ -struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, +struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, const unsigned short snum) { - struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { tb->port = snum; @@ -45,7 +45,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, /* * Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) +void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 8c74f9168b7..9f414e35c48 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -91,7 +91,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat { struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - SLAB_ATOMIC); + GFP_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); @@ -178,7 +178,6 @@ void inet_twdr_hangman(unsigned long data) need_timer = 0; if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { twdr->thread_slots |= (1 << twdr->slot); - mb(); schedule_work(&twdr->twkill_work); need_timer = 1; } else { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f072f3875af..711eb6d0285 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -73,7 +73,7 @@ /* Exported for inet_getid inline function. */ DEFINE_SPINLOCK(inet_peer_idlock); -static kmem_cache_t *peer_cachep __read_mostly; +static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height static struct inet_peer peer_fake_node = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index efcf45ecc81..ecb5422ea23 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -105,7 +105,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); In this case data path is free of exclusive locks at all. */ -static kmem_cache_t *mrt_cachep __read_mostly; +static struct kmem_cache *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 8832eb517d5..8086787a2c5 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -44,7 +44,7 @@ static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ -static kmem_cache_t *ip_vs_conn_cachep __read_mostly; +static struct kmem_cache *ip_vs_conn_cachep __read_mostly; /* counter for current IPVS connections */ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 413c2d0a1f3..71b76ade00e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -375,6 +375,13 @@ static int mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->arp)) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last * big jump. */ @@ -404,6 +411,14 @@ static int mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, ARPT_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct arpt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } + /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -426,8 +441,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, static inline int standard_check(const struct arpt_entry_target *t, unsigned int max_offset) { - struct arpt_standard_target *targ = (void *)t; - /* Check standard info. */ if (t->u.target_size != ARPT_ALIGN(sizeof(struct arpt_standard_target))) { @@ -437,18 +450,6 @@ static inline int standard_check(const struct arpt_entry_target *t, return 0; } - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct arpt_entry)) { - duprintf("arpt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } - - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("arpt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } return 1; } @@ -627,18 +628,20 @@ static int translate_table(const char *name, } } + if (!mark_source_chains(newinfo, valid_hooks, entry0)) { + duprintf("Looping hook\n"); + return -ELOOP; + } + /* Finally, each sanity check must pass */ i = 0; ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); - if (ret != 0) - goto cleanup; - - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry0)) { - duprintf("Looping hook\n"); - goto cleanup; + if (ret != 0) { + ARPT_ENTRY_ITERATE(entry0, newinfo->size, + cleanup_entry, &i); + return ret; } /* And one copy for every other CPU */ @@ -647,9 +650,6 @@ static int translate_table(const char *name, memcpy(newinfo->entries[i], entry0, newinfo->size); } - return 0; -cleanup: - ARPT_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index f4b0e68a16d..8556a4f4f60 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -65,8 +65,8 @@ static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size __read_mostly = 0; int ip_conntrack_max __read_mostly; struct list_head *ip_conntrack_hash __read_mostly; -static kmem_cache_t *ip_conntrack_cachep __read_mostly; -static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 8a455439b12..0ff2956d35e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -401,6 +401,13 @@ mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->ip)) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last big jump. */ do { @@ -438,6 +445,13 @@ mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, IPT_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ipt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -470,27 +484,6 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) } static inline int -standard_check(const struct ipt_entry_target *t, - unsigned int max_offset) -{ - struct ipt_standard_target *targ = (void *)t; - - /* Check standard info. */ - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct ipt_entry)) { - duprintf("ipt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("ipt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } - return 1; -} - -static inline int check_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, @@ -576,12 +569,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, if (ret) goto err; - if (t->u.kernel.target == &ipt_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto err; - } - } else if (t->u.kernel.target->checkentry + if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", @@ -718,17 +706,19 @@ translate_table(const char *name, } } + if (!mark_source_chains(newinfo, valid_hooks, entry0)) + return -ELOOP; + /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); - if (ret != 0) - goto cleanup; - - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry0)) - goto cleanup; + if (ret != 0) { + IPT_ENTRY_ITERATE(entry0, newinfo->size, + cleanup_entry, &i); + return ret; + } /* And one copy for every other CPU */ for_each_possible_cpu(i) { @@ -736,9 +726,6 @@ translate_table(const char *name, memcpy(newinfo->entries[i], entry0, newinfo->size); } - return 0; -cleanup: - IPT_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } @@ -1529,25 +1516,8 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, const struct ipt_ip *ip, unsigned int hookmask) { - struct ipt_entry_match *dm; - struct ipt_match *match; - int ret; - - dm = (struct ipt_entry_match *)*dstptr; - match = m->u.kernel.match; xt_compat_match_from_user(m, dstptr, size); - - ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - } - return ret; + return 0; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1569,7 +1539,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, name, &de->ip, de->comefrom); if (ret) - goto err; + return ret; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; @@ -1582,31 +1552,62 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } + return ret; +} + +static inline int compat_check_match(struct ipt_entry_match *m, const char *name, + const struct ipt_ip *ip, unsigned int hookmask) +{ + struct ipt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), + name, hookmask, ip->proto, + ip->invflags & IPT_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ip, match, m->data, + hookmask)) { + duprintf("ip_tables: compat: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; + } + return ret; +} + +static inline int compat_check_target(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + struct ipt_target *target; + int ret; - t = ipt_get_target(de); + t = ipt_get_target(e); target = t->u.kernel.target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), name, e->comefrom, e->ip.proto, e->ip.invflags & IPT_INV_PROTO); - if (ret) - goto err; - - ret = -EINVAL; - if (t->u.kernel.target == &ipt_standard_target) { - if (!standard_check(t, *size)) - goto err; - } else if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, de, target, - t->data, de->comefrom)) { + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, + t->data, e->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); - goto err; + ret = -EINVAL; } - ret = 0; -err: return ret; } +static inline int compat_check_entry(struct ipt_entry *e, const char *name) +{ + int ret; + + ret = IPT_MATCH_ITERATE(e, compat_check_match, name, &e->ip, + e->comefrom); + if (ret) + return ret; + + return compat_check_target(e, name); +} + static int translate_compat_table(const char *name, unsigned int valid_hooks, @@ -1695,6 +1696,11 @@ translate_compat_table(const char *name, if (!mark_source_chains(newinfo, valid_hooks, entry1)) goto free_newinfo; + ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, + name); + if (ret) + goto free_newinfo; + /* And one copy for every other CPU */ for_each_possible_cpu(i) if (newinfo->entries[i] && newinfo->entries[i] != entry1) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9f3924c4905..11c167118e8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1780,7 +1780,7 @@ static inline int __mkroute_input(struct sk_buff *skb, #endif if (in_dev->cnf.no_policy) rth->u.dst.flags |= DST_NOPOLICY; - if (in_dev->cnf.no_xfrm) + if (out_dev->cnf.no_xfrm) rth->u.dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9304034c0c4..c701f6abbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4235,7 +4235,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * is initialized. */ tp->copied_seq = tp->rcv_nxt; - mb(); + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); security_inet_conn_established(sk, skb); @@ -4483,7 +4483,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case TCP_SYN_RECV: if (acceptable) { tp->copied_seq = tp->rcv_nxt; - mb(); + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d4107bb701b..fb9f69c616f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -274,6 +274,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); + if (likely(xdst->u.rt.peer)) + inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 87c8f54872b..e5cd83b2205 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -720,10 +720,8 @@ snmp6_mib_free(void *ptr[2]) { if (ptr == NULL) return; - if (ptr[0]) - free_percpu(ptr[0]); - if (ptr[1]) - free_percpu(ptr[1]); + free_percpu(ptr[0]); + free_percpu(ptr[1]); ptr[0] = ptr[1] = NULL; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bf526115e51..96d8310ae9c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -50,7 +50,7 @@ struct rt6_statistics rt6_stats; -static kmem_cache_t * fib6_node_kmem __read_mostly; +static struct kmem_cache * fib6_node_kmem __read_mostly; enum fib_walk_state_t { @@ -150,7 +150,7 @@ static __inline__ struct fib6_node * node_alloc(void) { struct fib6_node *fn; - if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL) + if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL) memset(fn, 0, sizeof(struct fib6_node)); return fn; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e05ecbb1412..e9212c7ff5c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -624,13 +624,13 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_shinfo(skb)->frag_list = NULL; /* BUILD HEADER */ + *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC); if (!tmp_hdr) { IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } - *prevhdr = NEXTHDR_FRAGMENT; __skb_pull(skb, hlen); fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); skb->nh.raw = __skb_push(skb, hlen); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f63fb86d7c7..4eec4b3988b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -440,6 +440,13 @@ mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->ipv6)) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last big jump. */ do { @@ -477,6 +484,13 @@ mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, IP6T_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ip6t_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -509,27 +523,6 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) } static inline int -standard_check(const struct ip6t_entry_target *t, - unsigned int max_offset) -{ - struct ip6t_standard_target *targ = (void *)t; - - /* Check standard info. */ - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct ip6t_entry)) { - duprintf("ip6t_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("ip6t_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } - return 1; -} - -static inline int check_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, @@ -616,12 +609,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, if (ret) goto err; - if (t->u.kernel.target == &ip6t_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto err; - } - } else if (t->u.kernel.target->checkentry + if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", @@ -758,17 +746,19 @@ translate_table(const char *name, } } + if (!mark_source_chains(newinfo, valid_hooks, entry0)) + return -ELOOP; + /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); - if (ret != 0) - goto cleanup; - - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry0)) - goto cleanup; + if (ret != 0) { + IP6T_ENTRY_ITERATE(entry0, newinfo->size, + cleanup_entry, &i); + return ret; + } /* And one copy for every other CPU */ for_each_possible_cpu(i) { @@ -777,9 +767,6 @@ translate_table(const char *name, } return 0; -cleanup: - IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); - return ret; } /* Gets counters. */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c2e629d6aea..4ae1b19ada5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -854,7 +854,8 @@ back_from_confirm: } done: dst_release(dst); - release_sock(sk); + if (!inet->hdrincl) + release_sock(sk); out: fl6_sock_release(flowlabel); return err<0?err:len; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 01a5c52a2be..12e426b9aac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -50,7 +50,7 @@ static u32 xfrm6_tunnel_spi; #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff -static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; +static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 @@ -180,7 +180,7 @@ try_next_2:; spi = 0; goto out; alloc_spi: - x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC); + x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 252f1101256..03504f3e499 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1100,7 +1100,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, return -ENOMEM; /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); } else { tx_skb = userdata; /* @@ -1349,7 +1349,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, return -ENOMEM; /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); } else { tx_skb = userdata; /* diff --git a/net/key/af_key.c b/net/key/af_key.c index 0e1dbfbb9b1..5dd5094659a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <net/xfrm.h> +#include <linux/audit.h> #include <net/sock.h> @@ -1420,6 +1421,9 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, else err = xfrm_state_update(x); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -1460,8 +1464,12 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = -EPERM; goto out; } - + err = xfrm_state_delete(x); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -1637,12 +1645,15 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd { unsigned proto; struct km_event c; + struct xfrm_audit audit_info; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - xfrm_state_flush(proto); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_state_flush(proto, &audit_info); c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2205,6 +2216,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL); + if (err) goto out; @@ -2282,6 +2296,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -2416,8 +2434,11 @@ static int key_notify_policy_flush(struct km_event *c) static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { struct km_event c; + struct xfrm_audit audit_info; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eaa0f8a1adb..9b02ec4012f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -108,7 +108,7 @@ static struct { size_t size; /* slab cache pointer */ - kmem_cache_t *cachep; + struct kmem_cache *cachep; /* allocated slab cache + modules which uses this slab cache */ int use; @@ -147,7 +147,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, { int ret = 0; char *cache_name; - kmem_cache_t *cachep; + struct kmem_cache *cachep; DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", features, name, size); @@ -226,7 +226,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_cache); /* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ void nf_conntrack_unregister_cache(u_int32_t features) { - kmem_cache_t *cachep; + struct kmem_cache *cachep; char *name; /* @@ -1093,7 +1093,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) get_order(sizeof(struct list_head) * size)); } -void nf_conntrack_flush() +void nf_conntrack_flush(void) { nf_ct_iterate_cleanup(kill_all, NULL); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 588d3793704..9cbf926cdd1 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -29,7 +29,7 @@ LIST_HEAD(nf_conntrack_expect_list); EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); -kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; +struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; static unsigned int nf_conntrack_expect_next_id; /* nf_conntrack_expect helper functions */ @@ -91,25 +91,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get); struct nf_conntrack_expect * find_expectation(const struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_expect *i; + struct nf_conntrack_expect *exp; + + exp = __nf_conntrack_expect_find(tuple); + if (!exp) + return NULL; - list_for_each_entry(i, &nf_conntrack_expect_list, list) { /* If master is not in hash table yet (ie. packet hasn't left this machine yet), how can other end know about expected? Hence these are not the droids you are looking for (if master ct never got confirmed, we'd hold a reference to it and weird things would happen to future packets). */ - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) - && nf_ct_is_confirmed(i->master)) { - if (i->flags & NF_CT_EXPECT_PERMANENT) { - atomic_inc(&i->use); - return i; - } else if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - return i; - } - } + if (!nf_ct_is_confirmed(exp->master)) + return NULL; + + if (exp->flags & NF_CT_EXPECT_PERMANENT) { + atomic_inc(&exp->use); + return exp; + } else if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + return exp; } + return NULL; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a98de0b54d6..a5a6e192ac2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -92,7 +92,7 @@ struct xt_hashlimit_htable { static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep __read_mostly; +static struct kmem_cache *hashlimit_cachep __read_mostly; static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9b03747c1f..548e4e6e698 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -143,6 +143,13 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) goto errout; } + if (ops->dumpit) + ops->flags |= GENL_CMD_CAP_DUMP; + if (ops->doit) + ops->flags |= GENL_CMD_CAP_DO; + if (ops->policy) + ops->flags |= GENL_CMD_CAP_HASPOL; + genl_lock(); list_add_tail(&ops->ops_list, &family->ops_list); genl_unlock(); @@ -387,7 +394,7 @@ static void genl_rcv(struct sock *sk, int len) static struct genl_family genl_ctrl = { .id = GENL_ID_CTRL, .name = "nlctrl", - .version = 0x1, + .version = 0x2, .maxattr = CTRL_ATTR_MAX, }; @@ -425,15 +432,6 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); - if (ops->policy) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); - - if (ops->doit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); - - if (ops->dumpit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); - nla_nest_end(skb, nest); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 08e68b67bbf..da73e8a8c18 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -660,7 +660,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sll->sll_ifindex = dev->ifindex; h->tp_status = status; - mb(); + smp_mb(); { struct page *p_start, *p_end; diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c index dada34a77b2..49effd92144 100644 --- a/net/rxrpc/krxiod.c +++ b/net/rxrpc/krxiod.c @@ -13,6 +13,7 @@ #include <linux/completion.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/freezer.h> #include <rxrpc/krxiod.h> #include <rxrpc/transport.h> #include <rxrpc/peer.h> diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index cea4eb5e249..3ab0f77409f 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -27,6 +27,7 @@ #include <rxrpc/call.h> #include <linux/udp.h> #include <linux/ip.h> +#include <linux/freezer.h> #include <net/sock.h> #include "internal.h" diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c index 3e7466900bd..9a9b6132dba 100644 --- a/net/rxrpc/krxtimod.c +++ b/net/rxrpc/krxtimod.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/completion.h> +#include <linux/freezer.h> #include <rxrpc/rxrpc.h> #include <rxrpc/krxtimod.h> #include <asm/errno.h> diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index f59a2c4aa03..c797d6ada7d 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -101,9 +101,10 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_head *head = (struct fw_head*)tp->root; struct fw_filter *f; int r; - u32 id = skb->mark & head->mask; + u32 id = skb->mark; if (head != NULL) { + id &= head->mask; for (f=head->ht[fw_hash(id)]; f; f=f->next) { if (f->id == id) { *res = f->res; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 11f3b549f4a..f2ba8615895 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -79,8 +79,8 @@ static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; -kmem_cache_t *sctp_chunk_cachep __read_mostly; -kmem_cache_t *sctp_bucket_cachep __read_mostly; +struct kmem_cache *sctp_chunk_cachep __read_mostly; +struct kmem_cache *sctp_bucket_cachep __read_mostly; /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 04954e5f684..30927d3a597 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -65,7 +65,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -extern kmem_cache_t *sctp_chunk_cachep; +extern struct kmem_cache *sctp_chunk_cachep; SCTP_STATIC struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, @@ -979,7 +979,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, { struct sctp_chunk *retval; - retval = kmem_cache_alloc(sctp_chunk_cachep, SLAB_ATOMIC); + retval = kmem_cache_alloc(sctp_chunk_cachep, GFP_ATOMIC); if (!retval) goto nodata; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 02b27145b27..1e8132b8c4d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern kmem_cache_t *sctp_bucket_cachep; +extern struct kmem_cache *sctp_bucket_cachep; /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) @@ -4989,7 +4989,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( { struct sctp_bind_bucket *pp; - pp = kmem_cache_alloc(sctp_bucket_cachep, SLAB_ATOMIC); + pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC); SCTP_DBG_OBJCNT_INC(bind_bucket); if (pp) { pp->port = snum; diff --git a/net/socket.c b/net/socket.c index e8db54702a6..29ea1de43ec 100644 --- a/net/socket.c +++ b/net/socket.c @@ -230,13 +230,13 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t *sock_inode_cachep __read_mostly; +static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); @@ -257,7 +257,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -305,7 +305,14 @@ static struct file_system_type sock_fs_type = { static int sockfs_delete_dentry(struct dentry *dentry) { - return 1; + /* + * At creation time, we pretended this dentry was hashed + * (by clearing DCACHE_UNHASHED bit in d_flags) + * At delete time, we restore the truth : not hashed. + * (so that dput() can proceed correctly) + */ + dentry->d_flags |= DCACHE_UNHASHED; + return 0; } static struct dentry_operations sockfs_dentry_operations = { .d_delete = sockfs_delete_dentry, @@ -353,14 +360,20 @@ static int sock_attach_fd(struct socket *sock, struct file *file) this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); this.name = name; - this.hash = SOCK_INODE(sock)->i_ino; + this.hash = 0; file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); if (unlikely(!file->f_dentry)) return -ENOMEM; file->f_dentry->d_op = &sockfs_dentry_operations; - d_add(file->f_dentry, SOCK_INODE(sock)); + /* + * We dont want to push this dentry into global dentry hash table. + * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED + * This permits a working /proc/$pid/fd/XXX on sockets + */ + file->f_dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(file->f_dentry, SOCK_INODE(sock)); file->f_vfsmnt = mntget(sock_mnt); file->f_mapping = file->f_dentry->d_inode->i_mapping; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 49dba5febbb..19703aa9659 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -33,7 +33,7 @@ static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; -static kmem_cache_t *rpc_inode_cachep __read_mostly; +static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) @@ -143,7 +143,7 @@ static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; - rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL); + rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; @@ -824,7 +824,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 18a33d32701..79bc4cdf5d4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -34,8 +34,8 @@ static int rpc_task_id; #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) -static kmem_cache_t *rpc_task_slabp __read_mostly; -static kmem_cache_t *rpc_buffer_slabp __read_mostly; +static struct kmem_cache *rpc_task_slabp __read_mostly; +static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index ee9bb1522d5..c7bb5f7f21a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -119,7 +119,8 @@ EXPORT_SYMBOL(svc_auth_unregister); #define DN_HASHMASK (DN_HASHMAX-1) static struct hlist_head auth_domain_table[DN_HASHMAX]; -static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t auth_domain_lock = + __SPIN_LOCK_UNLOCKED(auth_domain_lock); void auth_domain_put(struct auth_domain *dom) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 64ca1f61dd9..99f54fb6d66 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -32,6 +32,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/freezer.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -84,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req); */ static int svc_conn_age_period = 6*60; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key svc_key[2]; +static struct lock_class_key svc_slock_key[2]; + +static inline void svc_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", + &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", + &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void svc_reclassify_socket(struct socket *sock) +{ +} +#endif + /* * Queue up an idle server thread. Must have pool->sp_lock held. * Note: this is really a stack rather than a queue, so that we only @@ -1556,6 +1586,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) return error; + svc_reclassify_socket(sock); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, (struct sockaddr *) sin, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3bb232eb5d9..49cabffd7fd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1173,6 +1173,35 @@ static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) return err; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key xs_key[2]; +static struct lock_class_key xs_slock_key[2]; + +static inline void xs_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", + &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", + &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void xs_reclassify_socket(struct socket *sock) +{ +} +#endif + /** * xs_udp_connect_worker - set up a UDP socket * @work: RPC transport to connect @@ -1197,6 +1226,7 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { sock_release(sock); @@ -1282,6 +1312,7 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { sock_release(sock); diff --git a/net/tipc/handler.c b/net/tipc/handler.c index ae6ddf00a1a..eb80778d6d9 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -42,7 +42,7 @@ struct queue_item { unsigned long data; }; -static kmem_cache_t *tipc_queue_item_cache; +static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); static int handler_enabled = 0; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 316211d9f17..769cdd62c1b 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -62,63 +62,6 @@ #define KMEM_SAFETYZONE 8 -/***********FOR DEBUGGING PURPOSES********************************************* -static void * dbg_kmalloc(unsigned int size, int prio, int line) { - int i = 0; - void * v = kmalloc(size+sizeof(unsigned int)+2*KMEM_SAFETYZONE*8,prio); - char * c1 = v; - c1 += sizeof(unsigned int); - *((unsigned int *)v) = size; - - for (i = 0; i < KMEM_SAFETYZONE; i++) { - c1[0] = 'D'; c1[1] = 'E'; c1[2] = 'A'; c1[3] = 'D'; - c1[4] = 'B'; c1[5] = 'E'; c1[6] = 'E'; c1[7] = 'F'; - c1 += 8; - } - c1 += size; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - c1[0] = 'M'; c1[1] = 'U'; c1[2] = 'N'; c1[3] = 'G'; - c1[4] = 'W'; c1[5] = 'A'; c1[6] = 'L'; c1[7] = 'L'; - c1 += 8; - } - v = ((char *)v) + sizeof(unsigned int) + KMEM_SAFETYZONE*8; - printk(KERN_INFO "line %d kmalloc(%d,%d) = %p\n",line,size,prio,v); - return v; -} -static void dbg_kfree(void * v, int line) { - unsigned int * sp = (unsigned int *)(((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8)); - unsigned int size = *sp; - char * c1 = ((char *)v) - KMEM_SAFETYZONE*8; - int i = 0; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - if ( c1[0] != 'D' || c1[1] != 'E' || c1[2] != 'A' || c1[3] != 'D' - || c1[4] != 'B' || c1[5] != 'E' || c1[6] != 'E' || c1[7] != 'F') { - printk(KERN_INFO "kmalloced block at %p has been corrupted (underrun)!\n",v); - printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8, - c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] ); - } - c1 += 8; - } - c1 += size; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - if ( c1[0] != 'M' || c1[1] != 'U' || c1[2] != 'N' || c1[3] != 'G' - || c1[4] != 'W' || c1[5] != 'A' || c1[6] != 'L' || c1[7] != 'L' - ) { - printk(KERN_INFO "kmalloced block at %p has been corrupted (overrun):\n",v); - printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8, - c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] ); - } - c1 += 8; - } - printk(KERN_INFO "line %d kfree(%p)\n",line,v); - v = ((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8); - kfree(v); -} - -#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__) -#define kfree(x) dbg_kfree(x,__LINE__) -*****************************************************************************/ - /* * Function Prototypes */ diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5a0dbeb6bbe..6b381fc0383 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -119,6 +119,23 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 160 } }, +{ + .name = "xcbc(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, }; static struct xfrm_algo_desc ealg_list[] = { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e8198a2c785..414f8907038 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -12,7 +12,7 @@ #include <net/ip.h> #include <net/xfrm.h> -static kmem_cache_t *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __read_mostly; void __secpath_destroy(struct sec_path *sp) { @@ -27,7 +27,7 @@ struct sec_path *secpath_dup(struct sec_path *src) { struct sec_path *sp; - sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC); + sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC); if (!sp) return NULL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f6c77bd36fd..bebd40e5a62 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -25,6 +25,7 @@ #include <linux/cache.h> #include <net/xfrm.h> #include <net/ip.h> +#include <linux/audit.h> #include "xfrm_hash.h" @@ -39,7 +40,7 @@ EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; static HLIST_HEAD(xfrm_policy_gc_list); @@ -804,7 +805,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type) +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) { int dir; @@ -824,6 +825,9 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -842,6 +846,11 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, + pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -860,33 +869,12 @@ EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol; + struct xfrm_policy *pol, *last = NULL; struct hlist_node *entry; - int dir, count, error; + int dir, last_dir = 0, count, error; read_lock_bh(&xfrm_policy_lock); count = 0; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type == type) - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type == type) - count++; - } - } - } - - if (count == 0) { - error = -ENOENT; - goto out; - } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { struct hlist_head *table = xfrm_policy_bydst[dir].table; @@ -896,21 +884,37 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) &xfrm_policy_inexact[dir], bydst) { if (pol->type != type) continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); - if (error) - goto out; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; + count++; } for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) { if (pol->type != type) continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); - if (error) - goto out; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; + count++; } } } - error = 0; + if (count == 0) { + error = -ENOENT; + goto out; + } + error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -1982,6 +1986,117 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); +#ifdef CONFIG_AUDITSYSCALL +/* Audit addition and deletion of SAs and ipsec policy */ + +void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x) +{ + + char *secctx; + u32 secctx_len; + struct xfrm_sec_ctx *sctx = NULL; + struct audit_buffer *audit_buf; + int family; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + + audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); + if (audit_buf == NULL) + return; + + switch(type) { + case AUDIT_MAC_IPSEC_ADDSA: + audit_log_format(audit_buf, "SAD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSA: + audit_log_format(audit_buf, "SAD delete: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_ADDSPD: + audit_log_format(audit_buf, "SPD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSPD: + audit_log_format(audit_buf, "SPD delete: auid=%u", auid); + break; + default: + return; + } + + if (sid != 0 && + security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + else + audit_log_task_context(audit_buf); + + if (xp) { + family = xp->selector.family; + if (xp->security) + sctx = xp->security; + } else { + family = x->props.family; + if (x->security) + sctx = x->security; + } + + if (sctx) + audit_log_format(audit_buf, + " sec_alg=%u sec_doi=%u sec_obj=%s", + sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); + + switch(family) { + case AF_INET: + { + struct in_addr saddr, daddr; + if (xp) { + saddr.s_addr = xp->selector.saddr.a4; + daddr.s_addr = xp->selector.daddr.a4; + } else { + saddr.s_addr = x->props.saddr.a4; + daddr.s_addr = x->id.daddr.a4; + } + audit_log_format(audit_buf, + " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(saddr), NIPQUAD(daddr)); + } + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + if (xp) { + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + } else { + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + } + audit_log_format(audit_buf, + " src=" NIP6_FMT "dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } + + if (x) + audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", + (unsigned long)ntohl(x->id.spi), + (unsigned long)ntohl(x->id.spi), + x->id.proto == IPPROTO_AH ? "AH" : + (x->id.proto == IPPROTO_ESP ? + "ESP" : "IPCOMP")); + + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} + +EXPORT_SYMBOL(xfrm_audit_log); +#endif /* CONFIG_AUDITSYSCALL */ + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index da54a64ccfa..fdb08d9f34a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/cache.h> #include <asm/uaccess.h> +#include <linux/audit.h> #include "xfrm_hash.h" @@ -238,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data) unsigned long now = (unsigned long)xtime.tv_sec; long next = LONG_MAX; int warn = 0; + int err = 0; spin_lock(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -295,9 +297,14 @@ expired: next = 2; goto resched; } - if (!__xfrm_state_delete(x) && x->id.spi) + + err = __xfrm_state_delete(x); + if (!err && x->id.spi) km_state_expired(x, 1, 0); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + out: spin_unlock(&x->lock); } @@ -384,9 +391,10 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto) +void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) { int i; + int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i <= xfrm_state_hmask; i++) { @@ -399,7 +407,11 @@ restart: xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_delete(x); + err = xfrm_state_delete(x); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSA, + err ? 0 : 1, NULL, x); xfrm_state_put(x); spin_lock_bh(&xfrm_state_lock); @@ -1099,7 +1111,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *data) { int i; - struct xfrm_state *x; + struct xfrm_state *x, *last = NULL; struct hlist_node *entry; int count = 0; int err = 0; @@ -1107,24 +1119,22 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (xfrm_id_proto_match(x->id.proto, proto)) - count++; + if (!xfrm_id_proto_match(x->id.proto, proto)) + continue; + if (last) { + err = func(last, count, data); + if (err) + goto out; + } + last = x; + count++; } } if (count == 0) { err = -ENOENT; goto out; } - - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) - continue; - err = func(x, --count, data); - if (err) - goto out; - } - } + err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); return err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 311205ffa77..e5372b11fc8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <linux/in6.h> #endif +#include <linux/audit.h> static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -454,6 +455,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) else err = xfrm_state_update(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -523,6 +527,10 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) } err = xfrm_state_delete(x); + + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -1030,6 +1038,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + if (err) { security_xfrm_policy_free(xp); kfree(xp); @@ -1257,6 +1268,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } + if (delete) + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -1291,8 +1306,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma { struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_audit audit_info; - xfrm_state_flush(p->proto); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_state_flush(p->proto, &audit_info); c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1442,12 +1460,15 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; + struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); if (err) return err; - xfrm_policy_flush(type); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_policy_flush(type, &audit_info); c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1502,6 +1523,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL); + } else { // reset the timers here? printk("Dont know what to do with soft policy expire\n"); @@ -1533,8 +1557,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void ** goto out; km_state_expired(x, ue->hard, current->pid); - if (ue->hard) + if (ue->hard) { __xfrm_state_delete(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); + } out: spin_unlock_bh(&x->lock); xfrm_state_put(x); diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 22d281c6ec2..f359b730c2c 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -43,7 +43,7 @@ struct sym_entry { static struct sym_entry *table; static unsigned int table_size, table_cnt; -static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext; +static unsigned long long _text, _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext; static int all_symbols = 0; static char symbol_prefix_char = '\0'; @@ -91,7 +91,9 @@ static int read_symbol(FILE *in, struct sym_entry *s) sym++; /* Ignore most absolute/undefined (?) symbols. */ - if (strcmp(sym, "_stext") == 0) + if (strcmp(sym, "_text") == 0) + _text = s->addr; + else if (strcmp(sym, "_stext") == 0) _stext = s->addr; else if (strcmp(sym, "_etext") == 0) _etext = s->addr; @@ -265,9 +267,25 @@ static void write_src(void) printf(".data\n"); + /* Provide proper symbols relocatability by their '_text' + * relativeness. The symbol names cannot be used to construct + * normal symbol references as the list of symbols contains + * symbols that are declared static and are private to their + * .o files. This prevents .tmp_kallsyms.o or any other + * object from referencing them. + */ output_label("kallsyms_addresses"); for (i = 0; i < table_cnt; i++) { - printf("\tPTR\t%#llx\n", table[i].addr); + if (toupper(table[i].sym[0]) != 'A') { + if (_text <= table[i].addr) + printf("\tPTR\t_text + %#llx\n", + table[i].addr - _text); + else + printf("\tPTR\t_text - %#llx\n", + _text - table[i].addr); + } else { + printf("\tPTR\t%#llx\n", table[i].addr); + } } printf("\n"); diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 338bdea9654..f5628c57640 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -798,7 +798,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) QAction *action; headerPopup = new QPopupMenu(this); - action = new QAction("Show Name", 0, this); + action = new QAction(NULL, "Show Name", 0, this); action->setToggleAction(TRUE); connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowName(bool))); @@ -806,7 +806,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) action, SLOT(setOn(bool))); action->setOn(showName); action->addTo(headerPopup); - action = new QAction("Show Range", 0, this); + action = new QAction(NULL, "Show Range", 0, this); action->setToggleAction(TRUE); connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowRange(bool))); @@ -814,7 +814,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) action, SLOT(setOn(bool))); action->setOn(showRange); action->addTo(headerPopup); - action = new QAction("Show Data", 0, this); + action = new QAction(NULL, "Show Data", 0, this); action->setToggleAction(TRUE); connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowData(bool))); @@ -1161,7 +1161,7 @@ void ConfigInfoView::expr_print_help(void *data, struct symbol *sym, const char QPopupMenu* ConfigInfoView::createPopupMenu(const QPoint& pos) { QPopupMenu* popup = Parent::createPopupMenu(pos); - QAction* action = new QAction("Show Debug Info", 0, popup); + QAction* action = new QAction(NULL,"Show Debug Info", 0, popup); action->setToggleAction(TRUE); connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool))); connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setOn(bool))); diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 187f5de4612..df3b272f7ce 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1430,7 +1430,7 @@ sub create_parameterlist($$$) { # corresponding data structures "correctly". Catch it later in # output_* subs. push_parameter($arg, "", $file); - } elsif ($arg =~ m/\(/) { + } elsif ($arg =~ m/\(.*\*/) { # pointer-to-function $arg =~ tr/#/,/; $arg =~ m/[^\(]+\(\*([^\)]+)\)/; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2e114162314..ac0a5822299 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -911,6 +911,7 @@ static int init_section_ref_ok(const char *name) ".toc1", /* used by ppc64 */ ".stab", ".rodata", + ".parainstructions", ".text.lock", "__bug_table", /* used by powerpc for BUG() */ ".pci_fixup_header", @@ -931,6 +932,7 @@ static int init_section_ref_ok(const char *name) ".altinstructions", ".eh_frame", ".debug", + ".parainstructions", NULL }; /* part of section name */ diff --git a/scripts/ver_linux b/scripts/ver_linux index 84999f69773..72876dfadc8 100755 --- a/scripts/ver_linux +++ b/scripts/ver_linux @@ -48,6 +48,8 @@ fsck.reiser4 -V 2>&1 | grep ^fsck.reiser4 | awk \ xfs_db -V 2>&1 | grep version | awk \ 'NR==1{print "xfsprogs ", $3}' +pccardctl -V 2>&1| grep pcmciautils | awk '{print "pcmciautils ", $2}' + cardmgr -V 2>&1| grep version | awk \ 'NR==1{print "pcmcia-cs ", $3}' @@ -87,10 +89,16 @@ loadkeys -h 2>&1 | awk \ loadkeys -V 2>&1 | awk \ '(NR==1 && ($2 ~ /console-tools/)) {print "Console-tools ", $3}' +oprofiled --version 2>&1 | awk \ +'(NR==1 && ($2 == "oprofile")) {print "oprofile ", $3}' + expr --v 2>&1 | awk 'NR==1{print "Sh-utils ", $NF}' udevinfo -V 2>&1 | grep version | awk '{print "udev ", $3}' +iwconfig --version 2>&1 | awk \ +'(NR==1 && ($3 == "version")) {print "wireless-tools ",$4}' + if [ -e /proc/modules ]; then X=`cat /proc/modules | sed -e "s/ .*$//"` echo "Modules Loaded "$X diff --git a/security/keys/key.c b/security/keys/key.c index 70eacbe5abd..ac9326c5f1d 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -20,7 +20,7 @@ #include <linux/err.h> #include "internal.h" -static kmem_cache_t *key_jar; +static struct kmem_cache *key_jar; struct rb_root key_serial_tree; /* tree of keys indexed by serial */ DEFINE_SPINLOCK(key_serial_lock); @@ -285,16 +285,14 @@ struct key *key_alloc(struct key_type *type, const char *desc, } /* allocate and initialise the key and its description */ - key = kmem_cache_alloc(key_jar, SLAB_KERNEL); + key = kmem_cache_alloc(key_jar, GFP_KERNEL); if (!key) goto no_memory_2; if (desc) { - key->description = kmalloc(desclen, GFP_KERNEL); + key->description = kmemdup(desc, desclen, GFP_KERNEL); if (!key->description) goto no_memory_3; - - memcpy(key->description, desc, desclen); } atomic_set(&key->usage, 1); diff --git a/security/keys/keyring.c b/security/keys/keyring.c index e8d02acc51e..ad45ce73964 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -706,12 +706,10 @@ int __key_link(struct key *keyring, struct key *key) BUG_ON(size > PAGE_SIZE); ret = -ENOMEM; - nklist = kmalloc(size, GFP_KERNEL); + nklist = kmemdup(klist, size, GFP_KERNEL); if (!nklist) goto error2; - memcpy(nklist, klist, size); - /* replace matched key */ atomic_inc(&key->usage); nklist->keys[loop] = key; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 32150cf7c37..b6f86808475 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(key_session_mutex); struct key_user root_key_user = { .usage = ATOMIC_INIT(3), .consq = LIST_HEAD_INIT(root_key_user.consq), - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(root_key_user.lock), .nkeys = ATOMIC_INIT(2), .nikeys = ATOMIC_INIT(2), .uid = 0, diff --git a/security/selinux/avc.c b/security/selinux/avc.c index e73ac1ab7cf..e7c0b5e2066 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -124,7 +124,7 @@ DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; static struct avc_cache avc_cache; static struct avc_callback_node *avc_callbacks; -static kmem_cache_t *avc_node_cachep; +static struct kmem_cache *avc_node_cachep; static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) { @@ -332,7 +332,7 @@ static struct avc_node *avc_alloc_node(void) { struct avc_node *node; - node = kmem_cache_alloc(avc_node_cachep, SLAB_ATOMIC); + node = kmem_cache_alloc(avc_node_cachep, GFP_ATOMIC); if (!node) goto out; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 78f98fe084e..44e9cd47054 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -124,7 +124,7 @@ static struct security_operations *secondary_ops = NULL; static LIST_HEAD(superblock_security_head); static DEFINE_SPINLOCK(sb_security_lock); -static kmem_cache_t *sel_inode_cache; +static struct kmem_cache *sel_inode_cache; /* Return security context for a given sid or just the context length if the buffer is null or length is 0 */ @@ -181,7 +181,7 @@ static int inode_alloc_security(struct inode *inode) struct task_security_struct *tsec = current->security; struct inode_security_struct *isec; - isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL); + isec = kmem_cache_alloc(sel_inode_cache, GFP_KERNEL); if (!isec) return -ENOMEM; diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index d049c7acbc8..ebb993c5c24 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -28,7 +28,7 @@ (keyp->source_type << 9)) & \ AVTAB_HASH_MASK) -static kmem_cache_t *avtab_node_cachep; +static struct kmem_cache *avtab_node_cachep; static struct avtab_node* avtab_insert_node(struct avtab *h, int hvalue, @@ -36,7 +36,7 @@ avtab_insert_node(struct avtab *h, int hvalue, struct avtab_key *key, struct avtab_datum *datum) { struct avtab_node * newnode; - newnode = kmem_cache_alloc(avtab_node_cachep, SLAB_KERNEL); + newnode = kmem_cache_alloc(avtab_node_cachep, GFP_KERNEL); if (newnode == NULL) return NULL; memset(newnode, 0, sizeof(struct avtab_node)); diff --git a/sound/arm/sa11xx-uda1341.c b/sound/arm/sa11xx-uda1341.c index c79a9afd095..c7e1b264619 100644 --- a/sound/arm/sa11xx-uda1341.c +++ b/sound/arm/sa11xx-uda1341.c @@ -125,7 +125,7 @@ struct audio_stream { #else dma_regs_t *dma_regs; /* points to our DMA registers */ #endif - int active:1; /* we are using this stream for transfer now */ + unsigned int active:1; /* we are using this stream for transfer now */ int period; /* current transfer period */ int periods; /* current count of periods registerd in the DMA engine */ int tx_spin; /* are we recoding - flag used to do DMA trans. for sync */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 66e24b5da46..6ea67b16c67 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3027,7 +3027,7 @@ static struct page * snd_pcm_mmap_status_nopage(struct vm_area_struct *area, struct page * page; if (substream == NULL) - return NOPAGE_OOM; + return NOPAGE_SIGBUS; runtime = substream->runtime; page = virt_to_page(runtime->status); get_page(page); @@ -3070,7 +3070,7 @@ static struct page * snd_pcm_mmap_control_nopage(struct vm_area_struct *area, struct page * page; if (substream == NULL) - return NOPAGE_OOM; + return NOPAGE_SIGBUS; runtime = substream->runtime; page = virt_to_page(runtime->control); get_page(page); @@ -3131,18 +3131,18 @@ static struct page *snd_pcm_mmap_data_nopage(struct vm_area_struct *area, size_t dma_bytes; if (substream == NULL) - return NOPAGE_OOM; + return NOPAGE_SIGBUS; runtime = substream->runtime; offset = area->vm_pgoff << PAGE_SHIFT; offset += address - area->vm_start; - snd_assert((offset % PAGE_SIZE) == 0, return NOPAGE_OOM); + snd_assert((offset % PAGE_SIZE) == 0, return NOPAGE_SIGBUS); dma_bytes = PAGE_ALIGN(runtime->dma_bytes); if (offset > dma_bytes - PAGE_SIZE) return NOPAGE_SIGBUS; if (substream->ops->page) { page = substream->ops->page(substream, offset); if (! page) - return NOPAGE_OOM; + return NOPAGE_OOM; /* XXX: is this really due to OOM? */ } else { vaddr = runtime->dma_area + offset; page = virt_to_page(vaddr); diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig index cc2b9ab7f4e..a0588c21324 100644 --- a/sound/oss/Kconfig +++ b/sound/oss/Kconfig @@ -5,20 +5,6 @@ # # Prompt user for primary drivers. -config OSS_OBSOLETE_DRIVER - bool "Obsolete OSS drivers" - depends on SOUND_PRIME - help - This option enables support for obsolete OSS drivers that - are scheduled for removal in the near future since there - are ALSA drivers for the same hardware. - - Please contact Adrian Bunk <bunk@stusta.de> if you had to - say Y here because your soundcard is not properly supported - by ALSA. - - If unsure, say N. - config SOUND_BT878 tristate "BT878 audio dma" depends on SOUND_PRIME && PCI @@ -35,40 +21,6 @@ config SOUND_BT878 To compile this driver as a module, choose M here: the module will be called btaudio. -config SOUND_EMU10K1 - tristate "Creative SBLive! (EMU10K1)" - depends on SOUND_PRIME && PCI && OSS_OBSOLETE_DRIVER - ---help--- - Say Y or M if you have a PCI sound card using the EMU10K1 chipset, - such as the Creative SBLive!, SB PCI512 or Emu-APS. - - For more information on this driver and the degree of support for - the different card models please check: - - <http://sourceforge.net/projects/emu10k1/> - - It is now possible to load dsp microcode patches into the EMU10K1 - chip. These patches are used to implement real time sound - processing effects which include for example: signal routing, - bass/treble control, AC3 passthrough, ... - Userspace tools to create new patches and load/unload them can be - found in the emu-tools package at the above URL. - -config MIDI_EMU10K1 - bool "Creative SBLive! MIDI (EXPERIMENTAL)" - depends on SOUND_EMU10K1 && EXPERIMENTAL && ISA_DMA_API - help - Say Y if you want to be able to use the OSS /dev/sequencer - interface. This code is still experimental. - -config SOUND_FUSION - tristate "Crystal SoundFusion (CS4280/461x)" - depends on SOUND_PRIME && PCI && OSS_OBSOLETE_DRIVER - help - This module drives the Crystal SoundFusion devices (CS4280/46xx - series) when wired as native sound drivers with AC97 codecs. If - this driver does not work try the CS4232 driver. - config SOUND_BCM_CS4297A tristate "Crystal Sound CS4297a (for Swarm)" depends on SOUND_PRIME && SIBYTE_SWARM @@ -448,47 +400,6 @@ config SOUND_DMAP Say Y unless you have 16MB or more RAM or a PCI sound card. -config SOUND_AD1816 - tristate "AD1816(A) based cards (EXPERIMENTAL)" - depends on EXPERIMENTAL && SOUND_OSS && OSS_OBSOLETE_DRIVER - help - Say M here if you have a sound card based on the Analog Devices - AD1816(A) chip. - - If you compile the driver into the kernel, you have to add - "ad1816=<io>,<irq>,<dma>,<dma2>" to the kernel command line. - -config SOUND_AD1889 - tristate "AD1889 based cards (AD1819 codec) (EXPERIMENTAL)" - depends on EXPERIMENTAL && SOUND_OSS && PCI && OSS_OBSOLETE_DRIVER - help - Say M here if you have a sound card based on the Analog Devices - AD1889 chip. - -config SOUND_ADLIB - tristate "Adlib Cards" - depends on SOUND_OSS && OSS_OBSOLETE_DRIVER - help - Includes ASB 64 4D. Information on programming AdLib cards is - available at <http://www.itsnet.com/home/ldragon/Specs/adlib.html>. - -config SOUND_ACI_MIXER - tristate "ACI mixer (miroSOUND PCM1-pro/PCM12/PCM20)" - depends on SOUND_OSS && OSS_OBSOLETE_DRIVER - ---help--- - ACI (Audio Command Interface) is a protocol used to communicate with - the microcontroller on some sound cards produced by miro and - Cardinal Technologies. The main function of the ACI is to control - the mixer and to get a product identification. - - This VoxWare ACI driver currently supports the ACI functions on the - miroSOUND PCM1-pro, PCM12 and PCM20 radio. On the PCM20 radio, ACI - also controls the radio tuner. This is supported in the video4linux - miropcm20 driver (say M or Y here and go back to "Multimedia - devices" -> "Radio Adapters"). - - This driver is also available as a module and will be called aci. - config SOUND_CS4232 tristate "Crystal CS4232 based (PnP) cards" depends on SOUND_OSS @@ -594,18 +505,6 @@ config SOUND_MPU401 If you compile the driver into the kernel, you have to add "mpu401=<io>,<irq>" to the kernel command line. -config SOUND_NM256 - tristate "NM256AV/NM256ZX audio support" - depends on SOUND_OSS && OSS_OBSOLETE_DRIVER - help - Say M here to include audio support for the NeoMagic 256AV/256ZX - chipsets. These are the audio chipsets found in the Sony - Z505S/SX/DX, some Sony F-series, and the Dell Latitude CPi and CPt - laptops. It includes support for an AC97-compatible mixer and an - apparently proprietary sound engine. - - See <file:Documentation/sound/oss/NM256> for further information. - config SOUND_PAS tristate "ProAudioSpectrum 16 support" depends on SOUND_OSS @@ -714,20 +613,6 @@ config SOUND_YM3812 If unsure, say Y. -config SOUND_OPL3SA2 - tristate "Yamaha OPL3-SA2 and SA3 based PnP cards" - depends on SOUND_OSS && OSS_OBSOLETE_DRIVER - help - Say Y or M if you have a card based on one of these Yamaha sound - chipsets or the "SAx", which is actually a SA3. Read - <file:Documentation/sound/oss/OPL3-SA2> for more information on - configuring these cards. - - If you compile the driver into the kernel and do not also - configure in the optional ISA PnP support, you will have to add - "opl3sa2=<io>,<irq>,<dma>,<dma2>,<mssio>,<mpuio>" to the kernel - command line. - config SOUND_UART6850 tristate "6850 UART support" depends on SOUND_OSS diff --git a/sound/oss/btaudio.c b/sound/oss/btaudio.c index 6ad38411423..ad7210a00dc 100644 --- a/sound/oss/btaudio.c +++ b/sound/oss/btaudio.c @@ -1020,6 +1020,7 @@ static int __devinit btaudio_probe(struct pci_dev *pci_dev, fail2: free_irq(bta->irq,bta); fail1: + iounmap(bta->mmio); kfree(bta); fail0: release_mem_region(pci_resource_start(pci_dev,0), @@ -1051,6 +1052,7 @@ static void __devexit btaudio_remove(struct pci_dev *pci_dev) free_irq(bta->irq,bta); release_mem_region(pci_resource_start(pci_dev,0), pci_resource_len(pci_dev,0)); + iounmap(bta->mmio); /* remove from linked list */ if (bta == btaudios) { diff --git a/sound/oss/emu10k1/audio.c b/sound/oss/emu10k1/audio.c index 86dd23974e0..49f902f35c2 100644 --- a/sound/oss/emu10k1/audio.c +++ b/sound/oss/emu10k1/audio.c @@ -111,9 +111,15 @@ static ssize_t emu10k1_audio_read(struct file *file, char __user *buffer, size_t if ((bytestocopy >= wiinst->buffer.fragment_size) || (bytestocopy >= count)) { + int rc; + bytestocopy = min_t(u32, bytestocopy, count); - emu10k1_wavein_xferdata(wiinst, (u8 __user *)buffer, &bytestocopy); + rc = emu10k1_wavein_xferdata(wiinst, + (u8 __user *)buffer, + &bytestocopy); + if (rc) + return rc; count -= bytestocopy; buffer += bytestocopy; diff --git a/sound/oss/emu10k1/cardwi.c b/sound/oss/emu10k1/cardwi.c index 8bbf44b881b..060d1be94d3 100644 --- a/sound/oss/emu10k1/cardwi.c +++ b/sound/oss/emu10k1/cardwi.c @@ -304,11 +304,12 @@ void emu10k1_wavein_getxfersize(struct wiinst *wiinst, u32 * size) } } -static void copy_block(u8 __user *dst, u8 * src, u32 str, u32 len, u8 cov) +static int copy_block(u8 __user *dst, u8 * src, u32 str, u32 len, u8 cov) { - if (cov == 1) - __copy_to_user(dst, src + str, len); - else { + if (cov == 1) { + if (__copy_to_user(dst, src + str, len)) + return -EFAULT; + } else { u8 byte; u32 i; @@ -316,22 +317,26 @@ static void copy_block(u8 __user *dst, u8 * src, u32 str, u32 len, u8 cov) for (i = 0; i < len; i++) { byte = src[2 * i] ^ 0x80; - __copy_to_user(dst + i, &byte, 1); + if (__copy_to_user(dst + i, &byte, 1)) + return -EFAULT; } } + + return 0; } -void emu10k1_wavein_xferdata(struct wiinst *wiinst, u8 __user *data, u32 * size) +int emu10k1_wavein_xferdata(struct wiinst *wiinst, u8 __user *data, u32 * size) { struct wavein_buffer *buffer = &wiinst->buffer; u32 sizetocopy, sizetocopy_now, start; unsigned long flags; + int ret; sizetocopy = min_t(u32, buffer->size, *size); *size = sizetocopy; if (!sizetocopy) - return; + return 0; spin_lock_irqsave(&wiinst->lock, flags); start = buffer->pos; @@ -345,11 +350,17 @@ void emu10k1_wavein_xferdata(struct wiinst *wiinst, u8 __user *data, u32 * size) if (sizetocopy > sizetocopy_now) { sizetocopy -= sizetocopy_now; - copy_block(data, buffer->addr, start, sizetocopy_now, buffer->cov); - copy_block(data + sizetocopy_now, buffer->addr, 0, sizetocopy, buffer->cov); + ret = copy_block(data, buffer->addr, start, sizetocopy_now, + buffer->cov); + if (ret == 0) + ret = copy_block(data + sizetocopy_now, buffer->addr, 0, + sizetocopy, buffer->cov); } else { - copy_block(data, buffer->addr, start, sizetocopy, buffer->cov); + ret = copy_block(data, buffer->addr, start, sizetocopy, + buffer->cov); } + + return ret; } void emu10k1_wavein_update(struct emu10k1_card *card, struct wiinst *wiinst) diff --git a/sound/oss/emu10k1/cardwi.h b/sound/oss/emu10k1/cardwi.h index 15cfb9b3559..e82029b46ad 100644 --- a/sound/oss/emu10k1/cardwi.h +++ b/sound/oss/emu10k1/cardwi.h @@ -83,7 +83,7 @@ void emu10k1_wavein_close(struct emu10k1_wavedevice *); void emu10k1_wavein_start(struct emu10k1_wavedevice *); void emu10k1_wavein_stop(struct emu10k1_wavedevice *); void emu10k1_wavein_getxfersize(struct wiinst *, u32 *); -void emu10k1_wavein_xferdata(struct wiinst *, u8 __user *, u32 *); +int emu10k1_wavein_xferdata(struct wiinst *, u8 __user *, u32 *); int emu10k1_wavein_setformat(struct emu10k1_wavedevice *, struct wave_format *); void emu10k1_wavein_update(struct emu10k1_card *, struct wiinst *); diff --git a/sound/oss/emu10k1/passthrough.c b/sound/oss/emu10k1/passthrough.c index 4e3baca7d41..6d21d4368de 100644 --- a/sound/oss/emu10k1/passthrough.c +++ b/sound/oss/emu10k1/passthrough.c @@ -162,12 +162,15 @@ ssize_t emu10k1_pt_write(struct file *file, const char __user *buffer, size_t co DPD(3, "prepend size %d, prepending %d bytes\n", pt->prepend_size, needed); if (count < needed) { - copy_from_user(pt->buf + pt->prepend_size, buffer, count); + if (copy_from_user(pt->buf + pt->prepend_size, + buffer, count)) + return -EFAULT; pt->prepend_size += count; DPD(3, "prepend size now %d\n", pt->prepend_size); return count; } - copy_from_user(pt->buf + pt->prepend_size, buffer, needed); + if (copy_from_user(pt->buf + pt->prepend_size, buffer, needed)) + return -EFAULT; r = pt_putblock(wave_dev, (u16 *) pt->buf, nonblock); if (r) return r; @@ -178,7 +181,8 @@ ssize_t emu10k1_pt_write(struct file *file, const char __user *buffer, size_t co blocks_copied = 0; while (blocks > 0) { u16 __user *bufptr = (u16 __user *) buffer + (bytes_copied/2); - copy_from_user(pt->buf, bufptr, PT_BLOCKSIZE); + if (copy_from_user(pt->buf, bufptr, PT_BLOCKSIZE)) + return -EFAULT; r = pt_putblock(wave_dev, (u16 *)pt->buf, nonblock); if (r) { if (bytes_copied) @@ -193,7 +197,8 @@ ssize_t emu10k1_pt_write(struct file *file, const char __user *buffer, size_t co i = count - bytes_copied; if (i) { pt->prepend_size = i; - copy_from_user(pt->buf, buffer + bytes_copied, i); + if (copy_from_user(pt->buf, buffer + bytes_copied, i)) + return -EFAULT; bytes_copied += i; DPD(3, "filling prepend buffer with %d bytes", i); } diff --git a/sound/oss/via82cxxx_audio.c b/sound/oss/via82cxxx_audio.c index 17837d4b5ed..c96cc8c68b3 100644 --- a/sound/oss/via82cxxx_audio.c +++ b/sound/oss/via82cxxx_audio.c @@ -2120,8 +2120,8 @@ static struct page * via_mm_nopage (struct vm_area_struct * vma, return NOPAGE_SIGBUS; /* Disallow mremap */ } if (!card) { - DPRINTK ("EXIT, returning NOPAGE_OOM\n"); - return NOPAGE_OOM; /* Nothing allocated */ + DPRINTK ("EXIT, returning NOPAGE_SIGBUS\n"); + return NOPAGE_SIGBUS; /* Nothing allocated */ } pgoff = vma->vm_pgoff + ((address - vma->vm_start) >> PAGE_SHIFT); diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 4b52d18dcd5..b76b3dd9df2 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -48,7 +48,7 @@ static struct page * snd_us428ctls_vm_nopage(struct vm_area_struct *area, unsign offset = area->vm_pgoff << PAGE_SHIFT; offset += address - area->vm_start; - snd_assert((offset % PAGE_SIZE) == 0, return NOPAGE_OOM); + snd_assert((offset % PAGE_SIZE) == 0, return NOPAGE_SIGBUS); vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->us428ctls_sharedmem + offset; page = virt_to_page(vaddr); get_page(page); |